[D3D12] EDRAM storing and random cleanup

This commit is contained in:
Triang3l 2018-08-11 20:33:33 +03:00
parent a4b98cda31
commit 9b303c64ba
17 changed files with 760 additions and 11 deletions

View File

@ -377,7 +377,7 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = scratch_buffer_; barrier.Transition.pResource = scratch_buffer_;
barrier.Transition.Subresource = 0; barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = scratch_buffer_state_; barrier.Transition.StateBefore = scratch_buffer_state_;
barrier.Transition.StateAfter = state; barrier.Transition.StateAfter = state;
GetCurrentCommandList()->ResourceBarrier(1, &barrier); GetCurrentCommandList()->ResourceBarrier(1, &barrier);
@ -489,6 +489,10 @@ bool D3D12CommandProcessor::SetupContext() {
render_target_cache_ = render_target_cache_ =
std::make_unique<RenderTargetCache>(this, register_file_); std::make_unique<RenderTargetCache>(this, register_file_);
if (!render_target_cache_->Initialize()) {
XELOGE("Failed to initialize the render target cache");
return false;
}
return true; return true;
} }

View File

@ -21,13 +21,176 @@ namespace xe {
namespace gpu { namespace gpu {
namespace d3d12 { namespace d3d12 {
// Generated with `xb buildhlsl`.
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_load_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_load_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_color_32bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_color_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_unorm_cs.h"
// Compute shader bytecode, bytecode size and a wide-string label for every
// EDRAM load/store pipeline. Initialize() walks this table by index and stores
// the resulting pipeline at the same index, and StoreRenderTargetsToEDRAM()
// picks a pipeline by EDRAMLoadStorePipelineIndex value, so the entries must
// stay in the same order as the enumerators (load/store pairs for 32bpp color,
// 64bpp color, 7e3 color, unorm depth, float depth).
const RenderTargetCache::EDRAMLoadStorePipelineInfo
RenderTargetCache::edram_load_store_pipeline_info_[size_t(
RenderTargetCache::EDRAMLoadStorePipelineIndex::kCount)] = {
// kColor32bppLoad, kColor32bppStore.
{edram_load_color_32bpp_cs, sizeof(edram_load_color_32bpp_cs),
L"EDRAM Load 32bpp Color"},
{edram_store_color_32bpp_cs, sizeof(edram_store_color_32bpp_cs),
L"EDRAM Store 32bpp Color"},
// kColor64bppLoad, kColor64bppStore.
{edram_load_color_64bpp_cs, sizeof(edram_load_color_64bpp_cs),
L"EDRAM Load 64bpp Color"},
{edram_store_color_64bpp_cs, sizeof(edram_store_color_64bpp_cs),
L"EDRAM Store 64bpp Color"},
// kColor7e3Load, kColor7e3Store.
{edram_load_color_7e3_cs, sizeof(edram_load_color_7e3_cs),
L"EDRAM Load 7e3 Color"},
{edram_store_color_7e3_cs, sizeof(edram_store_color_7e3_cs),
L"EDRAM Store 7e3 Color"},
// kDepthUnormLoad, kDepthUnormStore.
{edram_load_depth_unorm_cs, sizeof(edram_load_depth_unorm_cs),
L"EDRAM Load UNorm Depth"},
{edram_store_depth_unorm_cs, sizeof(edram_store_depth_unorm_cs),
L"EDRAM Store UNorm Depth"},
// kDepthFloatLoad, kDepthFloatStore.
{edram_load_depth_float_cs, sizeof(edram_load_depth_float_cs),
L"EDRAM Load Float Depth"},
{edram_store_depth_float_cs, sizeof(edram_store_depth_float_cs),
L"EDRAM Store Float Depth"},
};
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor, RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file) RegisterFile* register_file)
: command_processor_(command_processor), register_file_(register_file) {} : command_processor_(command_processor), register_file_(register_file) {}
RenderTargetCache::~RenderTargetCache() { Shutdown(); } RenderTargetCache::~RenderTargetCache() { Shutdown(); }
void RenderTargetCache::Shutdown() { ClearCache(); } bool RenderTargetCache::Initialize() {
// Initialize() creates, in this order: the EDRAM buffer, the root signature
// shared by all EDRAM load/store compute shaders, and one compute pipeline per
// entry of edram_load_store_pipeline_info_. Returns true on success. On failure
// an error is logged and false is returned; failures after the buffer has been
// created also call Shutdown() to release what was already created.
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
// Create the buffer for reinterpreting EDRAM contents.
D3D12_RESOURCE_DESC edram_buffer_desc;
edram_buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
edram_buffer_desc.Alignment = 0;
// First 10 MB is guest pixel data, second 10 MB is 32-bit depth when using
// D24FS8 so loads/stores don't corrupt multipass rendering.
// Each half is 2048 * 5120 bytes; 5120 is the per-tile byte stride used by
// XeEDRAMOffset in the load/store shaders.
edram_buffer_desc.Width = 2 * 2048 * 5120;
edram_buffer_desc.Height = 1;
edram_buffer_desc.DepthOrArraySize = 1;
edram_buffer_desc.MipLevels = 1;
edram_buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
edram_buffer_desc.SampleDesc.Count = 1;
edram_buffer_desc.SampleDesc.Quality = 0;
edram_buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
// The store shaders write to the buffer through a UAV.
edram_buffer_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
D3D12_HEAP_PROPERTIES edram_buffer_heap_properties = {};
edram_buffer_heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
// The first operation will be a clear.
edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
if (FAILED(device->CreateCommittedResource(
&edram_buffer_heap_properties, D3D12_HEAP_FLAG_NONE,
&edram_buffer_desc, edram_buffer_state_, nullptr,
IID_PPV_ARGS(&edram_buffer_)))) {
XELOGE("Failed to create the EDRAM buffer");
// Nothing else has been created at this point, so Shutdown() is not called.
return false;
}
// No clear is recorded here; only the flag is reset.
edram_buffer_cleared_ = false;
// Create the root signature for EDRAM buffer load/store.
D3D12_ROOT_PARAMETER root_parameters[2];
// Parameter 0 is constants (changed for each render target binding).
// They are bound to b0, where the shaders declare XeEDRAMLoadStoreConstants.
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
root_parameters[0].Constants.ShaderRegister = 0;
root_parameters[0].Constants.RegisterSpace = 0;
root_parameters[0].Constants.Num32BitValues =
sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t);
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is source and target.
// One descriptor table: the SRV (t0) at table offset 0 followed by the UAV
// (u0) at table offset 1.
D3D12_DESCRIPTOR_RANGE root_load_store_ranges[2];
root_load_store_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_load_store_ranges[0].NumDescriptors = 1;
root_load_store_ranges[0].BaseShaderRegister = 0;
root_load_store_ranges[0].RegisterSpace = 0;
root_load_store_ranges[0].OffsetInDescriptorsFromTableStart = 0;
root_load_store_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_load_store_ranges[1].NumDescriptors = 1;
root_load_store_ranges[1].BaseShaderRegister = 0;
root_load_store_ranges[1].RegisterSpace = 0;
root_load_store_ranges[1].OffsetInDescriptorsFromTableStart = 1;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
root_parameters[1].DescriptorTable.pDescriptorRanges = root_load_store_ranges;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
root_signature_desc.pParameters = root_parameters;
root_signature_desc.NumStaticSamplers = 0;
root_signature_desc.pStaticSamplers = nullptr;
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
// Serialize the description into a blob, then create the root signature from
// the blob. Both blobs are released on every path below.
ID3DBlob* root_signature_blob;
ID3DBlob* root_signature_error_blob = nullptr;
if (FAILED(D3D12SerializeRootSignature(
&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
&root_signature_blob, &root_signature_error_blob))) {
XELOGE("Failed to serialize the EDRAM buffer load/store root signature");
if (root_signature_error_blob != nullptr) {
// The error blob is logged as a C string.
XELOGE("%s", reinterpret_cast<const char*>(
root_signature_error_blob->GetBufferPointer()));
root_signature_error_blob->Release();
}
Shutdown();
return false;
}
// An error blob may be returned even when serialization succeeded.
if (root_signature_error_blob != nullptr) {
root_signature_error_blob->Release();
}
if (FAILED(device->CreateRootSignature(
0, root_signature_blob->GetBufferPointer(),
root_signature_blob->GetBufferSize(),
IID_PPV_ARGS(&edram_load_store_root_signature_)))) {
XELOGE("Failed to create the EDRAM buffer load/store root signature");
root_signature_blob->Release();
Shutdown();
return false;
}
root_signature_blob->Release();
// Create the load/store pipelines.
// Everything except the compute shader bytecode is shared by all pipelines,
// so the common fields are filled once outside the loop.
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
pipeline_desc.pRootSignature = edram_load_store_root_signature_;
pipeline_desc.NodeMask = 0;
pipeline_desc.CachedPSO.pCachedBlob = nullptr;
pipeline_desc.CachedPSO.CachedBlobSizeInBytes = 0;
pipeline_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStorePipelineIndex::kCount); ++i) {
const EDRAMLoadStorePipelineInfo& pipeline_info =
edram_load_store_pipeline_info_[i];
pipeline_desc.CS.pShaderBytecode = pipeline_info.shader;
pipeline_desc.CS.BytecodeLength = pipeline_info.shader_size;
// pipeline_info.name is not used here.
if (FAILED(device->CreateComputePipelineState(
&pipeline_desc, IID_PPV_ARGS(&edram_load_store_pipelines_[i])))) {
XELOGE("Failed to create EDRAM load/store pipeline for mode %u", i);
Shutdown();
return false;
}
}
return true;
}
void RenderTargetCache::Shutdown() {
ClearCache();
if (edram_load_store_root_signature_ != nullptr) {
edram_load_store_root_signature_->Release();
edram_load_store_root_signature_ = nullptr;
}
if (edram_buffer_ != nullptr) {
edram_buffer_->Release();
edram_buffer_ = nullptr;
}
}
void RenderTargetCache::ClearCache() { void RenderTargetCache::ClearCache() {
for (auto render_target_pair : render_targets_) { for (auto render_target_pair : render_targets_) {
@ -334,7 +497,7 @@ bool RenderTargetCache::UpdateRenderTargets() {
uint32_t heap_usage[5] = {}; uint32_t heap_usage[5] = {};
if (full_update) { if (full_update) {
// Export the currently bound render targets before we ruin the bindings. // Export the currently bound render targets before we ruin the bindings.
WriteRenderTargetsToEDRAM(); StoreRenderTargetsToEDRAM();
ClearBindings(); ClearBindings();
current_surface_pitch_ = surface_pitch; current_surface_pitch_ = surface_pitch;
@ -527,7 +690,7 @@ bool RenderTargetCache::UpdateRenderTargets() {
} }
void RenderTargetCache::EndFrame() { void RenderTargetCache::EndFrame() {
WriteRenderTargetsToEDRAM(); StoreRenderTargetsToEDRAM();
ClearBindings(); ClearBindings();
} }
@ -709,6 +872,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
} }
++descriptor_heap->descriptors_used; ++descriptor_heap->descriptors_used;
// Get the layout for copying to the EDRAM buffer.
RenderTarget* render_target = new RenderTarget; RenderTarget* render_target = new RenderTarget;
render_target->resource = resource; render_target->resource = resource;
render_target->state = state; render_target->state = state;
@ -716,11 +880,245 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
render_target->key = key; render_target->key = key;
render_target->heap_page_first = heap_page_first; render_target->heap_page_first = heap_page_first;
render_target->heap_page_count = heap_page_count; render_target->heap_page_count = heap_page_count;
UINT64 copy_buffer_size;
device->GetCopyableFootprints(&resource_desc, 0, key.is_depth ? 2 : 1, 0,
render_target->footprints, nullptr, nullptr,
&copy_buffer_size);
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
render_targets_.insert(std::make_pair(key.value, render_target)); render_targets_.insert(std::make_pair(key.value, render_target));
return render_target; return render_target;
} }
// Old-name stub from the left-hand side of this diff; intentionally empty.
void RenderTargetCache::WriteRenderTargetsToEDRAM() {}

// Copies the dirty contents of the currently bound render targets into the
// EDRAM buffer.
//
// For every bound render target whose dirty length covers at least one row of
// tiles, the texture is copied into a scratch buffer and a format-specific
// compute shader then writes it from the scratch buffer into the EDRAM buffer
// in tiled layout. Render targets are processed in ascending EDRAM base order.
//
// Does nothing if there is no current command list, nothing needs storing, or
// descriptors / the scratch buffer cannot be obtained.
//
// NOTE(review): this function does not reset edram_dirty_length itself; both
// visible callers call ClearBindings() right afterwards - confirm that is what
// marks the bindings clean.
void RenderTargetCache::StoreRenderTargetsToEDRAM() {
  auto command_list = command_processor_->GetCurrentCommandList();
  if (command_list == nullptr) {
    return;
  }

  // Pitch in 80-sample-wide tiles; the width is doubled for 4x MSAA.
  uint32_t surface_pitch_ss =
      current_surface_pitch_ *
      (current_msaa_samples_ >= MsaaSamples::k4X ? 2 : 1);
  uint32_t surface_pitch_tiles = (surface_pitch_ss + 79) / 80;
  assert_true(surface_pitch_tiles != 0);

  // TODO(Triang3l): Clear the buffer if calling for the first time.

  uint32_t store_bindings[5];
  uint32_t store_binding_count = 0;

  // At most 5 render target transitions plus 1 EDRAM buffer transition.
  D3D12_RESOURCE_BARRIER barriers[6];
  uint32_t barrier_count;

  // Extract only the render targets that need to be stored, transition them to
  // copy sources and calculate intermediate buffer size.
  uint32_t copy_buffer_size = 0;
  barrier_count = 0;
  for (uint32_t i = 0; i < 5; ++i) {
    const RenderTargetBinding& binding = current_bindings_[i];
    RenderTarget* render_target = binding.render_target;
    // TODO(Triang3l): Change edram_dirty_length to dirty row count.
    if (!binding.is_bound || render_target == nullptr ||
        binding.edram_dirty_length < surface_pitch_tiles) {
      continue;
    }
    store_bindings[store_binding_count] = i;
    // The scratch buffer is reused for every render target, so it only has to
    // fit the largest one.
    copy_buffer_size =
        std::max(copy_buffer_size, render_target->copy_buffer_size);
    ++store_binding_count;
    if (render_target->state != D3D12_RESOURCE_STATE_COPY_SOURCE) {
      D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
      barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barrier.Transition.pResource = render_target->resource;
      barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
      barrier.Transition.StateBefore = render_target->state;
      barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
      render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
    }
  }
  if (store_binding_count == 0) {
    return;
  }
  if (edram_buffer_state_ != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
    // Also transition the EDRAM buffer to UAV.
    D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
    barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    barrier.Transition.pResource = edram_buffer_;
    barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    barrier.Transition.StateBefore = edram_buffer_state_;
    barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
    edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  }
  if (barrier_count != 0) {
    command_list->ResourceBarrier(barrier_count, barriers);
  }

  // Allocate descriptors for the buffers.
  // Two view descriptors are needed: the scratch buffer SRV and the EDRAM
  // buffer UAV. A zero return value is treated as failure.
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
  D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
  if (command_processor_->RequestViewDescriptors(0, 2, 2, descriptor_cpu_start,
                                                 descriptor_gpu_start) == 0) {
    return;
  }

  // Get the buffer for copying.
  D3D12_RESOURCE_STATES copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
  ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
      copy_buffer_size, copy_buffer_state);
  if (copy_buffer == nullptr) {
    return;
  }

  // Prepare for writing.
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  auto device = provider->GetDevice();
  auto descriptor_size_view = provider->GetDescriptorSizeView();
  // Raw (byte address) view of the scratch buffer, counted in dwords.
  D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
  srv_desc.Format = DXGI_FORMAT_R32_TYPELESS;
  srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
  srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  srv_desc.Buffer.FirstElement = 0;
  srv_desc.Buffer.NumElements = copy_buffer_size >> 2;
  srv_desc.Buffer.StructureByteStride = 0;
  srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
  device->CreateShaderResourceView(copy_buffer, &srv_desc,
                                   descriptor_cpu_start);
  // Raw view of the whole EDRAM buffer: 2 * 2048 * 1280 dwords, which equals
  // the 2 * 2048 * 5120 bytes the buffer is created with.
  D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
  uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
  uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
  uav_desc.Buffer.FirstElement = 0;
  uav_desc.Buffer.NumElements = 2 * 2048 * 1280;
  uav_desc.Buffer.StructureByteStride = 0;
  uav_desc.Buffer.CounterOffsetInBytes = 0;
  uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
  D3D12_CPU_DESCRIPTOR_HANDLE uav_cpu_handle;
  uav_cpu_handle.ptr = descriptor_cpu_start.ptr + descriptor_size_view;
  device->CreateUnorderedAccessView(edram_buffer_, nullptr, &uav_desc,
                                    uav_cpu_handle);
  command_list->SetComputeRootSignature(edram_load_store_root_signature_);
  command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);

  // Sort the bindings in ascending order of EDRAM base so data in the render
  // targets placed farther in EDRAM isn't lost in case of overlap.
  // The binding index is only a tie-breaker for equal bases. The comparator
  // must be a strict weak ordering for std::sort, so the index may not be
  // consulted when the bases differ.
  std::sort(store_bindings, store_bindings + store_binding_count,
            [this](uint32_t a, uint32_t b) {
              uint32_t base_a = current_bindings_[a].edram_base;
              uint32_t base_b = current_bindings_[b].edram_base;
              if (base_a != base_b) {
                return base_a < base_b;
              }
              return a < b;
            });

  // Store each render target.
  for (uint32_t i = 0; i < store_binding_count; ++i) {
    const RenderTargetBinding& binding = current_bindings_[store_bindings[i]];
    const RenderTarget* render_target = binding.render_target;

    // Pick the store shader for the render target format.
    EDRAMLoadStorePipelineIndex pipeline_index;
    bool is_64bpp = false;
    if (render_target->key.is_depth) {
      if (DepthRenderTargetFormat(render_target->key.format) ==
          DepthRenderTargetFormat::kD24FS8) {
        pipeline_index = EDRAMLoadStorePipelineIndex::kDepthFloatStore;
      } else {
        pipeline_index = EDRAMLoadStorePipelineIndex::kDepthUnormStore;
      }
    } else {
      switch (ColorRenderTargetFormat(render_target->key.format)) {
        case ColorRenderTargetFormat::k_8_8_8_8:
        case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
        case ColorRenderTargetFormat::k_2_10_10_10:
        case ColorRenderTargetFormat::k_16_16:
        case ColorRenderTargetFormat::k_16_16_FLOAT:
        case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16:
        case ColorRenderTargetFormat::k_32_FLOAT:
          pipeline_index = EDRAMLoadStorePipelineIndex::kColor32bppStore;
          break;
        case ColorRenderTargetFormat::k_16_16_16_16:
        case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
        case ColorRenderTargetFormat::k_32_32_FLOAT:
          pipeline_index = EDRAMLoadStorePipelineIndex::kColor64bppStore;
          is_64bpp = true;
          break;
        case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
        case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
          pipeline_index = EDRAMLoadStorePipelineIndex::kColor7e3Store;
          break;
        default:
          // Skipping here leaves the scratch buffer in COPY_DEST, which is the
          // state the next iteration (and the release below) expects.
          assert_unhandled_case(render_target->key.format);
          continue;
      }
    }

    // Copy the color or depth subresource into the scratch buffer.
    D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
    location_source.pResource = render_target->resource;
    location_source.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    location_source.SubresourceIndex = 0;
    location_dest.pResource = copy_buffer;
    location_dest.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    location_dest.PlacedFootprint = render_target->footprints[0];
    // TODO(Triang3l): Box for color render targets.
    command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
                                    nullptr);
    EDRAMLoadStoreRootConstants root_constants;
    root_constants.base_tiles = binding.edram_base;
    // The 64bpp pipeline is dispatched with twice as many tile columns.
    root_constants.pitch_tiles = surface_pitch_tiles * (is_64bpp ? 2 : 1);
    root_constants.rt_color_depth_pitch =
        location_dest.PlacedFootprint.Footprint.RowPitch;
    // The stencil constants are only meaningful for depth render targets, but
    // the whole structure is uploaded, so give them defined values for color.
    root_constants.rt_stencil_offset = 0;
    root_constants.rt_stencil_pitch = 0;
    if (render_target->key.is_depth) {
      // Stencil is the second subresource and gets its own footprint within
      // the scratch buffer.
      location_source.SubresourceIndex = 1;
      location_dest.PlacedFootprint = render_target->footprints[1];
      command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
                                      nullptr);
      root_constants.rt_stencil_offset =
          uint32_t(location_dest.PlacedFootprint.Offset);
      root_constants.rt_stencil_pitch =
          location_dest.PlacedFootprint.Footprint.RowPitch;
    }

    // Transition the copy buffer to SRV.
    barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    barriers[0].Transition.pResource = copy_buffer;
    barriers[0].Transition.Subresource =
        D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    barriers[0].Transition.StateAfter =
        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
    copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
    command_list->ResourceBarrier(1, barriers);

    // Store the data.
    // One thread group is dispatched per tile: pitch_tiles columns by
    // (dirty length / pitch_tiles) rows.
    command_list->SetComputeRoot32BitConstants(
        0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
    command_processor_->SetPipeline(
        edram_load_store_pipelines_[size_t(pipeline_index)]);
    command_list->Dispatch(
        root_constants.pitch_tiles,
        binding.edram_dirty_length / root_constants.pitch_tiles, 1);

    // Commit the UAV write and prepare for copying again.
    barrier_count = 1;
    barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
    barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    barriers[0].UAV.pResource = edram_buffer_;
    if (i + 1 < store_binding_count) {
      // Another render target follows, so the scratch buffer must become a
      // copy destination again.
      barrier_count = 2;
      barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
      barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
      barriers[1].Transition.pResource = copy_buffer;
      barriers[1].Transition.Subresource =
          D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
      barriers[1].Transition.StateBefore =
          D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
      barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
      copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
    }
    command_list->ResourceBarrier(barrier_count, barriers);
  }

  // Hand the scratch buffer back together with the state it was left in.
  command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
}
} // namespace d3d12 } // namespace d3d12
} // namespace gpu } // namespace gpu

View File

@ -201,6 +201,7 @@ class RenderTargetCache {
RegisterFile* register_file); RegisterFile* register_file);
~RenderTargetCache(); ~RenderTargetCache();
bool Initialize();
void Shutdown(); void Shutdown();
void ClearCache(); void ClearCache();
@ -233,6 +234,27 @@ class RenderTargetCache {
} }
private: private:
// Indices of the compute pipelines that copy data between render targets and
// the EDRAM buffer. The values index both edram_load_store_pipeline_info_ and
// edram_load_store_pipelines_, so the enumerator order must match the order of
// the entries in edram_load_store_pipeline_info_.
enum class EDRAMLoadStorePipelineIndex {
kColor32bppLoad,
kColor32bppStore,
kColor64bppLoad,
kColor64bppStore,
kColor7e3Load,
kColor7e3Store,
kDepthUnormLoad,
kDepthUnormStore,
kDepthFloatLoad,
kDepthFloatStore,
// Number of pipelines; used as the size of the arrays above.
kCount
};
// Static description of one EDRAM load/store compute pipeline.
struct EDRAMLoadStorePipelineInfo {
// Compiled compute shader bytecode (passed as CS.pShaderBytecode).
const void* shader;
// Size of the bytecode in bytes (passed as CS.BytecodeLength).
size_t shader_size;
// Wide-string label of the pipeline.
const WCHAR* name;
};
union RenderTargetKey { union RenderTargetKey {
struct { struct {
// Supersampled (_ss - scaled 2x if needed) dimensions, divided by 80x16. // Supersampled (_ss - scaled 2x if needed) dimensions, divided by 80x16.
@ -267,8 +289,12 @@ class RenderTargetCache {
RenderTargetKey key; RenderTargetKey key;
// The first 4 MB page in the heaps. // The first 4 MB page in the heaps.
uint32_t heap_page_first; uint32_t heap_page_first;
// Number of 4 MB pages this render target uses. // The number of 4 MB pages this render target uses.
uint32_t heap_page_count; uint32_t heap_page_count;
// Color/depth and stencil layouts.
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprints[2];
// Buffer size needed to copy the render target to the EDRAM buffer.
uint32_t copy_buffer_size;
}; };
struct RenderTargetBinding { struct RenderTargetBinding {
@ -294,13 +320,34 @@ class RenderTargetCache {
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key, RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
uint32_t heap_page_first); uint32_t heap_page_first);
// Must be in a frame to call. Writes the dirty areas of the currently bound // Must be in a frame to call. Stores the dirty areas of the currently bound
// render targets and marks them as clean. // render targets and marks them as clean.
void WriteRenderTargetsToEDRAM(); void StoreRenderTargetsToEDRAM();
D3D12CommandProcessor* command_processor_; D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_; RegisterFile* register_file_;
// The EDRAM buffer allowing color and depth data to be reinterpreted.
ID3D12Resource* edram_buffer_ = nullptr;
D3D12_RESOURCE_STATES edram_buffer_state_;
bool edram_buffer_cleared_;
// EDRAM buffer load/store root signature.
ID3D12RootSignature* edram_load_store_root_signature_ = nullptr;
// Root constants passed to the EDRAM load/store shaders as 32-bit values. The
// field order mirrors the XeEDRAMLoadStoreConstants constant buffer declared in
// edram_load_store.hlsli, so the two must be changed together.
struct EDRAMLoadStoreRootConstants {
// EDRAM base of the render target, in tiles.
uint32_t base_tiles;
// Number of tiles per row of tiles.
uint32_t pitch_tiles;
// Row pitch, in bytes, of the color or depth data in the copy buffer.
uint32_t rt_color_depth_pitch;
// Byte offset of the stencil data in the copy buffer (depth targets only).
uint32_t rt_stencil_offset;
// Row pitch, in bytes, of the stencil data (depth targets only).
uint32_t rt_stencil_pitch;
};
// EDRAM buffer load/store pipelines.
static const EDRAMLoadStorePipelineInfo
edram_load_store_pipeline_info_[size_t(
EDRAMLoadStorePipelineIndex::kCount)];
ID3D12PipelineState* edram_load_store_pipelines_[size_t(
EDRAMLoadStorePipelineIndex::kCount)] = {};
// 32 MB heaps backing used render targets resources, created when needed. // 32 MB heaps backing used render targets resources, created when needed.
// 24 MB proved to be not enough to store a single render target occupying the // 24 MB proved to be not enough to store a single render target occupying the
// entire EDRAM - a 32-bit depth/stencil one - at some resolution. // entire EDRAM - a 32-bit depth/stencil one - at some resolution.

View File

@ -0,0 +1,14 @@
#include "edram_load_store.hlsli"
// Loads one tile of 32bpp color from the EDRAM buffer into the linear render
// target copy buffer. One thread group handles one tile; every thread moves 4
// consecutive dwords (16 bytes).
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Position of this thread's first dword within the tile.
  uint2 dword_in_tile = xe_group_thread_id.xy * uint2(4u, 1u);
  uint edram_address = XeEDRAMOffset(xe_group_id.xy, dword_in_tile);
  // Linear destination: 16 bytes per thread in X, one row pitch per thread in Y.
  uint rt_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  xe_edram_load_store_dest.Store4(
      rt_address, xe_edram_load_store_source.Load4(edram_address));
}

View File

@ -0,0 +1,19 @@
#include "edram_load_store.hlsli"
// Loads one tile of 64bpp color from the EDRAM buffer into the linear render
// target copy buffer.
[numthreads(40, 8, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // One tile contains 80x8 64bpp texels, so 2 dword rows within an 80x16 tile
  // contain data from 1 render target row rather than 2. Threads with X 0-19
  // handle the first tile row of the pair, threads with X 20-39 the second.
  uint2 dword_in_tile =
      uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y * 2u);
  [flatten] if (xe_group_thread_id.x >= 20u) {
    // Wrap back to the start of the next tile row.
    dword_in_tile.x -= 80u;
    dword_in_tile.y += 1u;
  }
  uint edram_address = XeEDRAMOffset(xe_group_id.xy, dword_in_tile);
  uint4 texels = xe_edram_load_store_source.Load4(edram_address);
  uint rt_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  xe_edram_load_store_dest.Store4(rt_address, texels);
}

View File

@ -0,0 +1,20 @@
#include "edram_load_store.hlsli"
#include "pixel_formats.hlsli"
// Loads one tile of packed 7e3 color from the EDRAM buffer and writes it to the
// render target copy buffer as four 16-bit floats per pixel. Every thread
// converts 2 pixels (2 dwords in, 4 dwords out).
[numthreads(40, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  uint2 dword_in_tile = uint2(xe_group_thread_id.x * 2u, xe_group_thread_id.y);
  uint2 packed_7e3 = xe_edram_load_store_source.Load2(
      XeEDRAMOffset(xe_group_id.xy, dword_in_tile));
  // Each conversion returns the 4 components as 16-bit floats in separate
  // dwords.
  uint4 first_f16 = XeFloat7e3To16(packed_7e3.x);
  uint4 second_f16 = XeFloat7e3To16(packed_7e3.y);
  // Pack pairs of components: X and Z go to the low halves, Y and W to the
  // high halves.
  uint4 low_halves = uint4(first_f16.xz, second_f16.xz);
  uint4 high_halves = uint4(first_f16.yw, second_f16.yw);
  uint4 packed_f16 = low_halves | (high_halves << 16u);
  uint rt_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  xe_edram_load_store_dest.Store4(rt_address, packed_f16);
}

View File

@ -0,0 +1,31 @@
#include "edram_load_store.hlsli"
#include "pixel_formats.hlsli"
// Loads one tile of floating-point depth and stencil from the EDRAM buffer into
// the render target copy buffer. Every thread handles 4 samples.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  uint2 dword_in_tile = uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint edram_address = XeEDRAMOffset(xe_group_id.xy, dword_in_tile);
  // 24-bit depth with stencil in the top byte, from the first part of the
  // buffer.
  uint4 packed_depth_stencil = xe_edram_load_store_source.Load4(edram_address);
  // Full 32-bit depth kept 10485760 bytes further into the buffer.
  uint4 depth_32 =
      xe_edram_load_store_source.Load4(10485760u + edram_address);
  uint4 depth_24 = packed_depth_stencil & 0xFFFFFFu;

  // Depth. If the stored 32-bit depth converted to 24-bit is the same as the
  // stored 24-bit depth, use the 32-bit value because it has more precision
  // (and multipass rendering is possible). Otherwise the 24-bit depth was
  // overwritten through aliasing, so convert it instead.
  uint4 depth_from_24 = XeFloat20e4To32(depth_24);
  uint4 depth_32_is_current = uint4(XeFloat32To20e4(depth_32) == depth_24);
  uint4 depth_result =
      depth_from_24 + (depth_32 - depth_from_24) * depth_32_is_current;
  uint depth_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  xe_edram_load_store_dest.Store4(depth_address, depth_result);

  // Stencil. Pack the 4 top bytes into a single dword.
  uint4 stencil_bytes =
      (packed_depth_stencil >> 24u) << uint4(0u, 8u, 16u, 24u);
  uint stencil_packed =
      stencil_bytes.x | stencil_bytes.y | stencil_bytes.z | stencil_bytes.w;
  uint stencil_address = xe_edram_rt_stencil_offset +
                         xe_thread_id.y * xe_edram_rt_stencil_pitch +
                         xe_thread_id.x * 4u;
  xe_edram_load_store_dest.Store(stencil_address, stencil_packed);
}

View File

@ -0,0 +1,22 @@
#include "edram_load_store.hlsli"
// Loads one tile of 24-bit depth and stencil from the EDRAM buffer into the
// render target copy buffer. Every thread handles 4 samples.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  uint2 dword_in_tile = uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint4 packed_depth_stencil = xe_edram_load_store_source.Load4(
      XeEDRAMOffset(xe_group_id.xy, dword_in_tile));

  // Depth: the low 24 bits of every dword.
  uint depth_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  xe_edram_load_store_dest.Store4(depth_address,
                                  packed_depth_stencil & 0xFFFFFFu);

  // Stencil: the top byte of every dword, 4 samples packed into one dword.
  uint4 stencil_bytes =
      (packed_depth_stencil >> 24u) << uint4(0u, 8u, 16u, 24u);
  uint stencil_packed =
      stencil_bytes.x | stencil_bytes.y | stencil_bytes.z | stencil_bytes.w;
  uint stencil_address = xe_edram_rt_stencil_offset +
                         xe_thread_id.y * xe_edram_rt_stencil_pitch +
                         xe_thread_id.x * 4u;
  xe_edram_load_store_dest.Store(stencil_address, stencil_packed);
}

View File

@ -0,0 +1,21 @@
#ifndef XENIA_GPU_D3D12_SHADERS_EDRAM_LOAD_STORE_HLSLI_
#define XENIA_GPU_D3D12_SHADERS_EDRAM_LOAD_STORE_HLSLI_
// Constants shared by all EDRAM load/store shaders. They are supplied as root
// constants from the C++ EDRAMLoadStoreRootConstants structure, so the member
// order here must match that structure.
cbuffer XeEDRAMLoadStoreConstants : register(b0) {
// EDRAM base of the render target, in tiles.
uint xe_edram_base_tiles;
// Number of tiles per row of tiles.
uint xe_edram_pitch_tiles;
// Row pitch, in bytes, of the color or depth data in the linear buffer.
uint xe_edram_rt_color_depth_pitch;
// Byte offset of the stencil data in the linear buffer (depth shaders only).
uint xe_edram_rt_stencil_offset;
// Row pitch, in bytes, of the stencil data (depth shaders only).
uint xe_edram_rt_stencil_pitch;
};
// Buffer the shader reads from and buffer it writes to. Which of the two is the
// EDRAM buffer depends on whether the shader is a load or a store.
ByteAddressBuffer xe_edram_load_store_source : register(t0);
RWByteAddressBuffer xe_edram_load_store_dest : register(u0);
// Returns the byte offset in the EDRAM buffer of a dword within a tile.
// tile_index is the tile position relative to the render target base
// (X = column, Y = row of tiles); tile_dword_index is the dword position inside
// the tile. A tile occupies 5120 bytes and a row of it 320 bytes.
uint XeEDRAMOffset(uint2 tile_index, uint2 tile_dword_index) {
  uint tile_number = xe_edram_base_tiles +
                     tile_index.y * xe_edram_pitch_tiles + tile_index.x;
  uint offset_in_tile = tile_dword_index.y * 320u + tile_dword_index.x * 4u;
  return tile_number * 5120u + offset_in_tile;
}
#endif // XENIA_GPU_D3D12_SHADERS_EDRAM_LOAD_STORE_HLSLI_

View File

@ -0,0 +1,14 @@
#include "edram_load_store.hlsli"
// Stores 32bpp color from the linear render target copy buffer into one tile of
// the EDRAM buffer. One thread group handles one tile; every thread moves 4
// consecutive dwords (16 bytes).
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Linear source: 16 bytes per thread in X, one row pitch per thread in Y.
  uint rt_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  uint4 texels = xe_edram_load_store_source.Load4(rt_address);
  // Position of this thread's first dword within the tile.
  uint2 dword_in_tile = xe_group_thread_id.xy * uint2(4u, 1u);
  uint edram_address = XeEDRAMOffset(xe_group_id.xy, dword_in_tile);
  xe_edram_load_store_dest.Store4(edram_address, texels);
}

View File

@ -0,0 +1,19 @@
#include "edram_load_store.hlsli"
// Stores 64bpp color from the linear render target copy buffer into one tile of
// the EDRAM buffer.
[numthreads(40, 8, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  uint rt_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  uint4 texels = xe_edram_load_store_source.Load4(rt_address);
  // One tile contains 80x8 64bpp texels, so 2 dword rows within an 80x16 tile
  // contain data from 1 render target row rather than 2. Threads with X 0-19
  // handle the first tile row of the pair, threads with X 20-39 the second.
  uint2 dword_in_tile =
      uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y * 2u);
  [flatten] if (xe_group_thread_id.x >= 20u) {
    // Wrap back to the start of the next tile row.
    dword_in_tile.x -= 80u;
    dword_in_tile.y += 1u;
  }
  uint edram_address = XeEDRAMOffset(xe_group_id.xy, dword_in_tile);
  xe_edram_load_store_dest.Store4(edram_address, texels);
}

View File

@ -0,0 +1,19 @@
#include "edram_load_store.hlsli"
#include "pixel_formats.hlsli"
// Stores color kept as four 16-bit floats per pixel in the render target copy
// buffer into one tile of the EDRAM buffer as packed 7e3. Every thread converts
// 2 pixels (4 dwords in, 2 dwords out).
[numthreads(40, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  uint rt_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  uint4 packed_f16 = xe_edram_load_store_source.Load4(rt_address);
  // Split each dword into its low and high halves, giving one component per
  // dword. The upper bits of the low halves are left as they are.
  uint4 half_shifts = uint4(0u, 16u, 0u, 16u);
  uint4 first_f16 = packed_f16.xxyy >> half_shifts;
  uint4 second_f16 = packed_f16.zzww >> half_shifts;
  uint2 packed_7e3;
  packed_7e3.x = XeFloat16To7e3(first_f16);
  packed_7e3.y = XeFloat16To7e3(second_f16);
  uint2 dword_in_tile = uint2(xe_group_thread_id.x * 2u, xe_group_thread_id.y);
  xe_edram_load_store_dest.Store2(
      XeEDRAMOffset(xe_group_id.xy, dword_in_tile), packed_7e3);
}

View File

@ -0,0 +1,25 @@
#include "edram_load_store.hlsli"
#include "pixel_formats.hlsli"
// Stores floating-point depth and stencil from the render target copy buffer
// into one tile of the EDRAM buffer. Every thread handles 4 samples.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Depth.
  uint depth_address =
      xe_thread_id.x * 16u + xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  uint4 depth_32 = xe_edram_load_store_source.Load4(depth_address);

  // Stencil: 4 samples packed into one dword, moved to the top byte of each
  // output dword.
  uint stencil_address = xe_edram_rt_stencil_offset +
                         xe_thread_id.y * xe_edram_rt_stencil_pitch +
                         xe_thread_id.x * 4u;
  uint stencil_packed = xe_edram_load_store_source.Load(stencil_address);
  uint4 stencil_top_bytes =
      (stencil_packed.xxxx >> uint4(0u, 8u, 16u, 24u)) << 24u;
  uint4 packed_depth_stencil = XeFloat32To20e4(depth_32) | stencil_top_bytes;

  uint2 dword_in_tile = uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  uint edram_address = XeEDRAMOffset(xe_group_id.xy, dword_in_tile);
  // Store 24-bit depth for aliasing and checking if 32-bit depth is up to date.
  xe_edram_load_store_dest.Store4(edram_address, packed_depth_stencil);
  // Store 32-bit depth so precision isn't lost when doing multipass rendering.
  xe_edram_load_store_dest.Store4(10485760u + edram_address, depth_32);
}

View File

@ -0,0 +1,20 @@
#include "edram_load_store.hlsli"
// Stores a 24-bit integer depth render target (with a separate 8-bit stencil
// plane) to EDRAM as packed depth + stencil dwords.
// Each thread handles four pixels: 16 bytes of depth and 4 bytes of stencil.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Depth - one dword per pixel, only the low 24 bits are kept.
  uint depth_address = xe_thread_id.x * 16u +
                       xe_thread_id.y * xe_edram_rt_color_depth_pitch;
  uint4 depth24 =
      xe_edram_load_store_source.Load4(depth_address) & 0xFFFFFFu;

  // Stencil - four bytes in one dword, each moved into bits 24-31 of its
  // pixel (bits shifted past bit 31 are discarded by the left shift).
  uint stencil_address = xe_thread_id.x * 4u +
                         xe_thread_id.y * xe_edram_rt_stencil_pitch +
                         xe_edram_rt_stencil_offset;
  uint stencil_packed = xe_edram_load_store_source.Load(stencil_address);
  uint4 stencil_high =
      (stencil_packed.xxxx >> uint4(0u, 8u, 16u, 24u)) << 24u;

  // Four destination dwords per thread along X within the tile.
  uint2 dest_dword_index =
      uint2(xe_group_thread_id.x * 4u, xe_group_thread_id.y);
  xe_edram_load_store_dest.Store4(
      XeEDRAMOffset(xe_group_id.xy, dest_dword_index),
      depth24 | stencil_high);
}

View File

@ -0,0 +1,74 @@
#ifndef XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_
#define XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// Packs a pixel with 16-bit float components into a 7e3 dword: three 10-bit
// floats (7-bit mantissa, 3-bit exponent) for red, green and blue in bits 0-9,
// 10-19 and 20-29, and a 2-bit unsigned normalized alpha in bits 30-31.
// rgba_f16u32 holds one 16-bit float per component in the low 16 bits (the
// upper bits are ignored by f16tof32).
uint XeFloat16To7e3(uint4 rgba_f16u32) {
  float4 rgba_f32 = f16tof32(rgba_f16u32);
  uint3 rgb_f32u32 = asuint(rgba_f32.xyz);
  // Keep only positive (high bit set means negative for both float and int) and
  // saturate to 31.875 (also dropping NaNs).
  rgb_f32u32 = uint3(clamp(int3(rgb_f32u32), 0, 0x41FF0000));
  // Rebias the exponent from 127 to 3 (subtract 124 << 23).
  uint3 normalized = rgb_f32u32 + 0xC2000000u;
  // Values below 2^-2 become 7e3 denormals: restore the implicit leading one
  // and shift the mantissa right. The shift count is clamped because only the
  // low 5 bits of a shift count are used, so an unclamped count of 32 or more
  // would wrap around instead of flushing the value to zero.
  uint3 denormalized =
      ((rgb_f32u32 & 0x7FFFFFu) | 0x800000u) >>
      min((125u).xxx - (rgb_f32u32 >> 23u), (24u).xxx);
  uint3 rgb_f10u32 = normalized + (denormalized - normalized) *
                                      uint3(rgb_f32u32 < 0x3E800000u);
  // Round to the nearest even and drop the 16 mantissa bits that don't fit.
  rgb_f10u32 =
      ((rgb_f10u32 + 0x7FFFu + ((rgb_f10u32 >> 16u) & 1u)) >> 16u) & 0x3FFu;
  // Alpha is rounded to the nearest of the 4 levels rather than truncated:
  // XeFloat7e3To16 returns 1/3 and 2/3 as 16-bit floats slightly below the
  // exact value, so truncation would turn them into 0 and 1 when stored back.
  return rgb_f10u32.r | (rgb_f10u32.g << 10u) | (rgb_f10u32.b << 20u) |
         (uint(saturate(rgba_f32.a) * 3.0 + 0.5) << 30u);
}
// Unpacks a 7e3 dword (three 10-bit floats with a 7-bit mantissa and a 3-bit
// exponent in bits 0-29, 2-bit alpha in bits 30-31) into four 16-bit floats,
// one per component of the returned vector.
uint4 XeFloat7e3To16(uint rgba_packed) {
  // Isolate the red, green and blue 10-bit fields.
  uint3 packed_rgb =
      uint3(rgba_packed, rgba_packed >> 10u, rgba_packed >> 20u) & 0x3FFu;
  uint3 exponent_bits = packed_rgb >> 7u;
  uint3 mantissa_bits = packed_rgb & 0x7Fu;

  // Per-component 0/1 masks selecting between the normal and the denormal
  // paths without branching.
  uint3 denormal_mask = uint3(exponent_bits == 0u);
  uint3 normal_mask = (1u).xxx - denormal_mask;

  // Denormals are renormalized - equivalent to:
  // Exponent = 1;
  // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x80) == 0);
  // The exponent may wrap below zero here, but adding the bias afterwards
  // brings it back into range (unsigned arithmetic is modulo 2^32).
  uint3 normalize_shift = (7u).xxx - firstbithigh(mantissa_bits);
  exponent_bits = exponent_bits * normal_mask +
                  ((1u).xxx - normalize_shift) * denormal_mask;
  mantissa_bits =
      mantissa_bits * normal_mask +
      ((mantissa_bits << normalize_shift) & 0x7Fu) * denormal_mask;

  // Build 32-bit float bits (exponent rebiased by 124, mantissa widened from 7
  // to 23 bits), forcing components that were entirely zero back to zero.
  uint3 nonzero_mask = uint3(packed_rgb != 0u);
  uint3 rgb_f32u32 =
      (((exponent_bits + 124u) << 23u) | (mantissa_bits << 16u)) *
      nonzero_mask;

  // 2-bit alpha to [0, 1].
  float alpha = float(rgba_packed >> 30u) * (1.0 / 3.0);
  return f32tof16(float4(asfloat(rgb_f32u32), alpha));
}
// Based on CFloat24 from d3dref9.dll and the 6e4 code from:
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// 6e4 has a different exponent bias allowing [0,512) values, 20e4 allows [0,2).
// We also can't clamp the stored value to 1 as load->store->load must be exact.
//
// Converts four 32-bit floats (passed as their bit patterns) to 24-bit 20e4
// floats (20-bit mantissa, 4-bit exponent), returned in the low 24 bits of each
// component with the upper 8 bits zero.
uint4 XeFloat32To20e4(uint4 f32u32) {
  // Keep only positive (high bit set means negative for both float and int) and
  // saturate to the maximum representable value near 2 (also dropping NaNs).
  f32u32 = uint4(clamp(int4(f32u32), 0, 0x3FFFFFF8));
  // Rebias the exponent from 127 to 15 (subtract 112 << 23).
  uint4 normalized = f32u32 + 0xC8000000u;
  // Values below 2^-14 become 20e4 denormals: restore the implicit leading one
  // and shift the mantissa right. The shift count must be clamped because only
  // the low 5 bits of a shift count are used - without the clamp, +0.0 (count
  // 113, treated as 17) would be encoded as a non-zero value instead of 0.
  // Shifting the 24-bit mantissa by 24 flushes it to zero.
  uint4 denormalized =
      ((f32u32 & 0x7FFFFFu) | 0x800000u) >>
      min((113u).xxxx - (f32u32 >> 23u), (24u).xxxx);
  uint4 f24u32 =
      normalized + (denormalized - normalized) * uint4(f32u32 < 0x38800000u);
  // Round to the nearest even and drop the 3 mantissa bits that don't fit.
  return ((f24u32 + 3u + ((f24u32 >> 3u) & 1u)) >> 3u) & 0xFFFFFFu;
}
// Converts four 24-bit 20e4 floats (20-bit mantissa in bits 0-19, 4-bit
// exponent in bits 20-23) to 32-bit float bit patterns.
// NOTE(review): the exponent is taken as f24u32 >> 20 without masking, so the
// upper 8 bits of each component are assumed to be zero - confirm that callers
// strip stencil before calling.
uint4 XeFloat20e4To32(uint4 f24u32) {
  // The mantissa is the low 20 bits (0xF00000 would select the exponent bits
  // instead and discard the whole mantissa).
  uint4 mantissa = f24u32 & 0xFFFFFu;
  uint4 exponent = f24u32 >> 20u;
  // Normalize the values for the denormalized components.
  // Exponent = 1;
  // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0);
  uint4 is_denormalized = uint4(exponent == 0u);
  uint4 mantissa_lzcnt = (20u).xxxx - firstbithigh(mantissa);
  // The exponent may wrap below zero here, but adding the bias afterwards
  // brings it back into range (unsigned arithmetic is modulo 2^32).
  exponent += ((1u).xxxx - mantissa_lzcnt - exponent) * is_denormalized;
  mantissa +=
      (((mantissa << mantissa_lzcnt) & 0xFFFFFu) - mantissa) * is_denormalized;
  // Combine into 32-bit float bits (exponent rebiased by 112, mantissa widened
  // from 20 to 23 bits) and clear zeros.
  return (((exponent + 112u) << 23u) | (mantissa << 3u)) * uint4(f24u32 != 0u);
}
#endif // XENIA_GPU_D3D12_SHADERS_PIXEL_FORMATS_HLSLI_

View File

@ -394,7 +394,7 @@ void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = buffer_; barrier.Transition.pResource = buffer_;
barrier.Transition.Subresource = 0; barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = buffer_state_; barrier.Transition.StateBefore = buffer_state_;
barrier.Transition.StateAfter = new_state; barrier.Transition.StateAfter = new_state;
command_list->ResourceBarrier(1, &barrier); command_list->ResourceBarrier(1, &barrier);

View File

@ -741,7 +741,8 @@ bool TextureCache::LoadTextureData(Texture* texture) {
if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[0].Transition.pResource = copy_buffer; barriers[0].Transition.pResource = copy_buffer;
barriers[0].Transition.Subresource = 0; barriers[0].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[0].Transition.StateBefore = copy_buffer_state; barriers[0].Transition.StateBefore = copy_buffer_state;
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
command_list->ResourceBarrier(1, barriers); command_list->ResourceBarrier(1, barriers);
@ -792,7 +793,8 @@ bool TextureCache::LoadTextureData(Texture* texture) {
barriers[0].UAV.pResource = copy_buffer; barriers[0].UAV.pResource = copy_buffer;
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[1].Transition.pResource = copy_buffer; barriers[1].Transition.pResource = copy_buffer;
barriers[1].Transition.Subresource = 0; barriers[1].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_list->ResourceBarrier(2, barriers); command_list->ResourceBarrier(2, barriers);