[D3D12] Shorten and batch barriers

This commit is contained in:
Triang3l 2018-08-22 23:00:56 +03:00
parent d204e9ba74
commit bc4125584c
6 changed files with 193 additions and 273 deletions

View File

@ -42,6 +42,48 @@ ID3D12GraphicsCommandList* D3D12CommandProcessor::GetCurrentCommandList()
return command_lists_[current_queue_frame_]->GetCommandList();
}
// Appends a state transition for `resource` (restricted to `subresource`) to
// the unsubmitted barrier batch. When the old and the new states are the same,
// nothing is queued. The barrier is only stored here; SubmitBarriers is what
// records the batch to the command list.
void D3D12CommandProcessor::PushTransitionBarrier(
    ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
    D3D12_RESOURCE_STATES new_state, UINT subresource) {
  if (old_state != new_state) {
    D3D12_RESOURCE_BARRIER transition;
    transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    transition.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    // What is being transitioned.
    transition.Transition.pResource = resource;
    transition.Transition.Subresource = subresource;
    // From which state to which state.
    transition.Transition.StateBefore = old_state;
    transition.Transition.StateAfter = new_state;
    barriers_.push_back(transition);
  }
}
// Appends an aliasing barrier between `old_resource` and `new_resource` to the
// unsubmitted barrier batch. Both pointers are stored as passed, without any
// validation, so a null pointer goes into the barrier unchanged.
void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource,
                                                ID3D12Resource* new_resource) {
  D3D12_RESOURCE_BARRIER aliasing;
  aliasing.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
  aliasing.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  aliasing.Aliasing.pResourceBefore = old_resource;
  aliasing.Aliasing.pResourceAfter = new_resource;
  barriers_.push_back(aliasing);
}
// Appends a UAV barrier for `resource` to the unsubmitted barrier batch.
void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) {
  D3D12_RESOURCE_BARRIER uav_barrier;
  uav_barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
  uav_barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  uav_barrier.UAV.pResource = resource;
  barriers_.push_back(uav_barrier);
}
void D3D12CommandProcessor::SubmitBarriers() {
UINT barrier_count = UINT(barriers_.size());
if (barrier_count != 0) {
GetCurrentCommandList()->ResourceBarrier(barrier_count, barriers_.data());
barriers_.clear();
}
}
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
assert_true(vertex_shader->is_translated());
@ -372,17 +414,8 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
}
if (size <= scratch_buffer_size_) {
if (scratch_buffer_state_ != state) {
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = scratch_buffer_;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = scratch_buffer_state_;
barrier.Transition.StateAfter = state;
GetCurrentCommandList()->ResourceBarrier(1, &barrier);
scratch_buffer_state_ = state;
}
PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state);
scratch_buffer_state_ = state;
scratch_buffer_used_ = true;
return scratch_buffer_;
}
@ -470,7 +503,7 @@ bool D3D12CommandProcessor::SetupContext() {
sampler_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048);
shared_memory_ = std::make_unique<SharedMemory>(memory_, context);
shared_memory_ = std::make_unique<SharedMemory>(this, memory_);
if (!shared_memory_->Initialize()) {
XELOGE("Failed to initialize shared memory");
return false;
@ -764,7 +797,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
}
shared_memory_->RequestRange(
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC, command_list);
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
}
if (indexed) {
@ -774,9 +807,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
: sizeof(uint16_t);
index_base &= ~(index_size - 1);
uint32_t index_buffer_size = index_buffer_info->count * index_size;
shared_memory_->RequestRange(index_base, index_buffer_size, command_list);
shared_memory_->RequestRange(index_base, index_buffer_size);
shared_memory_->UseForReading(command_list);
shared_memory_->UseForReading();
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
index_buffer_view.BufferLocation =
shared_memory_->GetGPUAddress() + index_base;
@ -785,9 +818,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
? DXGI_FORMAT_R32_UINT
: DXGI_FORMAT_R16_UINT;
command_list->IASetIndexBuffer(&index_buffer_view);
SubmitBarriers();
command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0);
} else {
shared_memory_->UseForReading(command_list);
shared_memory_->UseForReading();
SubmitBarriers();
command_list->DrawInstanced(index_count, 1, 0, 0);
}
@ -873,6 +908,9 @@ bool D3D12CommandProcessor::EndFrame() {
shared_memory_->EndFrame();
// Submit barriers now because resources the queued barriers are for may be
// destroyed between frames.
SubmitBarriers();
command_lists_[current_queue_frame_]->Execute();
sampler_heap_pool_->EndFrame();

View File

@ -47,6 +47,15 @@ class D3D12CommandProcessor : public CommandProcessor {
// Returns the drawing command list for the currently open frame.
ID3D12GraphicsCommandList* GetCurrentCommandList() const;
// Queues a transition barrier for the resource from old_state to new_state in
// the unsubmitted barrier batch (for all subresources by default). Nothing is
// queued if the two states are equal. The barrier is not recorded to the
// command list until SubmitBarriers is called.
void PushTransitionBarrier(
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
D3D12_RESOURCE_STATES new_state,
UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
// Queues an aliasing barrier between the two resources in the unsubmitted
// barrier batch. It is recorded to the command list by SubmitBarriers.
void PushAliasingBarrier(ID3D12Resource* old_resource,
ID3D12Resource* new_resource);
// Queues a UAV barrier for the resource in the unsubmitted barrier batch. It
// is recorded to the command list by SubmitBarriers.
void PushUAVBarrier(ID3D12Resource* resource);
// Records all queued barriers to the current command list in one
// ResourceBarrier call and clears the batch. Does nothing if the batch is
// empty.
void SubmitBarriers();
// Finds or creates root signature for a pipeline.
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader);
@ -176,6 +185,9 @@ class D3D12CommandProcessor : public CommandProcessor {
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
// Unsubmitted barrier batch.
std::vector<D3D12_RESOURCE_BARRIER> barriers_;
struct BufferForDeletion {
ID3D12Resource* buffer;
uint64_t last_usage_frame;

View File

@ -578,9 +578,6 @@ bool RenderTargetCache::UpdateRenderTargets() {
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_RESOURCE_BARRIER barriers[5];
uint32_t barrier_count = 0;
// Allocate new render targets and add them to the bindings list.
for (uint32_t i = 0; i < 5; ++i) {
if (!(render_targets_to_attach & (1 << i))) {
@ -636,14 +633,8 @@ bool RenderTargetCache::UpdateRenderTargets() {
heap_usage[heap_page_first >> 3] += heap_page_count;
// Inform Direct3D that we're reusing the heap for this render target.
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Aliasing.pResourceBefore = nullptr;
barrier.Aliasing.pResourceAfter = binding.render_target->resource;
}
if (barrier_count != 0) {
command_list->ResourceBarrier(barrier_count, barriers);
command_processor_->PushAliasingBarrier(nullptr,
binding.render_target->resource);
}
// Load the contents of the new render targets from the EDRAM buffer (will
@ -671,7 +662,6 @@ bool RenderTargetCache::UpdateRenderTargets() {
// Transition the render targets to the appropriate state if needed,
// compress the list of the render target because null RTV descriptors are
// broken in Direct3D 12 and bind the render targets to the command list.
barrier_count = 0;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
uint32_t rtv_count = 0;
for (uint32_t i = 0; i < 4; ++i) {
@ -680,17 +670,10 @@ bool RenderTargetCache::UpdateRenderTargets() {
if (!binding.is_bound || render_target == nullptr) {
continue;
}
if (render_target->state != D3D12_RESOURCE_STATE_RENDER_TARGET) {
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = render_target->resource;
barrier.Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = render_target->state;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
}
command_processor_->PushTransitionBarrier(
render_target->resource, render_target->state,
D3D12_RESOURCE_STATE_RENDER_TARGET);
render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
rtv_handles[rtv_count] = render_target->handle;
current_pipeline_render_targets_[rtv_count].guest_render_target = i;
current_pipeline_render_targets_[rtv_count].format =
@ -706,17 +689,10 @@ bool RenderTargetCache::UpdateRenderTargets() {
RenderTarget* depth_render_target = depth_binding.render_target;
current_pipeline_render_targets_[4].guest_render_target = 4;
if (depth_binding.is_bound && depth_render_target != nullptr) {
if (depth_render_target->state != D3D12_RESOURCE_STATE_DEPTH_WRITE) {
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = depth_render_target->resource;
barrier.Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = depth_render_target->state;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;
depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
}
command_processor_->PushTransitionBarrier(
depth_render_target->resource, depth_render_target->state,
D3D12_RESOURCE_STATE_DEPTH_WRITE);
depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
dsv_handle = &depth_binding.render_target->handle;
current_pipeline_render_targets_[4].format =
GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4]));
@ -724,9 +700,7 @@ bool RenderTargetCache::UpdateRenderTargets() {
dsv_handle = nullptr;
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
}
if (barrier_count != 0) {
command_list->ResourceBarrier(barrier_count, barriers);
}
command_processor_->SubmitBarriers();
command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle);
}
@ -1282,17 +1256,11 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
return;
}
uint32_t store_bindings[5];
uint32_t store_binding_count = 0;
// 6 for 5 render targets + the EDRAM buffer.
D3D12_RESOURCE_BARRIER barriers[6];
uint32_t barrier_count;
// Extract only the render targets that need to be stored, transition them to
// copy sources and calculate copy buffer size.
uint32_t store_bindings[5];
uint32_t store_binding_count = 0;
uint32_t copy_buffer_size = 0;
barrier_count = 0;
for (uint32_t i = 0; i < 5; ++i) {
const RenderTargetBinding& binding = current_bindings_[i];
RenderTarget* render_target = binding.render_target;
@ -1300,38 +1268,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
binding.edram_dirty_rows < 0) {
continue;
}
store_bindings[store_binding_count] = i;
store_bindings[store_binding_count++] = i;
copy_buffer_size =
std::max(copy_buffer_size, render_target->copy_buffer_size);
++store_binding_count;
if (render_target->state != D3D12_RESOURCE_STATE_COPY_SOURCE) {
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = render_target->resource;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = render_target->state;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
}
}
if (store_binding_count == 0) {
return;
}
if (edram_buffer_state_ != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
// Also transition the EDRAM buffer to UAV.
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = edram_buffer_;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = edram_buffer_state_;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
}
if (barrier_count != 0) {
command_list->ResourceBarrier(barrier_count, barriers);
}
// Allocate descriptors for the buffers.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
@ -1349,6 +1292,21 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
return;
}
// Transition the render targets that need to be stored to copy sources and
// the EDRAM buffer to a UAV.
for (uint32_t i = 0; i < store_binding_count; ++i) {
RenderTarget* render_target =
current_bindings_[store_bindings[i]].render_target;
command_processor_->PushTransitionBarrier(render_target->resource,
render_target->state,
D3D12_RESOURCE_STATE_COPY_SOURCE);
render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
}
command_processor_->PushTransitionBarrier(
edram_buffer_, edram_buffer_state_,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
// Prepare for storing.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
@ -1410,6 +1368,12 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
rt_pitch_tiles *= 2;
}
// Transition the copy buffer to copy destination.
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_COPY_DEST);
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
command_processor_->SubmitBarriers();
// Copy from the render target planes and set up the layout.
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
location_source.pResource = render_target->resource;
@ -1440,16 +1404,11 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
}
// Transition the copy buffer to SRV.
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[0].Transition.pResource = copy_buffer;
barriers[0].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
barriers[0].Transition.StateAfter =
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
command_processor_->PushTransitionBarrier(
copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
command_list->ResourceBarrier(1, barriers);
command_processor_->SubmitBarriers();
// Store the data.
command_list->SetComputeRoot32BitConstants(
@ -1459,24 +1418,8 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
command_processor_->SetPipeline(edram_store_pipelines_[size_t(mode)]);
command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1);
// Commit the UAV write and prepare for copying again.
barrier_count = 1;
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[0].UAV.pResource = edram_buffer_;
if (i + 1 < store_binding_count) {
barrier_count = 2;
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[1].Transition.pResource = copy_buffer;
barriers[1].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[1].Transition.StateBefore =
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
}
command_list->ResourceBarrier(barrier_count, barriers);
// Commit the UAV write.
command_processor_->PushUAVBarrier(edram_buffer_);
}
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
@ -1495,45 +1438,6 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
return;
}
// 6 for 5 render targets + the EDRAM buffer.
D3D12_RESOURCE_BARRIER barriers[6];
uint32_t barrier_count;
// Transition the render targets to copy destinations and calculate copy
// buffer size.
uint32_t copy_buffer_size = 0;
barrier_count = 0;
for (uint32_t i = 0; i < render_target_count; ++i) {
RenderTarget* render_target = render_targets[i];
copy_buffer_size =
std::max(copy_buffer_size, render_target->copy_buffer_size);
if (render_target->state != D3D12_RESOURCE_STATE_COPY_DEST) {
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = render_target->resource;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = render_target->state;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
}
}
if (edram_buffer_state_ != D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) {
// Also transition the EDRAM buffer to SRV.
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = edram_buffer_;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = edram_buffer_state_;
barrier.Transition.StateAfter =
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
}
if (barrier_count != 0) {
command_list->ResourceBarrier(barrier_count, barriers);
}
// Allocate descriptors for the buffers.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
@ -1543,6 +1447,11 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
}
// Get the buffer for copying.
uint32_t copy_buffer_size = 0;
for (uint32_t i = 0; i < render_target_count; ++i) {
copy_buffer_size =
std::max(copy_buffer_size, render_targets[i]->copy_buffer_size);
}
D3D12_RESOURCE_STATES copy_buffer_state =
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
@ -1551,7 +1460,21 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
return;
}
// Prepare for loading.
// Transition the render targets to copy destinations and the EDRAM buffer to
// a SRV.
for (uint32_t i = 0; i < render_target_count; ++i) {
RenderTarget* render_target = render_targets[i];
command_processor_->PushTransitionBarrier(render_target->resource,
render_target->state,
D3D12_RESOURCE_STATE_COPY_DEST);
render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
}
command_processor_->PushTransitionBarrier(
edram_buffer_, edram_buffer_state_,
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
// Set up the bindings.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto descriptor_size_view = provider->GetDescriptorSizeView();
@ -1583,8 +1506,8 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
// Load each render target.
for (uint32_t i = 0; i < render_target_count; ++i) {
if (edram_bases[i] >= 2048) {
// Something is wrong with the resolve.
return;
// Something is wrong with the load.
continue;
}
const RenderTarget* render_target = render_targets[i];
@ -1595,8 +1518,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
ColorRenderTargetFormat(render_target->key.format))) {
edram_pitch_tiles *= 2;
}
// Validate the height in case the resolve is somehow too large (shouldn't
// happen though, but who knows what games do).
// Clamp the height if somehow requested a render target that is too large.
uint32_t edram_rows =
std::min(render_target->key.height_ss_div_16,
(2048u - edram_bases[i]) / edram_pitch_tiles);
@ -1605,19 +1527,12 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
}
// Transition the copy buffer back to UAV if it's not the first load.
if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[0].Transition.pResource = copy_buffer;
barriers[0].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[0].Transition.StateBefore = copy_buffer_state;
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
command_list->ResourceBarrier(1, barriers);
}
command_processor_->PushTransitionBarrier(
copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
// Load the data.
command_processor_->SubmitBarriers();
EDRAMLoadStoreRootConstants root_constants;
root_constants.base_pitch_tiles =
edram_bases[i] | (edram_pitch_tiles << 11);
@ -1638,21 +1553,14 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
command_processor_->SetPipeline(edram_load_pipelines_[size_t(mode)]);
command_list->Dispatch(edram_pitch_tiles, edram_rows, 1);
// Commit the UAV write and transition the copy buffer to copy source.
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[0].UAV.pResource = copy_buffer;
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[1].Transition.pResource = copy_buffer;
barriers[1].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
// Commit the UAV write and transition the copy buffer to copy source now.
command_processor_->PushUAVBarrier(copy_buffer);
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_COPY_SOURCE);
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_list->ResourceBarrier(2, barriers);
// Copy to the render target planes.
command_processor_->SubmitBarriers();
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
location_source.pResource = copy_buffer;
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;

View File

@ -17,13 +17,15 @@
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
namespace xe {
namespace gpu {
namespace d3d12 {
SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
: memory_(memory), context_(context) {
SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor,
Memory* memory)
: command_processor_(command_processor), memory_(memory) {
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
page_count_ = kBufferSize >> page_size_log2_;
uint32_t page_bitmap_length = page_count_ >> 6;
@ -36,7 +38,8 @@ SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
SharedMemory::~SharedMemory() { Shutdown(); }
bool SharedMemory::Initialize() {
auto device = context_->GetD3D12Provider()->GetDevice();
auto context = command_processor_->GetD3D12Context();
auto device = context->GetD3D12Provider()->GetDevice();
buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
D3D12_RESOURCE_DESC buffer_desc;
@ -80,7 +83,7 @@ bool SharedMemory::Initialize() {
protected_pages_.size() * sizeof(uint64_t));
upload_buffer_pool_ =
std::make_unique<ui::d3d12::UploadBufferPool>(context_, 4 * 1024 * 1024);
std::make_unique<ui::d3d12::UploadBufferPool>(context, 4 * 1024 * 1024);
memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this);
@ -219,7 +222,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
// current frame anymore if have failed at least once.
return false;
}
auto provider = context_->GetD3D12Provider();
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto direct_queue = provider->GetDirectQueue();
D3D12_HEAP_DESC heap_desc = {};
@ -254,8 +257,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
return true;
}
bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
ID3D12GraphicsCommandList* command_list) {
bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
if (length == 0) {
// Some texture is empty, for example - safe to draw in this case.
return true;
@ -267,6 +269,11 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
}
uint32_t last = start + length - 1;
auto command_list = command_processor_->GetCurrentCommandList();
if (command_list == nullptr) {
return false;
}
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
@ -284,7 +291,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
if (upload_ranges_.size() == 0) {
return true;
}
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list);
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST);
command_processor_->SubmitBarriers();
for (auto upload_range : upload_ranges_) {
uint32_t upload_range_start = upload_range.first;
uint32_t upload_range_length = upload_range.second;
@ -505,33 +513,23 @@ bool SharedMemory::MemoryWriteCallback(uint32_t address) {
return true;
}
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
ID3D12GraphicsCommandList* command_list) {
if (buffer_state_ == new_state) {
return;
}
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = buffer_;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = buffer_state_;
barrier.Transition.StateAfter = new_state;
command_list->ResourceBarrier(1, &barrier);
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state) {
command_processor_->PushTransitionBarrier(buffer_, buffer_state_, new_state);
buffer_state_ = new_state;
}
void SharedMemory::UseForReading(ID3D12GraphicsCommandList* command_list) {
void SharedMemory::UseForReading() {
TransitionBuffer(D3D12_RESOURCE_STATE_INDEX_BUFFER |
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
command_list);
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
}
void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) {
TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, command_list);
void SharedMemory::UseForWriting() {
TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_SHADER_RESOURCE_VIEW_DESC desc;
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
@ -540,11 +538,12 @@ void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
desc.Buffer.NumElements = kBufferSize >> 2;
desc.Buffer.StructureByteStride = 0;
desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
context_->GetD3D12Provider()->GetDevice()->CreateShaderResourceView(
buffer_, &desc, handle);
device->CreateShaderResourceView(buffer_, &desc, handle);
}
void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_UNORDERED_ACCESS_VIEW_DESC desc;
desc.Format = DXGI_FORMAT_R32_TYPELESS;
desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
@ -553,8 +552,7 @@ void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
desc.Buffer.StructureByteStride = 0;
desc.Buffer.CounterOffsetInBytes = 0;
desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
context_->GetD3D12Provider()->GetDevice()->CreateUnorderedAccessView(
buffer_, nullptr, &desc, handle);
device->CreateUnorderedAccessView(buffer_, nullptr, &desc, handle);
}
} // namespace d3d12

View File

@ -23,12 +23,14 @@ namespace xe {
namespace gpu {
namespace d3d12 {
class D3D12CommandProcessor;
// Manages memory for unconverted textures, resolve targets, vertex and index
// buffers that can be accessed from shaders with Xenon physical addresses, with
// system page size granularity.
class SharedMemory {
public:
SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context);
SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory);
~SharedMemory();
bool Initialize();
@ -39,8 +41,6 @@ class SharedMemory {
}
void BeginFrame();
// Returns true if anything has been written to command_list been done.
// The draw command list is needed for the transition.
void EndFrame();
typedef void (*WatchCallback)(void* context, void* data, uint64_t argument);
@ -76,8 +76,7 @@ class SharedMemory {
// tiled buffer to copy destination - call this before UseForReading or
// UseForWriting. Returns true if the range has been fully updated and is
// usable.
bool RequestRange(uint32_t start, uint32_t length,
ID3D12GraphicsCommandList* command_list);
bool RequestRange(uint32_t start, uint32_t length);
// Marks the range as containing GPU-generated data (such as resolves),
// triggering modification callbacks, making it valid (so pages are not
@ -86,17 +85,17 @@ class SharedMemory {
void RangeWrittenByGPU(uint32_t start, uint32_t length);
// Makes the buffer usable for vertices, indices and texture untiling.
void UseForReading(ID3D12GraphicsCommandList* command_list);
void UseForReading();
// Makes the buffer usable for texture tiling after a resolve.
void UseForWriting(ID3D12GraphicsCommandList* command_list);
void UseForWriting();
void CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
private:
Memory* memory_;
D3D12CommandProcessor* command_processor_;
ui::d3d12::D3D12Context* context_;
Memory* memory_;
// The 512 MB tiled buffer.
static constexpr uint32_t kBufferSizeLog2 = 29;
@ -204,8 +203,7 @@ class SharedMemory {
uint32_t request_page_count);
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
ID3D12GraphicsCommandList* command_list);
void TransitionBuffer(D3D12_RESOURCE_STATES new_state);
};
} // namespace d3d12

View File

@ -296,8 +296,6 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
}
// Transition the textures to the needed usage.
D3D12_RESOURCE_BARRIER barriers[32];
uint32_t barrier_count = 0;
used_texture_mask = used_vertex_texture_mask | used_pixel_texture_mask;
while (xe::bit_scan_forward(used_texture_mask, &index)) {
uint32_t index_bit = 1u << index;
@ -313,20 +311,9 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
if (used_pixel_texture_mask & index_bit) {
state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
}
if (texture->state != state) {
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count];
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = texture->resource;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barrier.Transition.StateBefore = texture->state;
barrier.Transition.StateAfter = state;
++barrier_count;
texture->state = state;
}
}
if (barrier_count != 0) {
command_list->ResourceBarrier(barrier_count, barriers);
command_processor_->PushTransitionBarrier(texture->resource, texture->state,
state);
texture->state = state;
}
}
@ -746,13 +733,13 @@ bool TextureCache::LoadTextureData(Texture* texture) {
// Request uploading of the texture data to the shared memory.
if (!base_in_sync) {
if (!shared_memory_->RequestRange(texture->key.base_page << 12,
texture->base_size, command_list)) {
texture->base_size)) {
return false;
}
}
if (!mips_in_sync) {
if (!shared_memory_->RequestRange(texture->key.mip_page << 12,
texture->mip_size, command_list)) {
texture->mip_size)) {
return false;
}
}
@ -791,7 +778,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
return false;
}
shared_memory_->UseForReading(command_list);
shared_memory_->UseForReading();
shared_memory_->CreateSRV(descriptor_cpu_start);
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
@ -811,19 +798,9 @@ bool TextureCache::LoadTextureData(Texture* texture) {
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
// Submit commands.
D3D12_RESOURCE_BARRIER barriers[2];
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
if (texture->state != D3D12_RESOURCE_STATE_COPY_DEST) {
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[0].Transition.pResource = texture->resource;
barriers[0].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[0].Transition.StateBefore = texture->state;
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
command_list->ResourceBarrier(1, barriers);
texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
}
command_processor_->PushTransitionBarrier(texture->resource, texture->state,
D3D12_RESOURCE_STATE_COPY_DEST);
texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
uint32_t mip_first = base_in_sync ? 1 : 0;
uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1;
auto cbuffer_pool = command_processor_->GetConstantBufferPool();
@ -836,16 +813,9 @@ bool TextureCache::LoadTextureData(Texture* texture) {
copy_constants.guest_mip_offset[2] = 0;
}
for (uint32_t i = 0; i < slice_count; ++i) {
if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[0].Transition.pResource = copy_buffer;
barriers[0].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[0].Transition.StateBefore = copy_buffer_state;
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
command_list->ResourceBarrier(1, barriers);
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
}
command_processor_->PushTransitionBarrier(
copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
for (uint32_t j = mip_first; j <= mip_last; ++j) {
if (j == 0) {
copy_constants.guest_base =
@ -885,21 +855,17 @@ bool TextureCache::LoadTextureData(Texture* texture) {
}
std::memcpy(cbuffer_mapping, &copy_constants, sizeof(copy_constants));
command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address);
command_processor_->SubmitBarriers();
// Each thread group processes 32x32x1 blocks.
command_list->Dispatch((copy_constants.size_blocks[0] + 31) >> 5,
(copy_constants.size_blocks[1] + 31) >> 5,
copy_constants.size_blocks[2]);
}
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
barriers[0].UAV.pResource = copy_buffer;
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barriers[1].Transition.pResource = copy_buffer;
barriers[1].Transition.Subresource =
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_list->ResourceBarrier(2, barriers);
command_processor_->PushUAVBarrier(copy_buffer);
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
D3D12_RESOURCE_STATE_COPY_SOURCE);
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
command_processor_->SubmitBarriers();
UINT slice_first_subresource = i * resource_desc.MipLevels;
for (uint32_t j = mip_first; j <= mip_last; ++j) {
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;