[D3D12] Shorten and batch barriers
This commit is contained in:
parent
d204e9ba74
commit
bc4125584c
|
@ -42,6 +42,48 @@ ID3D12GraphicsCommandList* D3D12CommandProcessor::GetCurrentCommandList()
|
||||||
return command_lists_[current_queue_frame_]->GetCommandList();
|
return command_lists_[current_queue_frame_]->GetCommandList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Queues a state transition barrier for `resource` in the unsubmitted barrier
// batch (barriers_). Nothing is recorded into the command list here; the batch
// is recorded when SubmitBarriers is called.
//
// resource     - resource to transition.
// old_state    - state written to the barrier's StateBefore.
// new_state    - state written to the barrier's StateAfter.
// subresource  - subresource index written to the barrier.
//
// If old_state and new_state are equal, no barrier is queued.
void D3D12CommandProcessor::PushTransitionBarrier(
    ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
    D3D12_RESOURCE_STATES new_state, UINT subresource) {
  if (old_state != new_state) {
    D3D12_RESOURCE_BARRIER entry;
    entry.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    entry.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    // Fill the transition-specific part of the barrier through a reference.
    auto& transition = entry.Transition;
    transition.pResource = resource;
    transition.Subresource = subresource;
    transition.StateBefore = old_state;
    transition.StateAfter = new_state;
    barriers_.push_back(entry);
  }
}
|
||||||
|
|
||||||
|
// Queues an aliasing barrier in the unsubmitted barrier batch (barriers_).
// The barrier is only stored here; it is recorded into the command list when
// SubmitBarriers is called.
//
// old_resource - stored as the barrier's pResourceBefore.
// new_resource - stored as the barrier's pResourceAfter.
void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource,
                                                ID3D12Resource* new_resource) {
  D3D12_RESOURCE_BARRIER entry;
  entry.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
  entry.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  auto& aliasing = entry.Aliasing;
  aliasing.pResourceBefore = old_resource;
  aliasing.pResourceAfter = new_resource;
  barriers_.push_back(entry);
}
|
||||||
|
|
||||||
|
// Queues a UAV barrier for `resource` in the unsubmitted barrier batch
// (barriers_). The barrier is only stored here; it is recorded into the
// command list when SubmitBarriers is called.
void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) {
  D3D12_RESOURCE_BARRIER entry;
  entry.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
  entry.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  entry.UAV.pResource = resource;
  barriers_.push_back(entry);
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::SubmitBarriers() {
|
||||||
|
UINT barrier_count = UINT(barriers_.size());
|
||||||
|
if (barrier_count != 0) {
|
||||||
|
GetCurrentCommandList()->ResourceBarrier(barrier_count, barriers_.data());
|
||||||
|
barriers_.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
||||||
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
|
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader) {
|
||||||
assert_true(vertex_shader->is_translated());
|
assert_true(vertex_shader->is_translated());
|
||||||
|
@ -372,17 +414,8 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size <= scratch_buffer_size_) {
|
if (size <= scratch_buffer_size_) {
|
||||||
if (scratch_buffer_state_ != state) {
|
PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state);
|
||||||
D3D12_RESOURCE_BARRIER barrier;
|
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = scratch_buffer_;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = scratch_buffer_state_;
|
|
||||||
barrier.Transition.StateAfter = state;
|
|
||||||
GetCurrentCommandList()->ResourceBarrier(1, &barrier);
|
|
||||||
scratch_buffer_state_ = state;
|
scratch_buffer_state_ = state;
|
||||||
}
|
|
||||||
scratch_buffer_used_ = true;
|
scratch_buffer_used_ = true;
|
||||||
return scratch_buffer_;
|
return scratch_buffer_;
|
||||||
}
|
}
|
||||||
|
@ -470,7 +503,7 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
sampler_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
|
sampler_heap_pool_ = std::make_unique<ui::d3d12::DescriptorHeapPool>(
|
||||||
context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048);
|
context, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 2048);
|
||||||
|
|
||||||
shared_memory_ = std::make_unique<SharedMemory>(memory_, context);
|
shared_memory_ = std::make_unique<SharedMemory>(this, memory_);
|
||||||
if (!shared_memory_->Initialize()) {
|
if (!shared_memory_->Initialize()) {
|
||||||
XELOGE("Failed to initialize shared memory");
|
XELOGE("Failed to initialize shared memory");
|
||||||
return false;
|
return false;
|
||||||
|
@ -764,7 +797,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
shared_memory_->RequestRange(
|
shared_memory_->RequestRange(
|
||||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC, command_list);
|
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
||||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||||
}
|
}
|
||||||
if (indexed) {
|
if (indexed) {
|
||||||
|
@ -774,9 +807,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
: sizeof(uint16_t);
|
: sizeof(uint16_t);
|
||||||
index_base &= ~(index_size - 1);
|
index_base &= ~(index_size - 1);
|
||||||
uint32_t index_buffer_size = index_buffer_info->count * index_size;
|
uint32_t index_buffer_size = index_buffer_info->count * index_size;
|
||||||
shared_memory_->RequestRange(index_base, index_buffer_size, command_list);
|
shared_memory_->RequestRange(index_base, index_buffer_size);
|
||||||
|
|
||||||
shared_memory_->UseForReading(command_list);
|
shared_memory_->UseForReading();
|
||||||
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
|
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
|
||||||
index_buffer_view.BufferLocation =
|
index_buffer_view.BufferLocation =
|
||||||
shared_memory_->GetGPUAddress() + index_base;
|
shared_memory_->GetGPUAddress() + index_base;
|
||||||
|
@ -785,9 +818,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
? DXGI_FORMAT_R32_UINT
|
? DXGI_FORMAT_R32_UINT
|
||||||
: DXGI_FORMAT_R16_UINT;
|
: DXGI_FORMAT_R16_UINT;
|
||||||
command_list->IASetIndexBuffer(&index_buffer_view);
|
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||||
|
SubmitBarriers();
|
||||||
command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0);
|
command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0);
|
||||||
} else {
|
} else {
|
||||||
shared_memory_->UseForReading(command_list);
|
shared_memory_->UseForReading();
|
||||||
|
SubmitBarriers();
|
||||||
command_list->DrawInstanced(index_count, 1, 0, 0);
|
command_list->DrawInstanced(index_count, 1, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -873,6 +908,9 @@ bool D3D12CommandProcessor::EndFrame() {
|
||||||
|
|
||||||
shared_memory_->EndFrame();
|
shared_memory_->EndFrame();
|
||||||
|
|
||||||
|
// Submit barriers now because resources the queued barriers are for may be
|
||||||
|
// destroyed between frames.
|
||||||
|
SubmitBarriers();
|
||||||
command_lists_[current_queue_frame_]->Execute();
|
command_lists_[current_queue_frame_]->Execute();
|
||||||
|
|
||||||
sampler_heap_pool_->EndFrame();
|
sampler_heap_pool_->EndFrame();
|
||||||
|
|
|
@ -47,6 +47,15 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
// Returns the drawing command list for the currently open frame.
|
// Returns the drawing command list for the currently open frame.
|
||||||
ID3D12GraphicsCommandList* GetCurrentCommandList() const;
|
ID3D12GraphicsCommandList* GetCurrentCommandList() const;
|
||||||
|
|
||||||
|
void PushTransitionBarrier(
|
||||||
|
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
|
||||||
|
D3D12_RESOURCE_STATES new_state,
|
||||||
|
UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
|
||||||
|
void PushAliasingBarrier(ID3D12Resource* old_resource,
|
||||||
|
ID3D12Resource* new_resource);
|
||||||
|
void PushUAVBarrier(ID3D12Resource* resource);
|
||||||
|
void SubmitBarriers();
|
||||||
|
|
||||||
// Finds or creates root signature for a pipeline.
|
// Finds or creates root signature for a pipeline.
|
||||||
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
||||||
const D3D12Shader* pixel_shader);
|
const D3D12Shader* pixel_shader);
|
||||||
|
@ -176,6 +185,9 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
|
||||||
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
|
||||||
|
|
||||||
|
// Unsubmitted barrier batch.
|
||||||
|
std::vector<D3D12_RESOURCE_BARRIER> barriers_;
|
||||||
|
|
||||||
struct BufferForDeletion {
|
struct BufferForDeletion {
|
||||||
ID3D12Resource* buffer;
|
ID3D12Resource* buffer;
|
||||||
uint64_t last_usage_frame;
|
uint64_t last_usage_frame;
|
||||||
|
|
|
@ -578,9 +578,6 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
auto device =
|
auto device =
|
||||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
|
|
||||||
D3D12_RESOURCE_BARRIER barriers[5];
|
|
||||||
uint32_t barrier_count = 0;
|
|
||||||
|
|
||||||
// Allocate new render targets and add them to the bindings list.
|
// Allocate new render targets and add them to the bindings list.
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
if (!(render_targets_to_attach & (1 << i))) {
|
if (!(render_targets_to_attach & (1 << i))) {
|
||||||
|
@ -636,14 +633,8 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
heap_usage[heap_page_first >> 3] += heap_page_count;
|
heap_usage[heap_page_first >> 3] += heap_page_count;
|
||||||
|
|
||||||
// Inform Direct3D that we're reusing the heap for this render target.
|
// Inform Direct3D that we're reusing the heap for this render target.
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
command_processor_->PushAliasingBarrier(nullptr,
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
|
binding.render_target->resource);
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Aliasing.pResourceBefore = nullptr;
|
|
||||||
barrier.Aliasing.pResourceAfter = binding.render_target->resource;
|
|
||||||
}
|
|
||||||
if (barrier_count != 0) {
|
|
||||||
command_list->ResourceBarrier(barrier_count, barriers);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the contents of the new render targets from the EDRAM buffer (will
|
// Load the contents of the new render targets from the EDRAM buffer (will
|
||||||
|
@ -671,7 +662,6 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
// Transition the render targets to the appropriate state if needed,
|
// Transition the render targets to the appropriate state if needed,
|
||||||
// compress the list of the render target because null RTV descriptors are
|
// compress the list of the render target because null RTV descriptors are
|
||||||
// broken in Direct3D 12 and bind the render targets to the command list.
|
// broken in Direct3D 12 and bind the render targets to the command list.
|
||||||
barrier_count = 0;
|
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
|
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[4];
|
||||||
uint32_t rtv_count = 0;
|
uint32_t rtv_count = 0;
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
@ -680,17 +670,10 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
if (!binding.is_bound || render_target == nullptr) {
|
if (!binding.is_bound || render_target == nullptr) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (render_target->state != D3D12_RESOURCE_STATE_RENDER_TARGET) {
|
command_processor_->PushTransitionBarrier(
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
render_target->resource, render_target->state,
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = render_target->resource;
|
|
||||||
barrier.Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = render_target->state;
|
|
||||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
|
||||||
render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
render_target->state = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
||||||
}
|
|
||||||
rtv_handles[rtv_count] = render_target->handle;
|
rtv_handles[rtv_count] = render_target->handle;
|
||||||
current_pipeline_render_targets_[rtv_count].guest_render_target = i;
|
current_pipeline_render_targets_[rtv_count].guest_render_target = i;
|
||||||
current_pipeline_render_targets_[rtv_count].format =
|
current_pipeline_render_targets_[rtv_count].format =
|
||||||
|
@ -706,17 +689,10 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
RenderTarget* depth_render_target = depth_binding.render_target;
|
RenderTarget* depth_render_target = depth_binding.render_target;
|
||||||
current_pipeline_render_targets_[4].guest_render_target = 4;
|
current_pipeline_render_targets_[4].guest_render_target = 4;
|
||||||
if (depth_binding.is_bound && depth_render_target != nullptr) {
|
if (depth_binding.is_bound && depth_render_target != nullptr) {
|
||||||
if (depth_render_target->state != D3D12_RESOURCE_STATE_DEPTH_WRITE) {
|
command_processor_->PushTransitionBarrier(
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
depth_render_target->resource, depth_render_target->state,
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = depth_render_target->resource;
|
|
||||||
barrier.Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = depth_render_target->state;
|
|
||||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
|
||||||
depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
depth_render_target->state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
||||||
}
|
|
||||||
dsv_handle = &depth_binding.render_target->handle;
|
dsv_handle = &depth_binding.render_target->handle;
|
||||||
current_pipeline_render_targets_[4].format =
|
current_pipeline_render_targets_[4].format =
|
||||||
GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4]));
|
GetDepthDXGIFormat(DepthRenderTargetFormat(formats[4]));
|
||||||
|
@ -724,9 +700,7 @@ bool RenderTargetCache::UpdateRenderTargets() {
|
||||||
dsv_handle = nullptr;
|
dsv_handle = nullptr;
|
||||||
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
|
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
|
||||||
}
|
}
|
||||||
if (barrier_count != 0) {
|
command_processor_->SubmitBarriers();
|
||||||
command_list->ResourceBarrier(barrier_count, barriers);
|
|
||||||
}
|
|
||||||
command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle);
|
command_list->OMSetRenderTargets(rtv_count, rtv_handles, FALSE, dsv_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1282,17 +1256,11 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t store_bindings[5];
|
|
||||||
uint32_t store_binding_count = 0;
|
|
||||||
|
|
||||||
// 6 for 5 render targets + the EDRAM buffer.
|
|
||||||
D3D12_RESOURCE_BARRIER barriers[6];
|
|
||||||
uint32_t barrier_count;
|
|
||||||
|
|
||||||
// Extract only the render targets that need to be stored, transition them to
|
// Extract only the render targets that need to be stored, transition them to
|
||||||
// copy sources and calculate copy buffer size.
|
// copy sources and calculate copy buffer size.
|
||||||
|
uint32_t store_bindings[5];
|
||||||
|
uint32_t store_binding_count = 0;
|
||||||
uint32_t copy_buffer_size = 0;
|
uint32_t copy_buffer_size = 0;
|
||||||
barrier_count = 0;
|
|
||||||
for (uint32_t i = 0; i < 5; ++i) {
|
for (uint32_t i = 0; i < 5; ++i) {
|
||||||
const RenderTargetBinding& binding = current_bindings_[i];
|
const RenderTargetBinding& binding = current_bindings_[i];
|
||||||
RenderTarget* render_target = binding.render_target;
|
RenderTarget* render_target = binding.render_target;
|
||||||
|
@ -1300,38 +1268,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
binding.edram_dirty_rows < 0) {
|
binding.edram_dirty_rows < 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
store_bindings[store_binding_count] = i;
|
store_bindings[store_binding_count++] = i;
|
||||||
copy_buffer_size =
|
copy_buffer_size =
|
||||||
std::max(copy_buffer_size, render_target->copy_buffer_size);
|
std::max(copy_buffer_size, render_target->copy_buffer_size);
|
||||||
++store_binding_count;
|
|
||||||
if (render_target->state != D3D12_RESOURCE_STATE_COPY_SOURCE) {
|
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = render_target->resource;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = render_target->state;
|
|
||||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
|
||||||
render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (store_binding_count == 0) {
|
if (store_binding_count == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (edram_buffer_state_ != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
|
||||||
// Also transition the EDRAM buffer to UAV.
|
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = edram_buffer_;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = edram_buffer_state_;
|
|
||||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
||||||
edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
||||||
}
|
|
||||||
if (barrier_count != 0) {
|
|
||||||
command_list->ResourceBarrier(barrier_count, barriers);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate descriptors for the buffers.
|
// Allocate descriptors for the buffers.
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||||
|
@ -1349,6 +1292,21 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Transition the render targets that need to be stored to copy sources and
|
||||||
|
// the EDRAM buffer to a UAV.
|
||||||
|
for (uint32_t i = 0; i < store_binding_count; ++i) {
|
||||||
|
RenderTarget* render_target =
|
||||||
|
current_bindings_[store_bindings[i]].render_target;
|
||||||
|
command_processor_->PushTransitionBarrier(render_target->resource,
|
||||||
|
render_target->state,
|
||||||
|
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||||
|
render_target->state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||||
|
}
|
||||||
|
command_processor_->PushTransitionBarrier(
|
||||||
|
edram_buffer_, edram_buffer_state_,
|
||||||
|
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
|
edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||||
|
|
||||||
// Prepare for storing.
|
// Prepare for storing.
|
||||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
auto device = provider->GetDevice();
|
auto device = provider->GetDevice();
|
||||||
|
@ -1410,6 +1368,12 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
rt_pitch_tiles *= 2;
|
rt_pitch_tiles *= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Transition the copy buffer to copy destination.
|
||||||
|
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
|
||||||
|
D3D12_RESOURCE_STATE_COPY_DEST);
|
||||||
|
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
|
|
||||||
// Copy from the render target planes and set up the layout.
|
// Copy from the render target planes and set up the layout.
|
||||||
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
||||||
location_source.pResource = render_target->resource;
|
location_source.pResource = render_target->resource;
|
||||||
|
@ -1440,16 +1404,11 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transition the copy buffer to SRV.
|
// Transition the copy buffer to SRV.
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
command_processor_->PushTransitionBarrier(
|
||||||
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
copy_buffer, copy_buffer_state,
|
||||||
barriers[0].Transition.pResource = copy_buffer;
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
|
||||||
barriers[0].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
||||||
barriers[0].Transition.StateAfter =
|
|
||||||
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
|
||||||
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
||||||
command_list->ResourceBarrier(1, barriers);
|
command_processor_->SubmitBarriers();
|
||||||
|
|
||||||
// Store the data.
|
// Store the data.
|
||||||
command_list->SetComputeRoot32BitConstants(
|
command_list->SetComputeRoot32BitConstants(
|
||||||
|
@ -1459,24 +1418,8 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
||||||
command_processor_->SetPipeline(edram_store_pipelines_[size_t(mode)]);
|
command_processor_->SetPipeline(edram_store_pipelines_[size_t(mode)]);
|
||||||
command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1);
|
command_list->Dispatch(rt_pitch_tiles, binding.edram_dirty_rows, 1);
|
||||||
|
|
||||||
// Commit the UAV write and prepare for copying again.
|
// Commit the UAV write.
|
||||||
barrier_count = 1;
|
command_processor_->PushUAVBarrier(edram_buffer_);
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
|
||||||
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barriers[0].UAV.pResource = edram_buffer_;
|
|
||||||
if (i + 1 < store_binding_count) {
|
|
||||||
barrier_count = 2;
|
|
||||||
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barriers[1].Transition.pResource = copy_buffer;
|
|
||||||
barriers[1].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[1].Transition.StateBefore =
|
|
||||||
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
|
||||||
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
||||||
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
||||||
}
|
|
||||||
command_list->ResourceBarrier(barrier_count, barriers);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
||||||
|
@ -1495,45 +1438,6 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 6 for 5 render targets + the EDRAM buffer.
|
|
||||||
D3D12_RESOURCE_BARRIER barriers[6];
|
|
||||||
uint32_t barrier_count;
|
|
||||||
|
|
||||||
// Transition the render targets to copy destinations and calculate copy
|
|
||||||
// buffer size.
|
|
||||||
uint32_t copy_buffer_size = 0;
|
|
||||||
barrier_count = 0;
|
|
||||||
for (uint32_t i = 0; i < render_target_count; ++i) {
|
|
||||||
RenderTarget* render_target = render_targets[i];
|
|
||||||
copy_buffer_size =
|
|
||||||
std::max(copy_buffer_size, render_target->copy_buffer_size);
|
|
||||||
if (render_target->state != D3D12_RESOURCE_STATE_COPY_DEST) {
|
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = render_target->resource;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = render_target->state;
|
|
||||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
||||||
render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (edram_buffer_state_ != D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) {
|
|
||||||
// Also transition the EDRAM buffer to SRV.
|
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count++];
|
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = edram_buffer_;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = edram_buffer_state_;
|
|
||||||
barrier.Transition.StateAfter =
|
|
||||||
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
|
||||||
edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
|
||||||
}
|
|
||||||
if (barrier_count != 0) {
|
|
||||||
command_list->ResourceBarrier(barrier_count, barriers);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate descriptors for the buffers.
|
// Allocate descriptors for the buffers.
|
||||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||||
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||||
|
@ -1543,6 +1447,11 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the buffer for copying.
|
// Get the buffer for copying.
|
||||||
|
uint32_t copy_buffer_size = 0;
|
||||||
|
for (uint32_t i = 0; i < render_target_count; ++i) {
|
||||||
|
copy_buffer_size =
|
||||||
|
std::max(copy_buffer_size, render_targets[i]->copy_buffer_size);
|
||||||
|
}
|
||||||
D3D12_RESOURCE_STATES copy_buffer_state =
|
D3D12_RESOURCE_STATES copy_buffer_state =
|
||||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||||
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
|
ID3D12Resource* copy_buffer = command_processor_->RequestScratchGPUBuffer(
|
||||||
|
@ -1551,7 +1460,21 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prepare for loading.
|
// Transition the render targets to copy destinations and the EDRAM buffer to
|
||||||
|
// a SRV.
|
||||||
|
for (uint32_t i = 0; i < render_target_count; ++i) {
|
||||||
|
RenderTarget* render_target = render_targets[i];
|
||||||
|
command_processor_->PushTransitionBarrier(render_target->resource,
|
||||||
|
render_target->state,
|
||||||
|
D3D12_RESOURCE_STATE_COPY_DEST);
|
||||||
|
render_target->state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
|
}
|
||||||
|
command_processor_->PushTransitionBarrier(
|
||||||
|
edram_buffer_, edram_buffer_state_,
|
||||||
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
|
||||||
|
edram_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
|
||||||
|
|
||||||
|
// Set up the bindings.
|
||||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
auto device = provider->GetDevice();
|
auto device = provider->GetDevice();
|
||||||
auto descriptor_size_view = provider->GetDescriptorSizeView();
|
auto descriptor_size_view = provider->GetDescriptorSizeView();
|
||||||
|
@ -1583,8 +1506,8 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
// Load each render target.
|
// Load each render target.
|
||||||
for (uint32_t i = 0; i < render_target_count; ++i) {
|
for (uint32_t i = 0; i < render_target_count; ++i) {
|
||||||
if (edram_bases[i] >= 2048) {
|
if (edram_bases[i] >= 2048) {
|
||||||
// Something is wrong with the resolve.
|
// Something is wrong with the load.
|
||||||
return;
|
continue;
|
||||||
}
|
}
|
||||||
const RenderTarget* render_target = render_targets[i];
|
const RenderTarget* render_target = render_targets[i];
|
||||||
|
|
||||||
|
@ -1595,8 +1518,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
ColorRenderTargetFormat(render_target->key.format))) {
|
ColorRenderTargetFormat(render_target->key.format))) {
|
||||||
edram_pitch_tiles *= 2;
|
edram_pitch_tiles *= 2;
|
||||||
}
|
}
|
||||||
// Validate the height in case the resolve is somehow too large (shouldn't
|
// Clamp the height if somehow requested a render target that is too large.
|
||||||
// happen though, but who knows what games do).
|
|
||||||
uint32_t edram_rows =
|
uint32_t edram_rows =
|
||||||
std::min(render_target->key.height_ss_div_16,
|
std::min(render_target->key.height_ss_div_16,
|
||||||
(2048u - edram_bases[i]) / edram_pitch_tiles);
|
(2048u - edram_bases[i]) / edram_pitch_tiles);
|
||||||
|
@ -1605,19 +1527,12 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transition the copy buffer back to UAV if it's not the first load.
|
// Transition the copy buffer back to UAV if it's not the first load.
|
||||||
if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
command_processor_->PushTransitionBarrier(
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barriers[0].Transition.pResource = copy_buffer;
|
|
||||||
barriers[0].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[0].Transition.StateBefore = copy_buffer_state;
|
|
||||||
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
||||||
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||||
command_list->ResourceBarrier(1, barriers);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load the data.
|
// Load the data.
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
EDRAMLoadStoreRootConstants root_constants;
|
EDRAMLoadStoreRootConstants root_constants;
|
||||||
root_constants.base_pitch_tiles =
|
root_constants.base_pitch_tiles =
|
||||||
edram_bases[i] | (edram_pitch_tiles << 11);
|
edram_bases[i] | (edram_pitch_tiles << 11);
|
||||||
|
@ -1638,21 +1553,14 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
||||||
command_processor_->SetPipeline(edram_load_pipelines_[size_t(mode)]);
|
command_processor_->SetPipeline(edram_load_pipelines_[size_t(mode)]);
|
||||||
command_list->Dispatch(edram_pitch_tiles, edram_rows, 1);
|
command_list->Dispatch(edram_pitch_tiles, edram_rows, 1);
|
||||||
|
|
||||||
// Commit the UAV write and transition the copy buffer to copy source.
|
// Commit the UAV write and transition the copy buffer to copy source now.
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
command_processor_->PushUAVBarrier(copy_buffer);
|
||||||
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
|
||||||
barriers[0].UAV.pResource = copy_buffer;
|
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||||
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barriers[1].Transition.pResource = copy_buffer;
|
|
||||||
barriers[1].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
||||||
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
|
||||||
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||||
command_list->ResourceBarrier(2, barriers);
|
|
||||||
|
|
||||||
// Copy to the render target planes.
|
// Copy to the render target planes.
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
||||||
location_source.pResource = copy_buffer;
|
location_source.pResource = copy_buffer;
|
||||||
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
|
||||||
|
|
|
@ -17,13 +17,15 @@
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/base/memory.h"
|
#include "xenia/base/memory.h"
|
||||||
#include "xenia/base/profiling.h"
|
#include "xenia/base/profiling.h"
|
||||||
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
|
||||||
SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
|
SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor,
|
||||||
: memory_(memory), context_(context) {
|
Memory* memory)
|
||||||
|
: command_processor_(command_processor), memory_(memory) {
|
||||||
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
|
page_size_log2_ = xe::log2_ceil(uint32_t(xe::memory::page_size()));
|
||||||
page_count_ = kBufferSize >> page_size_log2_;
|
page_count_ = kBufferSize >> page_size_log2_;
|
||||||
uint32_t page_bitmap_length = page_count_ >> 6;
|
uint32_t page_bitmap_length = page_count_ >> 6;
|
||||||
|
@ -36,7 +38,8 @@ SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
|
||||||
SharedMemory::~SharedMemory() { Shutdown(); }
|
SharedMemory::~SharedMemory() { Shutdown(); }
|
||||||
|
|
||||||
bool SharedMemory::Initialize() {
|
bool SharedMemory::Initialize() {
|
||||||
auto device = context_->GetD3D12Provider()->GetDevice();
|
auto context = command_processor_->GetD3D12Context();
|
||||||
|
auto device = context->GetD3D12Provider()->GetDevice();
|
||||||
|
|
||||||
buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
D3D12_RESOURCE_DESC buffer_desc;
|
D3D12_RESOURCE_DESC buffer_desc;
|
||||||
|
@ -80,7 +83,7 @@ bool SharedMemory::Initialize() {
|
||||||
protected_pages_.size() * sizeof(uint64_t));
|
protected_pages_.size() * sizeof(uint64_t));
|
||||||
|
|
||||||
upload_buffer_pool_ =
|
upload_buffer_pool_ =
|
||||||
std::make_unique<ui::d3d12::UploadBufferPool>(context_, 4 * 1024 * 1024);
|
std::make_unique<ui::d3d12::UploadBufferPool>(context, 4 * 1024 * 1024);
|
||||||
|
|
||||||
memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this);
|
memory_->SetGlobalPhysicalAccessWatch(MemoryWriteCallbackThunk, this);
|
||||||
|
|
||||||
|
@ -219,7 +222,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
|
||||||
// current frame anymore if have failed at least once.
|
// current frame anymore if have failed at least once.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto provider = context_->GetD3D12Provider();
|
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||||
auto device = provider->GetDevice();
|
auto device = provider->GetDevice();
|
||||||
auto direct_queue = provider->GetDirectQueue();
|
auto direct_queue = provider->GetDirectQueue();
|
||||||
D3D12_HEAP_DESC heap_desc = {};
|
D3D12_HEAP_DESC heap_desc = {};
|
||||||
|
@ -254,8 +257,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
||||||
ID3D12GraphicsCommandList* command_list) {
|
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
// Some texture is empty, for example - safe to draw in this case.
|
// Some texture is empty, for example - safe to draw in this case.
|
||||||
return true;
|
return true;
|
||||||
|
@ -267,6 +269,11 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
||||||
}
|
}
|
||||||
uint32_t last = start + length - 1;
|
uint32_t last = start + length - 1;
|
||||||
|
|
||||||
|
auto command_list = command_processor_->GetCurrentCommandList();
|
||||||
|
if (command_list == nullptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
#if FINE_GRAINED_DRAW_SCOPES
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
@ -284,7 +291,8 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length,
|
||||||
if (upload_ranges_.size() == 0) {
|
if (upload_ranges_.size() == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST, command_list);
|
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_DEST);
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
for (auto upload_range : upload_ranges_) {
|
for (auto upload_range : upload_ranges_) {
|
||||||
uint32_t upload_range_start = upload_range.first;
|
uint32_t upload_range_start = upload_range.first;
|
||||||
uint32_t upload_range_length = upload_range.second;
|
uint32_t upload_range_length = upload_range.second;
|
||||||
|
@ -505,33 +513,23 @@ bool SharedMemory::MemoryWriteCallback(uint32_t address) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state,
|
void SharedMemory::TransitionBuffer(D3D12_RESOURCE_STATES new_state) {
|
||||||
ID3D12GraphicsCommandList* command_list) {
|
command_processor_->PushTransitionBarrier(buffer_, buffer_state_, new_state);
|
||||||
if (buffer_state_ == new_state) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
D3D12_RESOURCE_BARRIER barrier;
|
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = buffer_;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = buffer_state_;
|
|
||||||
barrier.Transition.StateAfter = new_state;
|
|
||||||
command_list->ResourceBarrier(1, &barrier);
|
|
||||||
buffer_state_ = new_state;
|
buffer_state_ = new_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::UseForReading(ID3D12GraphicsCommandList* command_list) {
|
void SharedMemory::UseForReading() {
|
||||||
TransitionBuffer(D3D12_RESOURCE_STATE_INDEX_BUFFER |
|
TransitionBuffer(D3D12_RESOURCE_STATE_INDEX_BUFFER |
|
||||||
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
|
||||||
command_list);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::UseForWriting(ID3D12GraphicsCommandList* command_list) {
|
void SharedMemory::UseForWriting() {
|
||||||
TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, command_list);
|
TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
D3D12_SHADER_RESOURCE_VIEW_DESC desc;
|
D3D12_SHADER_RESOURCE_VIEW_DESC desc;
|
||||||
desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||||
desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||||
|
@ -540,11 +538,12 @@ void SharedMemory::CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
desc.Buffer.NumElements = kBufferSize >> 2;
|
desc.Buffer.NumElements = kBufferSize >> 2;
|
||||||
desc.Buffer.StructureByteStride = 0;
|
desc.Buffer.StructureByteStride = 0;
|
||||||
desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
|
desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
|
||||||
context_->GetD3D12Provider()->GetDevice()->CreateShaderResourceView(
|
device->CreateShaderResourceView(buffer_, &desc, handle);
|
||||||
buffer_, &desc, handle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
|
auto device =
|
||||||
|
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||||
D3D12_UNORDERED_ACCESS_VIEW_DESC desc;
|
D3D12_UNORDERED_ACCESS_VIEW_DESC desc;
|
||||||
desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||||
desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||||
|
@ -553,8 +552,7 @@ void SharedMemory::CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||||
desc.Buffer.StructureByteStride = 0;
|
desc.Buffer.StructureByteStride = 0;
|
||||||
desc.Buffer.CounterOffsetInBytes = 0;
|
desc.Buffer.CounterOffsetInBytes = 0;
|
||||||
desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
|
||||||
context_->GetD3D12Provider()->GetDevice()->CreateUnorderedAccessView(
|
device->CreateUnorderedAccessView(buffer_, nullptr, &desc, handle);
|
||||||
buffer_, nullptr, &desc, handle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -23,12 +23,14 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
|
||||||
|
class D3D12CommandProcessor;
|
||||||
|
|
||||||
// Manages memory for unconverted textures, resolve targets, vertex and index
|
// Manages memory for unconverted textures, resolve targets, vertex and index
|
||||||
// buffers that can be accessed from shaders with Xenon physical addresses, with
|
// buffers that can be accessed from shaders with Xenon physical addresses, with
|
||||||
// system page size granularity.
|
// system page size granularity.
|
||||||
class SharedMemory {
|
class SharedMemory {
|
||||||
public:
|
public:
|
||||||
SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context);
|
SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory);
|
||||||
~SharedMemory();
|
~SharedMemory();
|
||||||
|
|
||||||
bool Initialize();
|
bool Initialize();
|
||||||
|
@ -39,8 +41,6 @@ class SharedMemory {
|
||||||
}
|
}
|
||||||
|
|
||||||
void BeginFrame();
|
void BeginFrame();
|
||||||
// Returns true if anything has been written to command_list been done.
|
|
||||||
// The draw command list is needed for the transition.
|
|
||||||
void EndFrame();
|
void EndFrame();
|
||||||
|
|
||||||
typedef void (*WatchCallback)(void* context, void* data, uint64_t argument);
|
typedef void (*WatchCallback)(void* context, void* data, uint64_t argument);
|
||||||
|
@ -76,8 +76,7 @@ class SharedMemory {
|
||||||
// tiled buffer to copy destination - call this before UseForReading or
|
// tiled buffer to copy destination - call this before UseForReading or
|
||||||
// UseForWriting. Returns true if the range has been fully updated and is
|
// UseForWriting. Returns true if the range has been fully updated and is
|
||||||
// usable.
|
// usable.
|
||||||
bool RequestRange(uint32_t start, uint32_t length,
|
bool RequestRange(uint32_t start, uint32_t length);
|
||||||
ID3D12GraphicsCommandList* command_list);
|
|
||||||
|
|
||||||
// Marks the range as containing GPU-generated data (such as resolves),
|
// Marks the range as containing GPU-generated data (such as resolves),
|
||||||
// triggering modification callbacks, making it valid (so pages are not
|
// triggering modification callbacks, making it valid (so pages are not
|
||||||
|
@ -86,17 +85,17 @@ class SharedMemory {
|
||||||
void RangeWrittenByGPU(uint32_t start, uint32_t length);
|
void RangeWrittenByGPU(uint32_t start, uint32_t length);
|
||||||
|
|
||||||
// Makes the buffer usable for vertices, indices and texture untiling.
|
// Makes the buffer usable for vertices, indices and texture untiling.
|
||||||
void UseForReading(ID3D12GraphicsCommandList* command_list);
|
void UseForReading();
|
||||||
// Makes the buffer usable for texture tiling after a resolve.
|
// Makes the buffer usable for texture tiling after a resolve.
|
||||||
void UseForWriting(ID3D12GraphicsCommandList* command_list);
|
void UseForWriting();
|
||||||
|
|
||||||
void CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
void CreateSRV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
void CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
void CreateUAV(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Memory* memory_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
|
|
||||||
ui::d3d12::D3D12Context* context_;
|
Memory* memory_;
|
||||||
|
|
||||||
// The 512 MB tiled buffer.
|
// The 512 MB tiled buffer.
|
||||||
static constexpr uint32_t kBufferSizeLog2 = 29;
|
static constexpr uint32_t kBufferSizeLog2 = 29;
|
||||||
|
@ -204,8 +203,7 @@ class SharedMemory {
|
||||||
uint32_t request_page_count);
|
uint32_t request_page_count);
|
||||||
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
||||||
|
|
||||||
void TransitionBuffer(D3D12_RESOURCE_STATES new_state,
|
void TransitionBuffer(D3D12_RESOURCE_STATES new_state);
|
||||||
ID3D12GraphicsCommandList* command_list);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -296,8 +296,6 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transition the textures to the needed usage.
|
// Transition the textures to the needed usage.
|
||||||
D3D12_RESOURCE_BARRIER barriers[32];
|
|
||||||
uint32_t barrier_count = 0;
|
|
||||||
used_texture_mask = used_vertex_texture_mask | used_pixel_texture_mask;
|
used_texture_mask = used_vertex_texture_mask | used_pixel_texture_mask;
|
||||||
while (xe::bit_scan_forward(used_texture_mask, &index)) {
|
while (xe::bit_scan_forward(used_texture_mask, &index)) {
|
||||||
uint32_t index_bit = 1u << index;
|
uint32_t index_bit = 1u << index;
|
||||||
|
@ -313,21 +311,10 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
if (used_pixel_texture_mask & index_bit) {
|
if (used_pixel_texture_mask & index_bit) {
|
||||||
state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
state |= D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
|
||||||
}
|
}
|
||||||
if (texture->state != state) {
|
command_processor_->PushTransitionBarrier(texture->resource, texture->state,
|
||||||
D3D12_RESOURCE_BARRIER& barrier = barriers[barrier_count];
|
state);
|
||||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
barrier.Transition.pResource = texture->resource;
|
|
||||||
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barrier.Transition.StateBefore = texture->state;
|
|
||||||
barrier.Transition.StateAfter = state;
|
|
||||||
++barrier_count;
|
|
||||||
texture->state = state;
|
texture->state = state;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (barrier_count != 0) {
|
|
||||||
command_list->ResourceBarrier(barrier_count, barriers);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::WriteTextureSRV(uint32_t fetch_constant,
|
void TextureCache::WriteTextureSRV(uint32_t fetch_constant,
|
||||||
|
@ -746,13 +733,13 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
// Request uploading of the texture data to the shared memory.
|
// Request uploading of the texture data to the shared memory.
|
||||||
if (!base_in_sync) {
|
if (!base_in_sync) {
|
||||||
if (!shared_memory_->RequestRange(texture->key.base_page << 12,
|
if (!shared_memory_->RequestRange(texture->key.base_page << 12,
|
||||||
texture->base_size, command_list)) {
|
texture->base_size)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!mips_in_sync) {
|
if (!mips_in_sync) {
|
||||||
if (!shared_memory_->RequestRange(texture->key.mip_page << 12,
|
if (!shared_memory_->RequestRange(texture->key.mip_page << 12,
|
||||||
texture->mip_size, command_list)) {
|
texture->mip_size)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -791,7 +778,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
shared_memory_->UseForReading(command_list);
|
shared_memory_->UseForReading();
|
||||||
shared_memory_->CreateSRV(descriptor_cpu_start);
|
shared_memory_->CreateSRV(descriptor_cpu_start);
|
||||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||||
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||||
|
@ -811,19 +798,9 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||||
|
|
||||||
// Submit commands.
|
// Submit commands.
|
||||||
D3D12_RESOURCE_BARRIER barriers[2];
|
command_processor_->PushTransitionBarrier(texture->resource, texture->state,
|
||||||
barriers[0].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
D3D12_RESOURCE_STATE_COPY_DEST);
|
||||||
barriers[1].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
|
||||||
if (texture->state != D3D12_RESOURCE_STATE_COPY_DEST) {
|
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
|
||||||
barriers[0].Transition.pResource = texture->resource;
|
|
||||||
barriers[0].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[0].Transition.StateBefore = texture->state;
|
|
||||||
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
||||||
command_list->ResourceBarrier(1, barriers);
|
|
||||||
texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
|
texture->state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||||
}
|
|
||||||
uint32_t mip_first = base_in_sync ? 1 : 0;
|
uint32_t mip_first = base_in_sync ? 1 : 0;
|
||||||
uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1;
|
uint32_t mip_last = mips_in_sync ? 0 : resource_desc.MipLevels - 1;
|
||||||
auto cbuffer_pool = command_processor_->GetConstantBufferPool();
|
auto cbuffer_pool = command_processor_->GetConstantBufferPool();
|
||||||
|
@ -836,16 +813,9 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
copy_constants.guest_mip_offset[2] = 0;
|
copy_constants.guest_mip_offset[2] = 0;
|
||||||
}
|
}
|
||||||
for (uint32_t i = 0; i < slice_count; ++i) {
|
for (uint32_t i = 0; i < slice_count; ++i) {
|
||||||
if (copy_buffer_state != D3D12_RESOURCE_STATE_UNORDERED_ACCESS) {
|
command_processor_->PushTransitionBarrier(
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
barriers[0].Transition.pResource = copy_buffer;
|
|
||||||
barriers[0].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[0].Transition.StateBefore = copy_buffer_state;
|
|
||||||
barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
||||||
command_list->ResourceBarrier(1, barriers);
|
|
||||||
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
copy_buffer_state = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||||
}
|
|
||||||
for (uint32_t j = mip_first; j <= mip_last; ++j) {
|
for (uint32_t j = mip_first; j <= mip_last; ++j) {
|
||||||
if (j == 0) {
|
if (j == 0) {
|
||||||
copy_constants.guest_base =
|
copy_constants.guest_base =
|
||||||
|
@ -885,21 +855,17 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
}
|
}
|
||||||
std::memcpy(cbuffer_mapping, &copy_constants, sizeof(copy_constants));
|
std::memcpy(cbuffer_mapping, &copy_constants, sizeof(copy_constants));
|
||||||
command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address);
|
command_list->SetComputeRootConstantBufferView(0, cbuffer_gpu_address);
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
// Each thread group processes 32x32x1 blocks.
|
// Each thread group processes 32x32x1 blocks.
|
||||||
command_list->Dispatch((copy_constants.size_blocks[0] + 31) >> 5,
|
command_list->Dispatch((copy_constants.size_blocks[0] + 31) >> 5,
|
||||||
(copy_constants.size_blocks[1] + 31) >> 5,
|
(copy_constants.size_blocks[1] + 31) >> 5,
|
||||||
copy_constants.size_blocks[2]);
|
copy_constants.size_blocks[2]);
|
||||||
}
|
}
|
||||||
barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
command_processor_->PushUAVBarrier(copy_buffer);
|
||||||
barriers[0].UAV.pResource = copy_buffer;
|
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
|
||||||
barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||||
barriers[1].Transition.pResource = copy_buffer;
|
|
||||||
barriers[1].Transition.Subresource =
|
|
||||||
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
|
||||||
barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
|
||||||
barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
|
||||||
command_list->ResourceBarrier(2, barriers);
|
|
||||||
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||||
|
command_processor_->SubmitBarriers();
|
||||||
UINT slice_first_subresource = i * resource_desc.MipLevels;
|
UINT slice_first_subresource = i * resource_desc.MipLevels;
|
||||||
for (uint32_t j = mip_first; j <= mip_last; ++j) {
|
for (uint32_t j = mip_first; j <= mip_last; ++j) {
|
||||||
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
D3D12_TEXTURE_COPY_LOCATION location_source, location_dest;
|
||||||
|
|
Loading…
Reference in New Issue