[D3D12] Disable placed resources for RTV/DSV because of Nvidia
This commit is contained in:
parent
6a8a080703
commit
ae48b3ffa8
|
@ -46,7 +46,9 @@ namespace d3d12 {
|
|||
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_ps.h"
|
||||
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_vs.h"
|
||||
|
||||
#if 0
|
||||
constexpr uint32_t RenderTargetCache::kHeap4MBPages;
|
||||
#endif
|
||||
constexpr uint32_t RenderTargetCache::kRenderTargetDescriptorHeapSize;
|
||||
|
||||
const RenderTargetCache::EDRAMLoadStoreModeInfo
|
||||
|
@ -325,12 +327,14 @@ void RenderTargetCache::ClearCache() {
|
|||
delete heap;
|
||||
}
|
||||
|
||||
#if 0
|
||||
for (uint32_t i = 0; i < xe::countof(heaps_); ++i) {
|
||||
if (heaps_[i] != nullptr) {
|
||||
heaps_[i]->Release();
|
||||
heaps_[i] = nullptr;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void RenderTargetCache::BeginFrame() {
|
||||
|
@ -613,7 +617,9 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
|
||||
// Need to change the bindings.
|
||||
if (full_update || render_targets_to_attach) {
|
||||
#if 0
|
||||
uint32_t heap_usage[5] = {};
|
||||
#endif
|
||||
if (full_update) {
|
||||
// Export the currently bound render targets before we ruin the bindings.
|
||||
StoreRenderTargetsToEDRAM();
|
||||
|
@ -631,6 +637,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
#if 0
|
||||
// If updating partially, only need to attach new render targets.
|
||||
for (uint32_t i = 0; i < 5; ++i) {
|
||||
const RenderTargetBinding& binding = current_bindings_[i];
|
||||
|
@ -642,9 +649,9 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
// There are no holes between 4 MB pages in each heap.
|
||||
heap_usage[render_target->heap_page_first / kHeap4MBPages] +=
|
||||
render_target->heap_page_count;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
|
||||
full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
|
||||
|
@ -676,6 +683,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
continue;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Calculate the number of 4 MB pages of the heaps this RT will use.
|
||||
D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
|
||||
device->GetResourceAllocationInfo(0, 1, &resource_desc);
|
||||
|
@ -710,6 +718,22 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
|
|||
// Inform Direct3D that we're reusing the heap for this render target.
|
||||
command_processor_->PushAliasingBarrier(nullptr,
|
||||
binding.render_target->resource);
|
||||
#else
|
||||
// If multiple render targets have the same format, assign different
|
||||
// instance numbers to them.
|
||||
uint32_t instance = 0;
|
||||
if (i != 4) {
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
const RenderTargetBinding& other_binding = current_bindings_[j];
|
||||
if (other_binding.is_bound &&
|
||||
other_binding.render_target != nullptr &&
|
||||
other_binding.format == formats[i]) {
|
||||
++instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
binding.render_target = FindOrCreateRenderTarget(key, instance);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sample positions when loading depth must match sample positions when
|
||||
|
@ -1236,9 +1260,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
uint32_t copy_width = copy_rect.right - copy_rect.left;
|
||||
uint32_t copy_height = copy_rect.bottom - copy_rect.top;
|
||||
// Resolve target for output merger format conversion.
|
||||
#if 0
|
||||
ResolveTarget* resolve_target =
|
||||
FindOrCreateResolveTarget(copy_width, copy_height, dest_dxgi_format,
|
||||
render_target->heap_page_count);
|
||||
#else
|
||||
ResolveTarget* resolve_target =
|
||||
FindOrCreateResolveTarget(copy_width, copy_height, dest_dxgi_format);
|
||||
#endif
|
||||
if (resolve_target == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1299,7 +1328,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
|
||||
// Copy the EDRAM buffer contents to the source texture.
|
||||
|
||||
#if 0
|
||||
command_processor_->PushAliasingBarrier(nullptr, render_target->resource);
|
||||
#endif
|
||||
command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state,
|
||||
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
|
@ -1321,7 +1352,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
|||
// Do the resolve. Render targets unbound already, safe to call
|
||||
// OMSetRenderTargets.
|
||||
|
||||
#if 0
|
||||
command_processor_->PushAliasingBarrier(nullptr, resolve_target->resource);
|
||||
#endif
|
||||
command_processor_->PushTransitionBarrier(
|
||||
render_target->resource, render_target->state,
|
||||
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
|
@ -1628,9 +1661,17 @@ ID3D12PipelineState* RenderTargetCache::GetResolvePipeline(
|
|||
}
|
||||
|
||||
RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
||||
#if 0
|
||||
uint32_t width, uint32_t height, DXGI_FORMAT format,
|
||||
uint32_t min_heap_page_first) {
|
||||
#else
|
||||
uint32_t width, uint32_t height, DXGI_FORMAT format
|
||||
#endif
|
||||
) {
|
||||
#if 0
|
||||
assert_true(min_heap_page_first < kHeap4MBPages * 5);
|
||||
#endif
|
||||
|
||||
if (width == 0 || height == 0 || width > 8192 || height > 8192) {
|
||||
assert_always();
|
||||
return nullptr;
|
||||
|
@ -1641,6 +1682,7 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
|||
key.format = format;
|
||||
|
||||
// Try to find an existing target that isn't overlapping the resolve source.
|
||||
#if 0
|
||||
auto found_range = resolve_targets_.equal_range(key.value);
|
||||
for (auto iter = found_range.first; iter != found_range.second; ++iter) {
|
||||
ResolveTarget* found_resolve_target = iter->second;
|
||||
|
@ -1648,6 +1690,12 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
|||
return found_resolve_target;
|
||||
}
|
||||
}
|
||||
#else
|
||||
auto found_iter = resolve_targets_.find(key.value);
|
||||
if (found_iter != resolve_targets_.end()) {
|
||||
return found_iter->second;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ensure the new resolve target can get an RTV descriptor.
|
||||
if (!EnsureRTVHeapAvailable(false)) {
|
||||
|
@ -1669,6 +1717,8 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
|||
resource_desc.SampleDesc.Quality = 0;
|
||||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
|
||||
#if 0
|
||||
D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
|
||||
device->GetResourceAllocationInfo(0, 1, &resource_desc);
|
||||
uint32_t heap_page_count =
|
||||
|
@ -1687,17 +1737,18 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
|||
min_heap_page_first = xe::round_up(min_heap_page_first, kHeap4MBPages);
|
||||
assert_true(min_heap_page_first < kHeap4MBPages * 5);
|
||||
}
|
||||
|
||||
// Create the memory heap if it doesn't exist yet.
|
||||
uint32_t heap_index = min_heap_page_first / kHeap4MBPages;
|
||||
if (!MakeHeapResident(heap_index)) {
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Create it.
|
||||
// The first action likely to be done is resolve.
|
||||
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_RENDER_TARGET;
|
||||
ID3D12Resource* resource;
|
||||
#if 0
|
||||
if (FAILED(device->CreatePlacedResource(
|
||||
heaps_[heap_index], (min_heap_page_first % kHeap4MBPages) << 22,
|
||||
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
|
||||
|
@ -1708,6 +1759,17 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
|||
min_heap_page_first, min_heap_page_first + heap_page_count - 1);
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
if (FAILED(device->CreateCommittedResource(
|
||||
&ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
|
||||
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
|
||||
XELOGE(
|
||||
"Failed to create a committed resource for %ux%u resolve target with "
|
||||
"DXGI format %u",
|
||||
uint32_t(resource_desc.Width), resource_desc.Height, format);
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Create the RTV.
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle =
|
||||
|
@ -1727,7 +1789,9 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
|
|||
resolve_target->state = state;
|
||||
resolve_target->rtv_handle.ptr = rtv_handle.ptr;
|
||||
resolve_target->key.value = key.value;
|
||||
#if 0
|
||||
resolve_target->heap_page_first = min_heap_page_first;
|
||||
#endif
|
||||
UINT64 copy_buffer_size;
|
||||
device->GetCopyableFootprints(&resource_desc, 0, 1, 0,
|
||||
&resolve_target->footprint, nullptr, nullptr,
|
||||
|
@ -1835,6 +1899,7 @@ void RenderTargetCache::ClearBindings() {
|
|||
std::memset(current_bindings_, 0, sizeof(current_bindings_));
|
||||
}
|
||||
|
||||
#if 0
|
||||
bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) {
|
||||
if (heap_index >= 5) {
|
||||
assert_always();
|
||||
|
@ -1859,6 +1924,7 @@ bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool RenderTargetCache::EnsureRTVHeapAvailable(bool is_depth) {
|
||||
auto& heap = is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_;
|
||||
|
@ -1918,16 +1984,29 @@ bool RenderTargetCache::GetResourceDesc(RenderTargetKey key,
|
|||
}
|
||||
|
||||
RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
||||
RenderTargetKey key, uint32_t heap_page_first) {
|
||||
#if 0
|
||||
RenderTargetKey key, uint32_t heap_page_first
|
||||
#else
|
||||
RenderTargetKey key, uint32_t instance
|
||||
#endif
|
||||
) {
|
||||
#if 0
|
||||
assert_true(heap_page_first < kHeap4MBPages * 5);
|
||||
#endif
|
||||
|
||||
// Try to find an existing render target.
|
||||
auto found_range = render_targets_.equal_range(key.value);
|
||||
for (auto iter = found_range.first; iter != found_range.second; ++iter) {
|
||||
RenderTarget* found_render_target = iter->second;
|
||||
#if 0
|
||||
if (found_render_target->heap_page_first == heap_page_first) {
|
||||
return found_render_target;
|
||||
}
|
||||
#else
|
||||
if (found_render_target->instance == instance) {
|
||||
return found_render_target;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
D3D12_RESOURCE_DESC resource_desc;
|
||||
|
@ -1938,6 +2017,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
|
||||
#if 0
|
||||
// Get the number of heap pages needed for the render target.
|
||||
D3D12_RESOURCE_ALLOCATION_INFO allocation_info =
|
||||
device->GetResourceAllocationInfo(0, 1, &resource_desc);
|
||||
|
@ -1948,21 +2028,25 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
assert_always();
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ensure we can create a new descriptor in the render target heap.
|
||||
if (!EnsureRTVHeapAvailable(key.is_depth)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Create the memory heap if it doesn't exist yet.
|
||||
uint32_t heap_index = heap_page_first / kHeap4MBPages;
|
||||
if (!MakeHeapResident(heap_index)) {
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// The first action likely to be done is EDRAM buffer load.
|
||||
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST;
|
||||
ID3D12Resource* resource;
|
||||
#if 0
|
||||
if (FAILED(device->CreatePlacedResource(
|
||||
heaps_[heap_index], (heap_page_first % kHeap4MBPages) << 22,
|
||||
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
|
||||
|
@ -1974,6 +2058,18 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
heap_page_first + heap_page_count - 1);
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
if (FAILED(device->CreateCommittedResource(
|
||||
&ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE,
|
||||
&resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) {
|
||||
XELOGE(
|
||||
"Failed to create a committed resource for %ux%u %s render target with "
|
||||
"format %u",
|
||||
uint32_t(resource_desc.Width), resource_desc.Height,
|
||||
key.is_depth ? "depth" : "color", key.format);
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Create the descriptor for the render target.
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle;
|
||||
|
@ -2007,19 +2103,29 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
|||
render_target->state = state;
|
||||
render_target->handle = descriptor_handle;
|
||||
render_target->key = key;
|
||||
#if 0
|
||||
render_target->heap_page_first = heap_page_first;
|
||||
render_target->heap_page_count = heap_page_count;
|
||||
#else
|
||||
render_target->instance = instance;
|
||||
#endif
|
||||
UINT64 copy_buffer_size;
|
||||
device->GetCopyableFootprints(&resource_desc, 0, key.is_depth ? 2 : 1, 0,
|
||||
render_target->footprints, nullptr, nullptr,
|
||||
&copy_buffer_size);
|
||||
render_target->copy_buffer_size = uint32_t(copy_buffer_size);
|
||||
render_targets_.insert(std::make_pair(key.value, render_target));
|
||||
#if 0
|
||||
XELOGGPU(
|
||||
"Created %ux%u %s render target with format %u at heap 4 MB pages %u:%u",
|
||||
uint32_t(resource_desc.Width), resource_desc.Height,
|
||||
key.is_depth ? "depth" : "color", key.format, heap_page_first,
|
||||
heap_page_first + heap_page_count - 1);
|
||||
#else
|
||||
XELOGGPU("Created %ux%u %s render target with format %u",
|
||||
uint32_t(resource_desc.Width), resource_desc.Height,
|
||||
key.is_depth ? "depth" : "color", key.format);
|
||||
#endif
|
||||
return render_target;
|
||||
}
|
||||
|
||||
|
|
|
@ -214,7 +214,7 @@ class D3D12CommandProcessor;
|
|||
// not always present - D3DPT_RECTLIST is used very commonly, especially for
|
||||
// clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and
|
||||
// copying, and it's usually drawn without a viewport and with 8192x8192
|
||||
// scissor), there may be cases of simulatenously bound render targets
|
||||
// scissor), there may be cases of simultaneously bound render targets
|
||||
// overlapping each other in the EDRAM in a way that is difficult to resolve,
|
||||
// and stores/loads may destroy data.
|
||||
class RenderTargetCache {
|
||||
|
@ -337,10 +337,16 @@ class RenderTargetCache {
|
|||
D3D12_RESOURCE_STATES state;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle;
|
||||
RenderTargetKey key;
|
||||
#if 0
|
||||
// The first 4 MB page in the heaps.
|
||||
uint32_t heap_page_first;
|
||||
// The number of 4 MB pages this render target uses.
|
||||
uint32_t heap_page_count;
|
||||
#else
|
||||
// Index of the render target when multiple render targets with the same key
|
||||
// are bound simultaneously.
|
||||
uint32_t instance;
|
||||
#endif
|
||||
// Color/depth and stencil layouts.
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprints[2];
|
||||
// Buffer size needed to copy the render target to the EDRAM buffer.
|
||||
|
@ -383,8 +389,10 @@ class RenderTargetCache {
|
|||
D3D12_RESOURCE_STATES state;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
|
||||
ResolveTargetKey key;
|
||||
#if 0
|
||||
// The first 4 MB page in the heaps.
|
||||
uint32_t heap_page_first;
|
||||
#endif
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint;
|
||||
// Buffer size needed to copy the resolve target to a linear buffer.
|
||||
uint32_t copy_buffer_size;
|
||||
|
@ -396,9 +404,11 @@ class RenderTargetCache {
|
|||
|
||||
void ClearBindings();
|
||||
|
||||
#if 0
|
||||
// Checks if the heap for the render target exists and tries to create it if
|
||||
// it's not.
|
||||
bool MakeHeapResident(uint32_t heap_index);
|
||||
#endif
|
||||
|
||||
// Creates a new RTV/DSV descriptor heap if needed to be able to allocate one
|
||||
// descriptor in it.
|
||||
|
@ -407,8 +417,13 @@ class RenderTargetCache {
|
|||
// Returns true if a render target with such key can be created.
|
||||
static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc);
|
||||
|
||||
#if 0
|
||||
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
|
||||
uint32_t heap_page_first);
|
||||
#else
|
||||
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
|
||||
uint32_t instance);
|
||||
#endif
|
||||
|
||||
// Calculates the tile layout for a rectangle on a render target of the given
|
||||
// configuration. The base is adjusted so it points to the tile containing the
|
||||
|
@ -453,9 +468,14 @@ class RenderTargetCache {
|
|||
// Returns any available resolve target placed at least at
|
||||
// min_heap_first_page, or tries to place it at the specified position (if not
|
||||
// possible, will place it in the next heap).
|
||||
#if 0
|
||||
ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height,
|
||||
DXGI_FORMAT format,
|
||||
uint32_t min_heap_first_page);
|
||||
#else
|
||||
ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height,
|
||||
DXGI_FORMAT format);
|
||||
#endif
|
||||
|
||||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
|
@ -529,6 +549,9 @@ class RenderTargetCache {
|
|||
ID3D12PipelineState* edram_clear_64bpp_pipeline_ = nullptr;
|
||||
ID3D12PipelineState* edram_clear_depth_float_pipeline_ = nullptr;
|
||||
|
||||
// FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on
|
||||
// Nvidia Maxwell 1st generation and older.
|
||||
#if 0
|
||||
// 48 MB heaps backing used render targets resources, created when needed.
|
||||
// 24 MB proved to be not enough to store a single render target occupying the
|
||||
// entire EDRAM - a 32-bit depth/stencil one - at some resolution.
|
||||
|
@ -536,6 +559,7 @@ class RenderTargetCache {
|
|||
// into a k_32_32_32_32_FLOAT texture.
|
||||
ID3D12Heap* heaps_[5] = {};
|
||||
static constexpr uint32_t kHeap4MBPages = 12;
|
||||
#endif
|
||||
|
||||
static constexpr uint32_t kRenderTargetDescriptorHeapSize = 2048;
|
||||
// Descriptor heap, for linear allocation of heaps and descriptors.
|
||||
|
@ -581,7 +605,11 @@ class RenderTargetCache {
|
|||
uint32_t resolve_info;
|
||||
};
|
||||
std::vector<ResolvePipeline> resolve_pipelines_;
|
||||
#if 0
|
||||
std::unordered_multimap<uint32_t, ResolveTarget*> resolve_targets_;
|
||||
#else
|
||||
std::unordered_map<uint32_t, ResolveTarget*> resolve_targets_;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace d3d12
|
||||
|
|
Loading…
Reference in New Issue