diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index a36d7137d..438f6d101 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -46,7 +46,9 @@ namespace d3d12 { #include "xenia/gpu/d3d12/shaders/dxbc/resolve_ps.h" #include "xenia/gpu/d3d12/shaders/dxbc/resolve_vs.h" +#if 0 constexpr uint32_t RenderTargetCache::kHeap4MBPages; +#endif constexpr uint32_t RenderTargetCache::kRenderTargetDescriptorHeapSize; const RenderTargetCache::EDRAMLoadStoreModeInfo @@ -325,12 +327,14 @@ void RenderTargetCache::ClearCache() { delete heap; } +#if 0 for (uint32_t i = 0; i < xe::countof(heaps_); ++i) { if (heaps_[i] != nullptr) { heaps_[i]->Release(); heaps_[i] = nullptr; } } +#endif } void RenderTargetCache::BeginFrame() { @@ -613,7 +617,9 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // Need to change the bindings. if (full_update || render_targets_to_attach) { +#if 0 uint32_t heap_usage[5] = {}; +#endif if (full_update) { // Export the currently bound render targets before we ruin the bindings. StoreRenderTargetsToEDRAM(); @@ -631,6 +637,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { } } } else { +#if 0 // If updating partially, only need to attach new render targets. for (uint32_t i = 0; i < 5; ++i) { const RenderTargetBinding& binding = current_bindings_[i]; @@ -642,9 +649,9 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // There are no holes between 4 MB pages in each heap. heap_usage[render_target->heap_page_first / kHeap4MBPages] += render_target->heap_page_count; - continue; } } +#endif } XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u", full_update ? "Full" : "Partial", surface_pitch, msaa_samples, @@ -676,6 +683,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { continue; } +#if 0 // Calculate the number of 4 MB pages of the heaps this RT will use. D3D12_RESOURCE_ALLOCATION_INFO allocation_info = device->GetResourceAllocationInfo(0, 1, &resource_desc); @@ -710,6 +718,22 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // Inform Direct3D that we're reusing the heap for this render target. command_processor_->PushAliasingBarrier(nullptr, binding.render_target->resource); +#else + // If multiple render targets have the same format, assign different + // instance numbers to them. + uint32_t instance = 0; + if (i != 4) { + for (uint32_t j = 0; j < i; ++j) { + const RenderTargetBinding& other_binding = current_bindings_[j]; + if (other_binding.is_bound && + other_binding.render_target != nullptr && + other_binding.format == formats[i]) { + ++instance; + } + } + } + binding.render_target = FindOrCreateRenderTarget(key, instance); +#endif } // Sample positions when loading depth must match sample positions when @@ -1236,9 +1260,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, uint32_t copy_width = copy_rect.right - copy_rect.left; uint32_t copy_height = copy_rect.bottom - copy_rect.top; // Resolve target for output merger format conversion. +#if 0 ResolveTarget* resolve_target = FindOrCreateResolveTarget(copy_width, copy_height, dest_dxgi_format, render_target->heap_page_count); +#else + ResolveTarget* resolve_target = + FindOrCreateResolveTarget(copy_width, copy_height, dest_dxgi_format); +#endif if (resolve_target == nullptr) { return false; } @@ -1299,7 +1328,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // Copy the EDRAM buffer contents to the source texture. +#if 0 command_processor_->PushAliasingBarrier(nullptr, render_target->resource); +#endif command_processor_->PushTransitionBarrier(copy_buffer, copy_buffer_state, D3D12_RESOURCE_STATE_COPY_SOURCE); copy_buffer_state = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -1321,7 +1352,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // Do the resolve. Render targets unbound already, safe to call // OMSetRenderTargets. +#if 0 command_processor_->PushAliasingBarrier(nullptr, resolve_target->resource); +#endif command_processor_->PushTransitionBarrier( render_target->resource, render_target->state, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); @@ -1628,9 +1661,17 @@ ID3D12PipelineState* RenderTargetCache::GetResolvePipeline( } RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( +#if 0 uint32_t width, uint32_t height, DXGI_FORMAT format, uint32_t min_heap_page_first) { +#else + uint32_t width, uint32_t height, DXGI_FORMAT format +#endif +) { +#if 0 assert_true(min_heap_page_first < kHeap4MBPages * 5); +#endif + if (width == 0 || height == 0 || width > 8192 || height > 8192) { assert_always(); return nullptr; @@ -1641,6 +1682,7 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( key.format = format; // Try to find an existing target that isn't overlapping the resolve source. +#if 0 auto found_range = resolve_targets_.equal_range(key.value); for (auto iter = found_range.first; iter != found_range.second; ++iter) { ResolveTarget* found_resolve_target = iter->second; @@ -1648,6 +1690,12 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( return found_resolve_target; } } +#else + auto found_iter = resolve_targets_.find(key.value); + if (found_iter != resolve_targets_.end()) { + return found_iter->second; + } +#endif // Ensure the new resolve target can get an RTV descriptor. if (!EnsureRTVHeapAvailable(false)) { @@ -1669,6 +1717,8 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( resource_desc.SampleDesc.Quality = 0; resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + +#if 0 D3D12_RESOURCE_ALLOCATION_INFO allocation_info = device->GetResourceAllocationInfo(0, 1, &resource_desc); uint32_t heap_page_count = @@ -1687,17 +1737,18 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( min_heap_page_first = xe::round_up(min_heap_page_first, kHeap4MBPages); assert_true(min_heap_page_first < kHeap4MBPages * 5); } - // Create the memory heap if it doesn't exist yet. uint32_t heap_index = min_heap_page_first / kHeap4MBPages; if (!MakeHeapResident(heap_index)) { return nullptr; } +#endif // Create it. // The first action likely to be done is resolve. D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_RENDER_TARGET; ID3D12Resource* resource; +#if 0 if (FAILED(device->CreatePlacedResource( heaps_[heap_index], (min_heap_page_first % kHeap4MBPages) << 22, &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { @@ -1708,6 +1759,17 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( min_heap_page_first, min_heap_page_first + heap_page_count - 1); return nullptr; } +#else + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { + XELOGE( + "Failed to create a committed resource for %ux%u resolve target with " + "DXGI format %u", + uint32_t(resource_desc.Width), resource_desc.Height, format); + return nullptr; + } +#endif // Create the RTV. D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = @@ -1727,7 +1789,9 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget( resolve_target->state = state; resolve_target->rtv_handle.ptr = rtv_handle.ptr; resolve_target->key.value = key.value; +#if 0 resolve_target->heap_page_first = min_heap_page_first; +#endif UINT64 copy_buffer_size; device->GetCopyableFootprints(&resource_desc, 0, 1, 0, &resolve_target->footprint, nullptr, nullptr, @@ -1835,6 +1899,7 @@ void RenderTargetCache::ClearBindings() { std::memset(current_bindings_, 0, sizeof(current_bindings_)); } +#if 0 bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) { if (heap_index >= 5) { assert_always(); @@ -1859,6 +1924,7 @@ bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) { } return true; } +#endif bool RenderTargetCache::EnsureRTVHeapAvailable(bool is_depth) { auto& heap = is_depth ? descriptor_heaps_depth_ : descriptor_heaps_color_; @@ -1918,16 +1984,29 @@ bool RenderTargetCache::GetResourceDesc(RenderTargetKey key, } RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( - RenderTargetKey key, uint32_t heap_page_first) { +#if 0 + RenderTargetKey key, uint32_t heap_page_first +#else + RenderTargetKey key, uint32_t instance +#endif +) { +#if 0 assert_true(heap_page_first < kHeap4MBPages * 5); +#endif // Try to find an existing render target. auto found_range = render_targets_.equal_range(key.value); for (auto iter = found_range.first; iter != found_range.second; ++iter) { RenderTarget* found_render_target = iter->second; +#if 0 if (found_render_target->heap_page_first == heap_page_first) { return found_render_target; } +#else + if (found_render_target->instance == instance) { + return found_render_target; + } +#endif } D3D12_RESOURCE_DESC resource_desc; @@ -1938,6 +2017,7 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto device = provider->GetDevice(); +#if 0 // Get the number of heap pages needed for the render target. D3D12_RESOURCE_ALLOCATION_INFO allocation_info = device->GetResourceAllocationInfo(0, 1, &resource_desc); @@ -1948,21 +2028,25 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( assert_always(); return nullptr; } +#endif // Ensure we can create a new descriptor in the render target heap. if (!EnsureRTVHeapAvailable(key.is_depth)) { return nullptr; } +#if 0 // Create the memory heap if it doesn't exist yet. uint32_t heap_index = heap_page_first / kHeap4MBPages; if (!MakeHeapResident(heap_index)) { return nullptr; } +#endif // The first action likely to be done is EDRAM buffer load. D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST; ID3D12Resource* resource; +#if 0 if (FAILED(device->CreatePlacedResource( heaps_[heap_index], (heap_page_first % kHeap4MBPages) << 22, &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { @@ -1974,6 +2058,18 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( heap_page_first + heap_page_count - 1); return nullptr; } +#else + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { + XELOGE( + "Failed to create a committed resource for %ux%u %s render target with " + "format %u", + uint32_t(resource_desc.Width), resource_desc.Height, + key.is_depth ? "depth" : "color", key.format); + return nullptr; + } +#endif // Create the descriptor for the render target. D3D12_CPU_DESCRIPTOR_HANDLE descriptor_handle; @@ -2007,19 +2103,29 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( render_target->state = state; render_target->handle = descriptor_handle; render_target->key = key; +#if 0 render_target->heap_page_first = heap_page_first; render_target->heap_page_count = heap_page_count; +#else + render_target->instance = instance; +#endif UINT64 copy_buffer_size; device->GetCopyableFootprints(&resource_desc, 0, key.is_depth ? 2 : 1, 0, render_target->footprints, nullptr, nullptr, ©_buffer_size); render_target->copy_buffer_size = uint32_t(copy_buffer_size); render_targets_.insert(std::make_pair(key.value, render_target)); +#if 0 XELOGGPU( "Created %ux%u %s render target with format %u at heap 4 MB pages %u:%u", uint32_t(resource_desc.Width), resource_desc.Height, key.is_depth ? "depth" : "color", key.format, heap_page_first, heap_page_first + heap_page_count - 1); +#else + XELOGGPU("Created %ux%u %s render target with format %u", + uint32_t(resource_desc.Width), resource_desc.Height, + key.is_depth ? "depth" : "color", key.format); +#endif return render_target; } diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index b500daec0..dae6951e7 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -214,7 +214,7 @@ class D3D12CommandProcessor; // not always present - D3DPT_RECTLIST is used very commonly, especially for // clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and // copying, and it's usually drawn without a viewport and with 8192x8192 -// scissor), there may be cases of simulatenously bound render targets +// scissor), there may be cases of simultaneously bound render targets // overlapping each other in the EDRAM in a way that is difficult to resolve, // and stores/loads may destroy data. class RenderTargetCache { @@ -337,10 +337,16 @@ class RenderTargetCache { D3D12_RESOURCE_STATES state; D3D12_CPU_DESCRIPTOR_HANDLE handle; RenderTargetKey key; +#if 0 // The first 4 MB page in the heaps. uint32_t heap_page_first; // The number of 4 MB pages this render target uses. uint32_t heap_page_count; +#else + // Index of the render target when multiple render targets with the same key + // are bound simultaneously. + uint32_t instance; +#endif // Color/depth and stencil layouts. D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprints[2]; // Buffer size needed to copy the render target to the EDRAM buffer. @@ -383,8 +389,10 @@ class RenderTargetCache { D3D12_RESOURCE_STATES state; D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle; ResolveTargetKey key; +#if 0 // The first 4 MB page in the heaps. uint32_t heap_page_first; +#endif D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint; // Buffer size needed to copy the resolve target to a linear buffer. uint32_t copy_buffer_size; @@ -396,9 +404,11 @@ class RenderTargetCache { void ClearBindings(); +#if 0 // Checks if the heap for the render target exists and tries to create it if // it's not. bool MakeHeapResident(uint32_t heap_index); +#endif // Creates a new RTV/DSV descriptor heap if needed to be able to allocate one // descriptor in it. @@ -407,8 +417,13 @@ class RenderTargetCache { // Returns true if a render target with such key can be created. static bool GetResourceDesc(RenderTargetKey key, D3D12_RESOURCE_DESC& desc); +#if 0 RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key, uint32_t heap_page_first); +#else + RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key, + uint32_t instance); +#endif // Calculates the tile layout for a rectangle on a render target of the given // configuration. The base is adjusted so it points to the tile containing the @@ -453,9 +468,14 @@ class RenderTargetCache { // Returns any available resolve target placed at least at // min_heap_first_page, or tries to place it at the specified position (if not // possible, will place it in the next heap). +#if 0 ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height, DXGI_FORMAT format, uint32_t min_heap_first_page); +#else + ResolveTarget* FindOrCreateResolveTarget(uint32_t width, uint32_t height, + DXGI_FORMAT format); +#endif D3D12CommandProcessor* command_processor_; RegisterFile* register_file_; @@ -529,6 +549,9 @@ class RenderTargetCache { ID3D12PipelineState* edram_clear_64bpp_pipeline_ = nullptr; ID3D12PipelineState* edram_clear_depth_float_pipeline_ = nullptr; + // FIXME(Triang3l): Investigate what's wrong with placed RTV/DSV aliasing on + // Nvidia Maxwell 1st generation and older. +#if 0 // 48 MB heaps backing used render targets resources, created when needed. // 24 MB proved to be not enough to store a single render target occupying the // entire EDRAM - a 32-bit depth/stencil one - at some resolution. @@ -536,6 +559,7 @@ class RenderTargetCache { // into a k_32_32_32_32_FLOAT texture. ID3D12Heap* heaps_[5] = {}; static constexpr uint32_t kHeap4MBPages = 12; +#endif static constexpr uint32_t kRenderTargetDescriptorHeapSize = 2048; // Descriptor heap, for linear allocation of heaps and descriptors. @@ -581,7 +605,11 @@ class RenderTargetCache { uint32_t resolve_info; }; std::vector resolve_pipelines_; +#if 0 std::unordered_multimap resolve_targets_; +#else + std::unordered_map resolve_targets_; +#endif }; } // namespace d3d12