diff --git a/Source/Core/VideoBackends/D3D12/D3DBase.cpp b/Source/Core/VideoBackends/D3D12/D3DBase.cpp index c95142e133..637b11b059 100644 --- a/Source/Core/VideoBackends/D3D12/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DBase.cpp @@ -362,7 +362,6 @@ HRESULT Create(HWND wnd) IDXGIFactory* factory; IDXGIAdapter* adapter; - IDXGIOutput* output; hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory); if (FAILED(hr)) MessageBox(wnd, _T("Failed to create IDXGIFactory object"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); @@ -376,25 +375,6 @@ HRESULT Create(HWND wnd) MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); } - // TODO: Make this configurable - hr = adapter->EnumOutputs(0, &output); - if (FAILED(hr)) - { - // try using the first one - IDXGIAdapter* firstadapter; - hr = factory->EnumAdapters(0, &firstadapter); - if (!FAILED(hr)) - hr = firstadapter->EnumOutputs(0, &output); - if (FAILED(hr)) - MessageBox(wnd, - _T("Failed to enumerate outputs!\n") - _T("This usually happens when you've set your video adapter to the Nvidia GPU in an Optimus-equipped system.\n") - _T("Set Dolphin to use the high-performance graphics in Nvidia's drivers instead and leave Dolphin's video adapter set to the Intel GPU."), - _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - - SAFE_RELEASE(firstadapter); - } - // get supported AA modes s_aa_modes = EnumAAModes(adapter); @@ -423,52 +403,29 @@ HRESULT Create(HWND wnd) swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; swap_chain_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; -#if defined(_DEBUG) || defined(DEBUGFAST) - // Creating debug devices can sometimes fail if the user doesn't have the correct - // version of the DirectX SDK. If it does, simply fallback to a non-debug device. +#if defined(_DEBUG) || defined(DEBUGFAST) || defined(USE_D3D12_DEBUG_LAYER) + // Enabling the debug layer will fail if the Graphics Tools feature is not installed. + if (SUCCEEDED(hr)) { + ID3D12Debug* debug_controller; + hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); if (SUCCEEDED(hr)) { - ID3D12Debug* debug_controller; - hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); - if (SUCCEEDED(hr)) - { - debug_controller->EnableDebugLayer(); - debug_controller->Release(); - } - else - { - MessageBox(wnd, _T("Failed to initialize Direct3D debug layer, please make sure it is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - } - - hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); - - s_feat_level = D3D_FEATURE_LEVEL_11_0; + debug_controller->EnableDebugLayer(); + debug_controller->Release(); + } + else + { + MessageBox(wnd, _T("WARNING: Failed to enable D3D12 debug layer, please ensure the Graphics Tools feature is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); } } - if (FAILED(hr)) #endif - { - if (SUCCEEDED(hr)) - { -#ifdef USE_D3D12_DEBUG_LAYER - ID3D12Debug* debug_controller; - hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller)); - if (SUCCEEDED(hr)) - { - debug_controller->EnableDebugLayer(); - debug_controller->Release(); - } - else - { - MessageBox(wnd, _T("Failed to initialize Direct3D debug layer."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); - } -#endif - hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); - s_feat_level = D3D_FEATURE_LEVEL_11_0; - } + if (SUCCEEDED(hr)) + { + hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12)); + s_feat_level = D3D_FEATURE_LEVEL_11_0; } if (SUCCEEDED(hr)) @@ -529,11 +486,7 @@ HRESULT Create(HWND wnd) D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_DEPTHSTENCILVIEW_NOT_SET, // Benign. D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, // Benign. D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, // Benign. - D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, // Benign. Probably. - D3D12_MESSAGE_ID_INVALID_SUBRESOURCE_STATE, D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // Benign. - D3D12_MESSAGE_ID_EXECUTECOMMANDLISTS_GPU_WRITTEN_READBACK_RESOURCE_MAPPED, // Benign. - D3D12_MESSAGE_ID_RESOURCE_BARRIER_BEFORE_AFTER_MISMATCH // Benign. Probably. }; filter.DenyList.NumIDs = ARRAYSIZE(id_list); filter.DenyList.pIDList = id_list; @@ -553,7 +506,6 @@ HRESULT Create(HWND wnd) MessageBox(wnd, _T("Failed to associate the window"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR); SAFE_RELEASE(factory); - SAFE_RELEASE(output); SAFE_RELEASE(adapter) CreateDescriptorHeaps(); @@ -738,7 +690,7 @@ void CreateRootSignatures() void WaitForOutstandingRenderingToComplete() { - command_list_mgr->ClearQueueAndWaitForCompletionOfInflightWork(); + command_list_mgr->ExecuteQueuedWork(true); } void Close() @@ -754,8 +706,6 @@ void Close() D3D::CleanupPersistentD3DTextureResources(); - command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction(); - SAFE_RELEASE(s_swap_chain); command_list_mgr.reset(); @@ -839,15 +789,15 @@ unsigned int GetMaxTextureSize() void Reset() { - command_list_mgr->ExecuteQueuedWork(true); - // release all back buffer references for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++) { SAFE_RELEASE(s_backbuf[i]); } - D3D::command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction(); + // Block until all commands have finished. + // This will also final-release all pending resources (including the backbuffer above) + command_list_mgr->ExecuteQueuedWork(true); // resize swapchain buffers RECT client; diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp index fc4a98ae84..07c9b65eab 100644 --- a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. +#include #include #include @@ -69,6 +70,9 @@ D3DCommandListManager::D3DCommandListManager( } m_current_deferred_destruction_list = 0; + + std::fill(m_command_allocator_list_fences.begin(), m_command_allocator_list_fences.end(), 0); + std::fill(m_deferred_destruction_list_fences.begin(), m_deferred_destruction_list_fences.end(), 0); } void D3DCommandListManager::SetInitialCommandListState() @@ -109,42 +113,29 @@ void D3DCommandListManager::ExecuteQueuedWork(bool wait_for_gpu_completion) m_queue_fence_value++; #ifdef USE_D3D12_QUEUED_COMMAND_LISTS - CheckHR(m_queued_command_list->Close()); + m_queued_command_list->Close(); m_queued_command_list->QueueExecute(); - m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - - ResetCommandListWithIdleCommandAllocator(); - - m_queued_command_list->ProcessQueuedItems(); + m_queued_command_list->ProcessQueuedItems(wait_for_gpu_completion, wait_for_gpu_completion); #else CheckHR(m_backing_command_list->Close()); - ID3D12CommandList* const commandListsToExecute[1] = { m_backing_command_list }; - m_command_queue->ExecuteCommandLists(1, commandListsToExecute); + ID3D12CommandList* const execute_list[1] = { m_backing_command_list }; + m_command_queue->ExecuteCommandLists(1, execute_list); - if (wait_for_gpu_completion) - { - CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); - } - - if (m_current_command_allocator == 0) - { - PerformGpuRolloverChecks(); - } - - ResetCommandListWithIdleCommandAllocator(); + CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); #endif + // Notify observers of the fence value for the current work to finish. for (auto it : m_queue_fence_callbacks) it.second(it.first, m_queue_fence_value); - SetInitialCommandListState(); - if (wait_for_gpu_completion) - { - WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); - } + WaitForGPUCompletion(); + + // Re-open the command list, using the current allocator. + ResetCommandList(); + SetInitialCommandListState(); } void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags) @@ -152,62 +143,94 @@ void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_cha m_queue_fence_value++; #ifdef USE_D3D12_QUEUED_COMMAND_LISTS - CheckHR(m_queued_command_list->Close()); + m_queued_command_list->Close(); m_queued_command_list->QueueExecute(); - m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); m_queued_command_list->QueuePresent(swap_chain, sync_interval, flags); - m_queued_command_list->ProcessQueuedItems(true); - - if (m_current_command_allocator == 0) - { - PerformGpuRolloverChecks(); - } - - m_current_command_allocator = (m_current_command_allocator + 1) % m_command_allocator_lists[m_current_command_allocator_list].size(); - - ResetCommandListWithIdleCommandAllocator(); - - SetInitialCommandListState(); -#else - ExecuteQueuedWork(); - m_command_queue->Signal(m_queue_fence, m_queue_fence_value); - CheckHR(swap_chain->Present(sync_interval, flags)); -#endif - - for (auto it : m_queue_fence_callbacks) - it.second(it.first, m_queue_fence_value); -} - -void D3DCommandListManager::WaitForQueuedWorkToBeExecutedOnGPU() -{ - // Wait for GPU to finish all outstanding work. - m_queue_fence_value++; - -#ifdef USE_D3D12_QUEUED_COMMAND_LISTS - m_queued_command_list->QueueExecute(); m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - m_queued_command_list->ProcessQueuedItems(true); #else + CheckHR(m_backing_command_list->Close()); + + ID3D12CommandList* const execute_list[1] = { m_backing_command_list }; + m_command_queue->ExecuteCommandLists(1, execute_list); + + CheckHR(swap_chain->Present(sync_interval, flags)); CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); #endif - WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); + // Notify observers of the fence value for the current work to finish. + for (auto it : m_queue_fence_callbacks) + it.second(it.first, m_queue_fence_value); + + // Move to the next command allocator, this may mean switching allocator lists. + MoveToNextCommandAllocator(); + ResetCommandList(); + SetInitialCommandListState(); } -void D3DCommandListManager::PerformGpuRolloverChecks() +void D3DCommandListManager::DestroyAllPendingResources() { - // Insert fence to measure GPU progress, ensure we aren't using in-use command allocators. - if (m_queue_frame_fence->GetCompletedValue() < m_queue_frame_fence_value) + for (auto& destruction_list : m_deferred_destruction_lists) { - WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value); + for (auto& resource : destruction_list) + resource->Release(); + + destruction_list.clear(); } +} + +void D3DCommandListManager::ResetAllCommandAllocators() +{ + for (auto& allocator_list : m_command_allocator_lists) + { + for (auto& allocator : allocator_list) + allocator->Reset(); + } + + // Move back to the start, using the first allocator of first list. + m_current_command_allocator = 0; + m_current_command_allocator_list = 0; + m_current_deferred_destruction_list = 0; +} + +void D3DCommandListManager::WaitForGPUCompletion() +{ + // Wait for GPU to finish all outstanding work. + // This method assumes that no command lists are open. + m_queue_frame_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); + m_queued_command_list->ProcessQueuedItems(true); +#else + CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); +#endif + + WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value); + + // GPU is up to date with us. Therefore, it has finished with any pending resources. + DestroyAllPendingResources(); + + // Command allocators are also up-to-date, so reset these. + ResetAllCommandAllocators(); +} + +void D3DCommandListManager::PerformGPURolloverChecks() +{ + m_queue_frame_fence_value++; + +#ifdef USE_D3D12_QUEUED_COMMAND_LISTS + m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); +#else + CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); +#endif // We now know that the previous 'set' of command lists has completed on GPU, and it is safe to // release resources / start back at beginning of command allocator list. // Begin Deferred Resource Destruction UINT safe_to_delete_deferred_destruction_list = (m_current_deferred_destruction_list - 1) % m_deferred_destruction_lists.size(); + WaitOnCPUForFence(m_queue_frame_fence, m_deferred_destruction_list_fences[safe_to_delete_deferred_destruction_list]); for (UINT i = 0; i < m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].size(); i++) { @@ -216,30 +239,37 @@ void D3DCommandListManager::PerformGpuRolloverChecks() m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].clear(); + m_deferred_destruction_list_fences[m_current_deferred_destruction_list] = m_queue_frame_fence_value; m_current_deferred_destruction_list = (m_current_deferred_destruction_list + 1) % m_deferred_destruction_lists.size(); // End Deferred Resource Destruction // Begin Command Allocator Resets UINT safe_to_reset_command_allocator_list = (m_current_command_allocator_list - 1) % m_command_allocator_lists.size(); + WaitOnCPUForFence(m_queue_frame_fence, m_command_allocator_list_fences[safe_to_reset_command_allocator_list]); for (UINT i = 0; i < m_command_allocator_lists[safe_to_reset_command_allocator_list].size(); i++) { CheckHR(m_command_allocator_lists[safe_to_reset_command_allocator_list][i]->Reset()); } + m_command_allocator_list_fences[m_current_command_allocator_list] = m_queue_frame_fence_value; m_current_command_allocator_list = (m_current_command_allocator_list + 1) % m_command_allocator_lists.size(); + m_current_command_allocator = 0; // End Command Allocator Resets - - m_queue_frame_fence_value++; -#ifdef USE_D3D12_QUEUED_COMMAND_LISTS - m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value); -#else - CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value)); -#endif } -void D3DCommandListManager::ResetCommandListWithIdleCommandAllocator() +void D3DCommandListManager::MoveToNextCommandAllocator() +{ + // Move to the next allocator in the current allocator list. + m_current_command_allocator = (m_current_command_allocator + 1) % m_command_allocator_lists[m_current_command_allocator_list].size(); + + // Did we wrap around? Move to the next set of allocators. + if (m_current_command_allocator == 0) + PerformGPURolloverChecks(); +} + +void D3DCommandListManager::ResetCommandList() { #ifdef USE_D3D12_QUEUED_COMMAND_LISTS ID3D12QueuedCommandList* command_list = m_queued_command_list; @@ -257,38 +287,18 @@ void D3DCommandListManager::DestroyResourceAfterCurrentCommandListExecuted(ID3D1 m_deferred_destruction_lists[m_current_deferred_destruction_list].push_back(resource); } -void D3DCommandListManager::ImmediatelyDestroyAllResourcesScheduledForDestruction() -{ - for (auto& destruction_list : m_deferred_destruction_lists) - { - for (auto& resource : destruction_list) - resource->Release(); - - destruction_list.clear(); - } -} - -void D3DCommandListManager::ClearQueueAndWaitForCompletionOfInflightWork() -{ - // Wait for GPU to finish all outstanding work. - m_queue_fence_value++; -#ifdef USE_D3D12_QUEUED_COMMAND_LISTS - m_queued_command_list->ClearQueue(); // Waits for currently-processing work to finish, then clears queue. - m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value); - m_queued_command_list->ProcessQueuedItems(true); -#else - CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value)); -#endif - WaitOnCPUForFence(m_queue_fence, m_queue_fence_value); -} - D3DCommandListManager::~D3DCommandListManager() { - ImmediatelyDestroyAllResourcesScheduledForDestruction(); - #ifdef USE_D3D12_QUEUED_COMMAND_LISTS + // Wait for background thread to exit. m_queued_command_list->Release(); #endif + + // The command list will still be open, close it before destroying. + m_backing_command_list->Close(); + + DestroyAllPendingResources(); + m_backing_command_list->Release(); for (auto& allocator_list : m_command_allocator_lists) @@ -305,8 +315,10 @@ D3DCommandListManager::~D3DCommandListManager() void D3DCommandListManager::WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value) { - CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event)); + if (fence->GetCompletedValue() >= fence_value) + return; + CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event)); WaitForSingleObject(m_wait_on_cpu_fence_event, INFINITE); } diff --git a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h index b9622df5fc..f404a8a0ac 100644 --- a/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h +++ b/Source/Core/VideoBackends/D3D12/D3DCommandListManager.h @@ -38,11 +38,7 @@ public: void ExecuteQueuedWork(bool wait_for_gpu_completion = false); void ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); - void WaitForQueuedWorkToBeExecutedOnGPU(); - - void ClearQueueAndWaitForCompletionOfInflightWork(); void DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource); - void ImmediatelyDestroyAllResourcesScheduledForDestruction(); void SetCommandListDirtyState(unsigned int command_list_state, bool dirty); bool GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const; @@ -64,9 +60,13 @@ public: void WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value); private: + void DestroyAllPendingResources(); + void ResetAllCommandAllocators(); + void WaitForGPUCompletion(); - void PerformGpuRolloverChecks(); - void ResetCommandListWithIdleCommandAllocator(); + void PerformGPURolloverChecks(); + void MoveToNextCommandAllocator(); + void ResetCommandList(); unsigned int m_command_list_dirty_state = UINT_MAX; D3D_PRIMITIVE_TOPOLOGY m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; @@ -85,6 +85,7 @@ private: UINT m_current_command_allocator; UINT m_current_command_allocator_list; std::array, 2> m_command_allocator_lists; + std::array m_command_allocator_list_fences; ID3D12GraphicsCommandList* m_backing_command_list; ID3D12QueuedCommandList* m_queued_command_list; @@ -93,6 +94,7 @@ private: UINT m_current_deferred_destruction_list; std::array, 2> m_deferred_destruction_lists; + std::array m_deferred_destruction_list_fences; }; } // namespace \ No newline at end of file diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp index b3f672a8ed..4bfb58735a 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.cpp @@ -14,9 +14,8 @@ constexpr size_t BufferOffsetForQueueItemType() return sizeof(T) + sizeof(D3DQueueItemType) * 2; } -DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param) +void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list) { - ID3D12QueuedCommandList* parent_queued_command_list = static_cast(param); ID3D12GraphicsCommandList* command_list = parent_queued_command_list->m_command_list; byte* queue_array = parent_queued_command_list->m_queue_array; @@ -340,6 +339,7 @@ DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param) bool eligible_to_move_to_front_of_queue = reinterpret_cast(item)->Stop.eligible_to_move_to_front_of_queue; bool signal_stop_event = reinterpret_cast(item)->Stop.signal_stop_event; + bool terminate_worker_thread = reinterpret_cast(item)->Stop.terminate_worker_thread; item += BufferOffsetForQueueItemType(); @@ -353,6 +353,9 @@ DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param) SetEvent(parent_queued_command_list->m_stop_execution_event); } + if (terminate_worker_thread) + return; + goto exitLoop; } } @@ -374,13 +377,14 @@ ID3D12QueuedCommandList::ID3D12QueuedCommandList(ID3D12GraphicsCommandList* back m_begin_execution_event = CreateSemaphore(nullptr, 0, 256, nullptr); m_stop_execution_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); - m_background_thread = CreateThread(nullptr, 0, BackgroundThreadFunction, this, 0, &m_background_thread_id); + m_background_thread = std::thread(BackgroundThreadFunction, this); } ID3D12QueuedCommandList::~ID3D12QueuedCommandList() { - TerminateThread(m_background_thread, 0); - CloseHandle(m_background_thread); + // Kick worker thread, and tell it to exit. + ProcessQueuedItems(true, true, true); + m_background_thread.join(); CloseHandle(m_begin_execution_event); CloseHandle(m_stop_execution_event); @@ -461,22 +465,14 @@ void ID3D12QueuedCommandList::QueuePresent(IDXGISwapChain* swap_chain, UINT sync CheckForOverflow(); } -void ID3D12QueuedCommandList::ClearQueue() -{ - // Drain semaphore to ensure no new previously queued work executes (though inflight work may continue). - while (WaitForSingleObject(m_begin_execution_event, 0) != WAIT_TIMEOUT) { } - - // Assume that any inflight queued work will complete within 100ms. This is a safe assumption. - Sleep(100); -} - -void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop) +void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop, bool terminate_worker_thread) { D3DQueueItem item = {}; item.Type = D3DQueueItemType::Stop; item.Stop.eligible_to_move_to_front_of_queue = eligible_to_move_to_front_of_queue; item.Stop.signal_stop_event = wait_for_stop; + item.Stop.terminate_worker_thread = terminate_worker_thread; *reinterpret_cast(m_queue_array_back) = item; @@ -500,6 +496,7 @@ void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_ if (wait_for_stop) { WaitForSingleObject(m_stop_execution_event, INFINITE); + ResetEvent(m_stop_execution_event); } } diff --git a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h index 068b66e723..1ca0334dba 100644 --- a/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h +++ b/Source/Core/VideoBackends/D3D12/D3DQueuedCommandList.h @@ -6,6 +6,7 @@ #include #include +#include namespace DX12 { @@ -210,6 +211,7 @@ struct StopArguments { bool eligible_to_move_to_front_of_queue; bool signal_stop_event; + bool terminate_worker_thread; }; struct D3DQueueItem @@ -254,13 +256,12 @@ public: ID3D12QueuedCommandList(ID3D12GraphicsCommandList* backing_command_list, ID3D12CommandQueue* backing_command_queue); - void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false); + void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false, bool terminate_worker_thread = false); void QueueExecute(); void QueueFenceGpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value); void QueueFenceCpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value); void QueuePresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags); - void ClearQueue(); // IUnknown methods @@ -612,15 +613,14 @@ private: void ResetQueueOverflowTracking(); void CheckForOverflow(); - static DWORD WINAPI BackgroundThreadFunction(LPVOID param); + static void BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list); byte m_queue_array[QUEUE_ARRAY_SIZE]; byte* m_queue_array_back = m_queue_array; byte* m_queue_array_back_at_start_of_frame = m_queue_array_back; - DWORD m_background_thread_id; - HANDLE m_background_thread; + std::thread m_background_thread; HANDLE m_begin_execution_event; HANDLE m_stop_execution_event; diff --git a/Source/Core/VideoBackends/D3D12/D3DState.cpp b/Source/Core/VideoBackends/D3D12/D3DState.cpp index d0df14371f..5810a74536 100644 --- a/Source/Core/VideoBackends/D3D12/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DState.cpp @@ -461,6 +461,18 @@ HRESULT StateCache::GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D return S_OK; } +void StateCache::OnMSAASettingsChanged() +{ + for (auto& it : m_small_pso_map) + { + SAFE_RELEASE(it.second); + } + m_small_pso_map.clear(); + + // Update sample count for new PSOs being created + gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples; +} + void StateCache::Clear() { for (auto& it : m_pso_map) diff --git a/Source/Core/VideoBackends/D3D12/D3DState.h b/Source/Core/VideoBackends/D3D12/D3DState.h index 85e83592d0..49d4fab157 100644 --- a/Source/Core/VideoBackends/D3D12/D3DState.h +++ b/Source/Core/VideoBackends/D3D12/D3DState.h @@ -95,6 +95,9 @@ public: HRESULT GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc, ID3D12PipelineState** pso); HRESULT GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology, const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid); + // Called when the MSAA count/quality changes. Invalidates all small PSOs. + void OnMSAASettingsChanged(); + // Release all cached states and clear hash tables. void Clear(); @@ -126,7 +129,8 @@ private: lhs.BlendState.RenderTarget[0].DestBlend, lhs.BlendState.RenderTarget[0].SrcBlend, lhs.BlendState.RenderTarget[0].RenderTargetWriteMask, - lhs.RTVFormats[0]) == + lhs.RTVFormats[0], + lhs.SampleDesc.Count) == std::tie(rhs.PS.pShaderBytecode, rhs.VS.pShaderBytecode, rhs.GS.pShaderBytecode, rhs.RasterizerState.CullMode, rhs.DepthStencilState.DepthEnable, @@ -137,7 +141,8 @@ private: rhs.BlendState.RenderTarget[0].DestBlend, rhs.BlendState.RenderTarget[0].SrcBlend, rhs.BlendState.RenderTarget[0].RenderTargetWriteMask, - rhs.RTVFormats[0]); + rhs.RTVFormats[0], + rhs.SampleDesc.Count); } }; diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp index fa91352b99..22c83a3654 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.cpp @@ -39,7 +39,7 @@ D3DStreamBuffer::~D3DStreamBuffer() // Obviously this is non-performant, so the buffer max_size should be large enough to // ensure this never happens. -bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment) +bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute) { CHECK(allocation_size <= m_buffer_max_size, "Error: Requested allocation size in D3DStreamBuffer is greater than max allowed size of backing buffer."); @@ -75,7 +75,7 @@ bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t align // Slow path. No room at front, or back, due to the GPU still (possibly) accessing parts of the buffer. // Resize if possible, else stall. - bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size); + bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size, allow_execute); return command_list_executed; } @@ -113,14 +113,25 @@ void D3DStreamBuffer::AllocateBuffer(size_t size) CheckHR(m_buffer->Map(0, nullptr, &m_buffer_cpu_address)); m_buffer_gpu_address = m_buffer->GetGPUVirtualAddress(); - m_buffer_size = size; + + // Start at the beginning of the new buffer. + m_buffer_gpu_completion_offset = 0; + m_buffer_current_allocation_offset = 0; + m_buffer_offset = 0; + + // Notify observers. + if (m_buffer_reallocation_notification != nullptr) + *m_buffer_reallocation_notification = true; + + // If we had any fences queued, they are no longer relevant. + ClearFences(); } // Function returns true if current command list executed as a result of current command list // referencing all of buffer's contents, AND we are already at max_size. No alternative but to // flush. See comments above AllocateSpaceInBuffer for more details. -bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) +bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute) { // This function will attempt to increase the size of the buffer, in response // to running out of room. If the buffer is already at its maximum size specified @@ -155,14 +166,7 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) if (new_size > m_buffer_size) { AllocateBuffer(new_size); - m_buffer_current_allocation_offset = 0; m_buffer_offset = allocation_size; - - if (m_buffer_reallocation_notification != nullptr) - { - *m_buffer_reallocation_notification = true; - } - return false; } @@ -177,6 +181,14 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) return false; } + // If allow_execute is false, the caller cannot handle command list execution (and the associated reset), so re-allocate the same-sized buffer. + if (!allow_execute) + { + AllocateBuffer(new_size); + m_buffer_offset = allocation_size; + return false; + } + // 4) If we get to this point, that means there is no outstanding queued GPU work, and we're still out of room. // This is bad - and performance will suffer due to the CPU/GPU serialization, but the show must go on. @@ -188,6 +200,7 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size) m_buffer_offset = allocation_size; m_buffer_current_allocation_offset = 0; m_buffer_gpu_completion_offset = 0; + ClearFences(); return true; } @@ -293,13 +306,13 @@ void D3DStreamBuffer::UpdateGPUProgress() m_queued_fences.pop(); // Has fence gone past this point? - if (fence_value > tracking_information.fence_value) + if (fence_value >= tracking_information.fence_value) { m_buffer_gpu_completion_offset = tracking_information.buffer_offset; } else { - // Fences are stored in assending order, so once we hit a fence we haven't yet crossed on GPU, abort search. + // Fences are stored in ascending order, so once we hit a fence we haven't yet crossed on GPU, abort search. break; } } @@ -307,7 +320,24 @@ void D3DStreamBuffer::UpdateGPUProgress() void D3DStreamBuffer::QueueFenceCallback(void* owning_object, UINT64 fence_value) { - reinterpret_cast(owning_object)->QueueFence(fence_value); + D3DStreamBuffer* owning_stream_buffer = reinterpret_cast(owning_object); + if (owning_stream_buffer->HasBufferOffsetChangedSinceLastFence()) + owning_stream_buffer->QueueFence(fence_value); +} + +void D3DStreamBuffer::ClearFences() +{ + while (!m_queued_fences.empty()) + m_queued_fences.pop(); +} + +bool D3DStreamBuffer::HasBufferOffsetChangedSinceLastFence() const +{ + if (m_queued_fences.empty()) + return true; + + // Don't add a new fence tracking entry when our offset hasn't changed. + return (m_queued_fences.back().buffer_offset != m_buffer_offset); } void D3DStreamBuffer::QueueFence(UINT64 fence_value) diff --git a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h index 897e30a66d..9aeb18468c 100644 --- a/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h +++ b/Source/Core/VideoBackends/D3D12/D3DStreamBuffer.h @@ -17,7 +17,7 @@ public: D3DStreamBuffer(size_t initial_size, size_t max_size, bool* buffer_reallocation_notification); ~D3DStreamBuffer(); - bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment); + bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute = true); void OverrideSizeOfPreviousAllocation(size_t override_allocation_size); void* GetBaseCPUAddress() const; @@ -32,13 +32,16 @@ public: private: void AllocateBuffer(size_t size); - bool AttemptBufferResizeOrElseStall(size_t new_size); + bool AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute); bool AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(size_t allocation_size); bool AttemptToFindExistingFenceToStallOn(size_t allocation_size); void UpdateGPUProgress(); + + void ClearFences(); + bool HasBufferOffsetChangedSinceLastFence() const; void QueueFence(UINT64 fence_value); struct FenceTrackingInformation diff --git a/Source/Core/VideoBackends/D3D12/D3DTexture.cpp b/Source/Core/VideoBackends/D3D12/D3DTexture.cpp index 6ad5da8d59..f913715e75 100644 --- a/Source/Core/VideoBackends/D3D12/D3DTexture.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DTexture.cpp @@ -261,17 +261,6 @@ void D3DTexture2D::TransitionToResourceState(ID3D12GraphicsCommandList* command_ D3DTexture2D::~D3DTexture2D() { DX12::D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_tex12); - - if (m_srv12_cpu.ptr) - { - D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {}; - null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - - null_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - - DX12::D3D::device12->CreateShaderResourceView(NULL, &null_srv_desc, m_srv12_cpu); - } } } // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp index 606c4a466f..360cd04df9 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.cpp +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.cpp @@ -16,6 +16,7 @@ #include "VideoBackends/D3D12/D3DTexture.h" #include "VideoBackends/D3D12/D3DUtil.h" +#include "VideoBackends/D3D12/FramebufferManager.h" #include "VideoBackends/D3D12/Render.h" #include "VideoBackends/D3D12/StaticShaderCache.h" @@ -71,7 +72,7 @@ public: // returns vertex offset to the new data size_t AppendData(const void* data, size_t size, size_t vertex_size) { - m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size); + m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false); memcpy(static_cast(m_stream_buffer->GetCPUAddressOfCurrentAllocation()), data, size); @@ -80,7 +81,7 @@ public: size_t BeginAppendData(void** write_ptr, size_t size, size_t vertex_size) { - m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size); + m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false); *write_ptr = m_stream_buffer->GetCPUAddressOfCurrentAllocation(); @@ -359,24 +360,7 @@ int CD3DFont::Init() const unsigned int text_vb_size = s_max_num_vertices * sizeof(FONT2DVERTEX); - CheckHR( - device12->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(text_vb_size), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&m_vb12) - ) - ); - - SetDebugObjectName12(m_vb12, "vertex buffer of a CD3DFont object"); - - m_vb12_view.BufferLocation = m_vb12->GetGPUVirtualAddress(); - m_vb12_view.SizeInBytes = text_vb_size; - m_vb12_view.StrideInBytes = sizeof(FONT2DVERTEX); - - CheckHR(m_vb12->Map(0, nullptr, &m_vb12_data)); + m_vertex_buffer = std::make_unique(text_vb_size * 2, text_vb_size * 16, nullptr); D3D12_GRAPHICS_PIPELINE_STATE_DESC text_pso_desc = { default_root_signature, // ID3D12RootSignature *pRootSignature; @@ -409,7 +393,7 @@ int CD3DFont::Init() int CD3DFont::Shutdown() { - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_vb12); + m_vertex_buffer.reset(); D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_texture12); return S_OK; @@ -417,7 +401,7 @@ int CD3DFont::Shutdown() int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dwColor, const std::string& text) { - if (!m_vb12) + if (!m_vertex_buffer) return 0; float scale_x = 1 / static_cast(D3D::GetBackBufferWidth()) * 2.f; @@ -428,10 +412,6 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw float sx = x * scale_x - 1.f; float sy = 1.f - y * scale_y; - // Fill vertex buffer - FONT2DVERTEX* vertices12 = static_cast(m_vb12_data) + m_vb12_offset / sizeof(FONT2DVERTEX); - int num_triangles = 0L; - // set general pipeline state D3D::current_command_list->SetPipelineState(m_pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); @@ -441,13 +421,11 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, m_texture12_gpu); - // If we are close to running off edge of vertex buffer, jump back to beginning. - if (m_vb12_offset + text.length() * 6 * sizeof(FONT2DVERTEX) >= s_max_num_vertices * sizeof(FONT2DVERTEX)) - { - m_vb12_offset = 0; - vertices12 = static_cast(m_vb12_data); - } + // upper bound is nchars * 6, assuming no spaces + m_vertex_buffer->AllocateSpaceInBuffer(static_cast(text.length()) * 6 * sizeof(FONT2DVERTEX), sizeof(FONT2DVERTEX), false); + FONT2DVERTEX* vertices12 = reinterpret_cast(m_vertex_buffer->GetCPUAddressOfCurrentAllocation()); + int num_triangles = 0; float start_x = sx; for (char c : text) { @@ -487,13 +465,14 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw // Render the vertex buffer if (num_triangles > 0) { - D3D::current_command_list->IASetVertexBuffers(0, 1, &m_vb12_view); + u32 written_size = num_triangles * 3 * sizeof(FONT2DVERTEX); + m_vertex_buffer->OverrideSizeOfPreviousAllocation(written_size); - D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, m_vb12_offset / sizeof(FONT2DVERTEX), 0); + D3D12_VERTEX_BUFFER_VIEW vb_view = { m_vertex_buffer->GetGPUAddressOfCurrentAllocation(), written_size, sizeof(FONT2DVERTEX) }; + D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view); + D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, 0, 0); } - m_vb12_offset += 3 * num_triangles * sizeof(FONT2DVERTEX); - return S_OK; } @@ -609,6 +588,28 @@ void SetLinearCopySampler() D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true); } +void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth, float max_depth) +{ + D3D12_VIEWPORT viewport = { + static_cast(top_left_x), + static_cast(top_left_y), + static_cast(width), + static_cast(height), + min_depth, + max_depth + }; + + D3D12_RECT scissor = { + static_cast(top_left_x), + static_cast(top_left_y), + static_cast(top_left_x + width), + static_cast(top_left_y + height) + }; + + D3D::current_command_list->RSSetViewports(1, &viewport); + D3D::current_command_list->RSSetScissorRects(1, &scissor); +}; + void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* rSource, int source_width, @@ -621,8 +622,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture, u32 slice, DXGI_FORMAT rt_format, bool inherit_srv_binding, - bool rt_multisampled, - D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override + bool rt_multisampled ) { float sw = 1.0f / static_cast(source_width); @@ -685,9 +685,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture, Renderer::GetResetBlendDesc(), // D3D12_BLEND_DESC BlendState; UINT_MAX, // UINT SampleMask; Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState - depth_stencil_desc_override ? - *depth_stencil_desc_override : - Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState + Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType @@ -708,13 +706,6 @@ void DrawShadedTexQuad(D3DTexture2D* texture, D3D::current_command_list->SetPipelineState(pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); - // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. - // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid - // dirtying state. - - // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 - D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); - D3D::current_command_list->DrawInstanced(4, 1, static_cast(stq_offset), 0); g_renderer->RestoreAPIState(); @@ -864,13 +855,6 @@ void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH D3D::current_command_list->SetPipelineState(pso); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true); - // In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled. - // Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid - // dirtying state. - - // 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072 - D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072)); - D3D::current_command_list->DrawInstanced(4, 1, static_cast(clearq_offset), 0); g_renderer->RestoreAPIState(); @@ -889,7 +873,6 @@ void DrawEFBPokeQuads(EFBAccessType type, size_t num_points, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, - D3D12_VIEWPORT* viewport, D3D12_CPU_DESCRIPTOR_HANDLE* render_target, D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, bool rt_multisampled @@ -949,7 +932,6 @@ void DrawEFBPokeQuads(EFBAccessType type, // Corresponding dirty flags set outside loop. D3D::current_command_list->OMSetRenderTargets(1, render_target, FALSE, depth_buffer); - D3D::current_command_list->RSSetViewports(1, viewport); D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); D3D12_VERTEX_BUFFER_VIEW vb_view = { @@ -989,6 +971,11 @@ void DrawEFBPokeQuads(EFBAccessType type, InitColVertex(&vertex[3], x1, y2, z, col); InitColVertex(&vertex[4], x2, y1, z, col); InitColVertex(&vertex[5], x2, y2, z, col); + + if (type == POKE_COLOR) + FramebufferManager::UpdateEFBColorAccessCopy(point->x, point->y, col); + else if (type == POKE_Z) + FramebufferManager::UpdateEFBDepthAccessCopy(point->x, point->y, z); } // Issue the draw diff --git a/Source/Core/VideoBackends/D3D12/D3DUtil.h b/Source/Core/VideoBackends/D3D12/D3DUtil.h index c40784425c..add8516be1 100644 --- a/Source/Core/VideoBackends/D3D12/D3DUtil.h +++ b/Source/Core/VideoBackends/D3D12/D3DUtil.h @@ -5,10 +5,12 @@ #pragma once #include +#include #include #include "Common/MathUtil.h" #include "VideoBackends/D3D12/D3DState.h" +#include "VideoBackends/D3D12/D3DStreamBuffer.h" #include "VideoCommon/RenderBase.h" @@ -48,10 +50,7 @@ private: D3D12_CPU_DESCRIPTOR_HANDLE m_texture12_cpu = {}; D3D12_GPU_DESCRIPTOR_HANDLE m_texture12_gpu = {}; - ID3D12Resource* m_vb12 = nullptr; - D3D12_VERTEX_BUFFER_VIEW m_vb12_view = {}; - void* m_vb12_data = nullptr; - unsigned int m_vb12_offset = 0; + std::unique_ptr m_vertex_buffer; D3D12_INPUT_LAYOUT_DESC m_input_layout12 = {}; D3D12_SHADER_BYTECODE m_pshader12 = {}; @@ -75,6 +74,8 @@ void ShutdownUtils(); void SetPointCopySampler(); void SetLinearCopySampler(); +void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth = D3D12_MIN_DEPTH, float max_depth = D3D12_MAX_DEPTH); + void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* source, int source_width, @@ -87,8 +88,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture, u32 slice = 0, DXGI_FORMAT rt_format = DXGI_FORMAT_R8G8B8A8_UNORM, bool inherit_srv_binding = false, - bool rt_multisampled = false, - D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override = nullptr + bool rt_multisampled = false ); void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled); @@ -99,7 +99,6 @@ void DrawEFBPokeQuads(EFBAccessType type, size_t num_points, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, - D3D12_VIEWPORT* viewport, D3D12_CPU_DESCRIPTOR_HANDLE* render_target, D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, bool rt_multisampled); diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp index f0118a20bf..a3c8064318 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.cpp @@ -21,14 +21,8 @@ FramebufferManager::Efb FramebufferManager::m_efb; unsigned int FramebufferManager::m_target_width; unsigned int FramebufferManager::m_target_height; -D3D12_DEPTH_STENCIL_DESC FramebufferManager::m_depth_resolve_depth_stencil_desc; - D3DTexture2D*& FramebufferManager::GetEFBColorTexture() { return m_efb.color_tex; } -ID3D12Resource*& FramebufferManager::GetEFBColorStagingBuffer() { return m_efb.color_staging_buf; } - D3DTexture2D*& FramebufferManager::GetEFBDepthTexture() { return m_efb.depth_tex; } -D3DTexture2D*& FramebufferManager::GetEFBDepthReadTexture() { return m_efb.depth_read_texture; } -ID3D12Resource*& FramebufferManager::GetEFBDepthStagingBuffer() { return m_efb.depth_staging_buf; } D3DTexture2D*& FramebufferManager::GetEFBColorTempTexture() { return m_efb.color_temp_tex; } @@ -107,11 +101,6 @@ FramebufferManager::FramebufferManager() SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.color_temp_tex->GetTex12(), "EFB color temp texture"); - // AccessEFB - Sysmem buffer used to retrieve the pixel data from color_tex - texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024); - CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_staging_buf))); - CHECK(hr == S_OK, "create EFB color staging buffer (hr=%#x)", hr); - // EFB depth buffer - primary depth buffer texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueDSV, IID_PPV_ARGS(&buf12))); @@ -120,24 +109,6 @@ FramebufferManager::FramebufferManager() SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.depth_tex->GetTex12(), "EFB depth texture"); - // Render buffer for AccessEFB (depth data) - texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 1, 1, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); - optimized_clear_valueRTV.Format = DXGI_FORMAT_R32_FLOAT; - hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12)); - CHECK(hr == S_OK, "create EFB depth read texture (hr=%#x)", hr); - - m_efb.depth_read_texture = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); - - SAFE_RELEASE(buf12); - D3D::SetDebugObjectName12(m_efb.depth_read_texture->GetTex12(), "EFB depth read texture (used in Renderer::AccessEFB)"); - - // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture - texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024); - hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_staging_buf)); - CHECK(hr == S_OK, "create EFB depth staging buffer (hr=%#x)", hr); - - D3D::SetDebugObjectName12(m_efb.depth_staging_buf, "EFB depth staging texture (used for Renderer::AccessEFB)"); - if (g_ActiveConfig.iMultisamples > 1) { // Framebuffer resolve textures (color+depth) @@ -148,18 +119,12 @@ FramebufferManager::FramebufferManager() SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.resolved_color_tex->GetTex12(), "EFB color resolve texture shader resource view"); - texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12)); CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr); - m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); + m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON); SAFE_RELEASE(buf12); D3D::SetDebugObjectName12(m_efb.resolved_depth_tex->GetTex12(), "EFB depth resolve texture shader resource view"); - - m_depth_resolve_depth_stencil_desc = {}; - m_depth_resolve_depth_stencil_desc.StencilEnable = FALSE; - m_depth_resolve_depth_stencil_desc.DepthEnable = TRUE; - m_depth_resolve_depth_stencil_desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - m_depth_resolve_depth_stencil_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; } else { @@ -167,6 +132,8 @@ FramebufferManager::FramebufferManager() m_efb.resolved_depth_tex = nullptr; } + InitializeEFBAccessCopies(); + s_xfbEncoder.Init(); } @@ -174,17 +141,12 @@ FramebufferManager::~FramebufferManager() { s_xfbEncoder.Shutdown(); + DestroyEFBAccessCopies(); + SAFE_RELEASE(m_efb.color_tex); - SAFE_RELEASE(m_efb.color_temp_tex); - - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_staging_buf); - - SAFE_RELEASE(m_efb.resolved_color_tex); SAFE_RELEASE(m_efb.depth_tex); - - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_staging_buf); - - SAFE_RELEASE(m_efb.depth_read_texture); + SAFE_RELEASE(m_efb.color_temp_tex); + SAFE_RELEASE(m_efb.resolved_color_tex); SAFE_RELEASE(m_efb.resolved_depth_tex); } @@ -211,25 +173,20 @@ void FramebufferManager::ResolveDepthTexture() { // ResolveSubresource does not work with depth textures. // Instead, we use a shader that selects the minimum depth from all samples. + D3D::SetViewportAndScissor(0, 0, m_target_width, m_target_height); - const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(m_target_width), static_cast(m_target_height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); - - m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(0, nullptr, FALSE, &m_efb.resolved_depth_tex->GetDSV12()); FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - D3D::SetLinearCopySampler(); - - // Render a quad covering the entire target, writing SV_Depth. const D3D12_RECT source_rect = CD3DX12_RECT(0, 0, m_target_width, m_target_height); D3D::DrawShadedTexQuad( FramebufferManager::GetEFBDepthTexture(), &source_rect, m_target_width, m_target_height, - StaticShaderCache::GetDepthCopyPixelShader(true), + StaticShaderCache::GetDepthResolveToColorPixelShader(), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), StaticShaderCache::GetCopyGeometryShader(), @@ -246,6 +203,215 @@ void FramebufferManager::ResolveDepthTexture() g_renderer->RestoreAPIState(); } +u32 FramebufferManager::ReadEFBColorAccessCopy(u32 x, u32 y) +{ + if (!m_efb.color_access_readback_map) + MapEFBColorAccessCopy(); + + u32 color; + size_t buffer_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32); + memcpy(&color, &m_efb.color_access_readback_map[buffer_offset], sizeof(color)); + return color; +} + +float FramebufferManager::ReadEFBDepthAccessCopy(u32 x, u32 y) +{ + if (!m_efb.depth_access_readback_map) + MapEFBDepthAccessCopy(); + + float depth; + size_t buffer_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float); + memcpy(&depth, &m_efb.depth_access_readback_map[buffer_offset], sizeof(depth)); + return depth; +} + +void FramebufferManager::UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color) +{ + if (!m_efb.color_access_readback_map) + return; + + size_t buffer_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32); + memcpy(&m_efb.color_access_readback_map[buffer_offset], &color, sizeof(color)); +} + +void FramebufferManager::UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth) +{ + if (!m_efb.depth_access_readback_map) + return; + + size_t buffer_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float); + memcpy(&m_efb.depth_access_readback_map[buffer_offset], &depth, sizeof(depth)); +} + +void FramebufferManager::InitializeEFBAccessCopies() +{ + D3D12_CLEAR_VALUE optimized_color_clear_value = { DXGI_FORMAT_R8G8B8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.0f } }; + D3D12_CLEAR_VALUE optimized_depth_clear_value = { DXGI_FORMAT_R32_FLOAT, { 1.0f } }; + CD3DX12_RESOURCE_DESC texdesc12; + ID3D12Resource* buf12; + HRESULT hr; + + // EFB access - color resize buffer + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_color_clear_value, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB access color resize buffer (hr=%#x)", hr); + m_efb.color_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM); + D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access color resize buffer"); + buf12->Release(); + + // EFB access - color staging/readback buffer + m_efb.color_access_readback_pitch = D3D::AlignValue(EFB_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.color_access_readback_pitch * EFB_HEIGHT); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_access_readback_buffer)); + D3D::SetDebugObjectName12(m_efb.color_access_readback_buffer, "EFB access color readback buffer"); + + // EFB access - depth resize buffer + texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_depth_clear_value, IID_PPV_ARGS(&buf12)); + CHECK(hr == S_OK, "create EFB access depth resize buffer (hr=%#x)", hr); + m_efb.depth_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT); + D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access depth resize buffer"); + buf12->Release(); + + // EFB access - depth staging/readback buffer + m_efb.depth_access_readback_pitch = D3D::AlignValue(EFB_WIDTH * sizeof(float), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.depth_access_readback_pitch * EFB_HEIGHT); + hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_access_readback_buffer)); + D3D::SetDebugObjectName12(m_efb.color_access_readback_buffer, "EFB access depth readback buffer"); +} + +void FramebufferManager::MapEFBColorAccessCopy() +{ + D3D::command_list_mgr->CPUAccessNotify(); + + ID3D12Resource* src_resource; + if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT || g_ActiveConfig.iMultisamples > 1) + { + // for non-1xIR or multisampled cases, we need to copy to an intermediate texture first + m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); + D3D::SetPointCopySampler(); + D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr); + + CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); + D3D::DrawShadedTexQuad(m_efb.color_tex, &src_rect, m_target_width, m_target_height, + StaticShaderCache::GetColorCopyPixelShader(true), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); + + m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.color_access_resize_tex->GetTex12(); + } + else + { + // Can source the EFB buffer + m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.color_tex->GetTex12(); + } + + // Copy to staging resource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.color_access_readback_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.color_access_readback_buffer, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); + + // Block until completion + D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Restore EFB resource state if it was sourced from here + if (src_resource == m_efb.color_tex->GetTex12()) + m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + // Restore state after resetting command list + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + g_renderer->RestoreAPIState(); + + // Resource copy has finished, so safe to map now + m_efb.color_access_readback_buffer->Map(0, nullptr, reinterpret_cast(&m_efb.color_access_readback_map)); +} + +void FramebufferManager::MapEFBDepthAccessCopy() +{ + D3D::command_list_mgr->CPUAccessNotify(); + + ID3D12Resource* src_resource; + if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT || g_ActiveConfig.iMultisamples > 1) + { + // for non-1xIR or multisampled cases, we need to copy to an intermediate texture first + m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); + + D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); + D3D::SetPointCopySampler(); + D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr); + + CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height); + D3D::DrawShadedTexQuad(m_efb.depth_tex, &src_rect, m_target_width, m_target_height, + (g_ActiveConfig.iMultisamples > 1) ? StaticShaderCache::GetDepthResolveToColorPixelShader() : StaticShaderCache::GetColorCopyPixelShader(false), + StaticShaderCache::GetSimpleVertexShader(), + StaticShaderCache::GetSimpleVertexShaderInputLayout(), + {}, 1.0f, 0, DXGI_FORMAT_R32_FLOAT, false, false); + + m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.depth_access_resize_tex->GetTex12(); + } + else + { + // Can source the EFB buffer + m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); + src_resource = m_efb.depth_tex->GetTex12(); + } + + // Copy to staging resource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0,{ DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.depth_access_readback_pitch } }; + CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.depth_access_readback_buffer, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0); + D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); + + // Block until completion + D3D::command_list_mgr->ExecuteQueuedWork(true); + + // Restore EFB resource state if it was sourced from here + if (src_resource == m_efb.depth_tex->GetTex12()) + m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); + + // Restore state after resetting command list + D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); + g_renderer->RestoreAPIState(); + + // Resource copy has finished, so safe to map now + m_efb.depth_access_readback_buffer->Map(0, nullptr, reinterpret_cast(&m_efb.depth_access_readback_map)); +} + +void FramebufferManager::InvalidateEFBAccessCopies() +{ + if (m_efb.color_access_readback_map) + { + m_efb.color_access_readback_buffer->Unmap(0, nullptr); + m_efb.color_access_readback_map = nullptr; + } + + if (m_efb.depth_access_readback_map) + { + m_efb.depth_access_readback_buffer->Unmap(0, nullptr); + m_efb.depth_access_readback_map = nullptr; + } +} + +void FramebufferManager::DestroyEFBAccessCopies() +{ + InvalidateEFBAccessCopies(); + + SAFE_RELEASE(m_efb.color_access_resize_tex); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_access_readback_buffer); + m_efb.color_access_readback_buffer = nullptr; + + SAFE_RELEASE(m_efb.depth_access_resize_tex); + D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_access_readback_buffer); + m_efb.depth_access_readback_buffer = nullptr; +} + void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) { // DX12's XFB decoder does not use this function. @@ -255,8 +421,7 @@ void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) void XFBSource::CopyEFB(float gamma) { // Copy EFB data to XFB and restore render target again - const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast(texWidth), static_cast(texHeight), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::SetViewportAndScissor(0, 0, texWidth, texHeight); const D3D12_RECT rect = CD3DX12_RECT(0, 0, texWidth, texHeight); diff --git a/Source/Core/VideoBackends/D3D12/FramebufferManager.h b/Source/Core/VideoBackends/D3D12/FramebufferManager.h index 8f0376a9bc..13e9ae205c 100644 --- a/Source/Core/VideoBackends/D3D12/FramebufferManager.h +++ b/Source/Core/VideoBackends/D3D12/FramebufferManager.h @@ -61,11 +61,7 @@ public: ~FramebufferManager(); static D3DTexture2D*& GetEFBColorTexture(); - static ID3D12Resource*& GetEFBColorStagingBuffer(); - static D3DTexture2D*& GetEFBDepthTexture(); - static D3DTexture2D*& GetEFBDepthReadTexture(); - static ID3D12Resource*& GetEFBDepthStagingBuffer(); static D3DTexture2D*& GetResolvedEFBColorTexture(); static D3DTexture2D*& GetResolvedEFBDepthTexture(); @@ -74,6 +70,17 @@ public: static void ResolveDepthTexture(); + // Access EFB from CPU + static u32 ReadEFBColorAccessCopy(u32 x, u32 y); + static float ReadEFBDepthAccessCopy(u32 x, u32 y); + static void UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color); + static void UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth); + static void InitializeEFBAccessCopies(); + static void MapEFBColorAccessCopy(); + static void MapEFBDepthAccessCopy(); + static void InvalidateEFBAccessCopies(); + static void DestroyEFBAccessCopies(); + private: std::unique_ptr CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override; void GetTargetSize(unsigned int* width, unsigned int* height) override; @@ -83,25 +90,29 @@ private: static struct Efb { D3DTexture2D* color_tex; - ID3D12Resource* color_staging_buf; D3DTexture2D* depth_tex; - ID3D12Resource* depth_staging_buf; - - D3DTexture2D* depth_read_texture; D3DTexture2D* color_temp_tex; D3DTexture2D* resolved_color_tex; D3DTexture2D* resolved_depth_tex; + D3DTexture2D* color_access_resize_tex; + ID3D12Resource* color_access_readback_buffer; + u8* color_access_readback_map; + u32 color_access_readback_pitch; + + D3DTexture2D* depth_access_resize_tex; + ID3D12Resource* depth_access_readback_buffer; + u8* depth_access_readback_map; + u32 depth_access_readback_pitch; + int slices; } m_efb; static unsigned int m_target_width; static unsigned int m_target_height; - - static D3D12_DEPTH_STENCIL_DESC m_depth_resolve_depth_stencil_desc; }; } // namespace DX12 diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp index 6af30765ba..5c27244bdd 100644 --- a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.cpp @@ -89,8 +89,6 @@ void PSTextureEncoder::Init() D3D::SetDebugObjectName12(m_out_readback_buffer, "efb encoder output staging buffer"); - CheckHR(m_out_readback_buffer->Map(0, nullptr, &m_out_readback_buffer_data)); - // Create constant buffer for uploading data to shaders. Need to align to 256 bytes. unsigned int encode_params_buffer_size = (sizeof(EFBEncodeParams) + 0xff) & ~0xff; @@ -152,8 +150,7 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p const u32 words_per_row = bytes_per_row / sizeof(u32); - D3D12_VIEWPORT vp = { 0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(0, 0, words_per_row, num_blocks_y); constexpr EFBRectangle full_src_rect(0, 0, EFB_WIDTH, EFB_HEIGHT); @@ -221,8 +218,10 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p D3D::command_list_mgr->ExecuteQueuedWork(true); // Transfer staging buffer to GameCube/Wii RAM + void* readback_data_map; + CheckHR(m_out_readback_buffer->Map(0, nullptr, &readback_data_map)); - u8* src = static_cast(m_out_readback_buffer_data); + u8* src = static_cast(readback_data_map); u32 read_stride = std::min(bytes_per_row, dst_location.PlacedFootprint.Footprint.RowPitch); for (unsigned int y = 0; y < num_blocks_y; ++y) { @@ -232,6 +231,8 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p src += dst_location.PlacedFootprint.Footprint.RowPitch; } + m_out_readback_buffer->Unmap(0, nullptr); + // Restores proper viewport/scissor settings. g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h index c8f05788e8..1e42a87f6a 100644 --- a/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D12/PSTextureEncoder.h @@ -29,7 +29,6 @@ private: D3D12_CPU_DESCRIPTOR_HANDLE m_out_rtv_cpu = {}; ID3D12Resource* m_out_readback_buffer = nullptr; - void* m_out_readback_buffer_data = nullptr; ID3D12Resource* m_encode_params_buffer = nullptr; void* m_encode_params_buffer_data = nullptr; diff --git a/Source/Core/VideoBackends/D3D12/Render.cpp b/Source/Core/VideoBackends/D3D12/Render.cpp index e3fc160c9d..4881fec717 100644 --- a/Source/Core/VideoBackends/D3D12/Render.cpp +++ b/Source/Core/VideoBackends/D3D12/Render.cpp @@ -51,8 +51,6 @@ static bool s_last_xfb_mode = false; static Television s_television; -static ID3D12Resource* s_access_efb_constant_buffer = nullptr; - enum CLEAR_BLEND_DESC { CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED = 0, @@ -78,7 +76,6 @@ D3D12_DEPTH_STENCIL_DESC g_reset_depth_desc = {}; D3D12_RASTERIZER_DESC g_reset_rast_desc = {}; static ID3D12Resource* s_screenshot_texture = nullptr; -static void* s_screenshot_texture_data = nullptr; // Nvidia stereo blitting struct defined in "nvstereo.h" from the Nvidia SDK typedef struct _Nv_Stereo_Image_Header @@ -110,25 +107,6 @@ static void SetupDeviceObjects() g_framebuffer_manager = std::make_unique(); - float colmat[20] = { 0.0f }; - colmat[0] = colmat[5] = colmat[10] = 1.0f; - - CheckHR( - D3D::device12->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(sizeof(colmat)), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&s_access_efb_constant_buffer) - ) - ); - - // Copy inital data to access_efb_cbuf12. - void* access_efb_constant_buffer_data = nullptr; - CheckHR(s_access_efb_constant_buffer->Map(0, nullptr, &access_efb_constant_buffer_data)); - memcpy(access_efb_constant_buffer_data, colmat, sizeof(colmat)); - D3D12_DEPTH_STENCIL_DESC depth_desc; depth_desc.DepthEnable = FALSE; depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; @@ -183,7 +161,6 @@ static void SetupDeviceObjects() g_reset_rast_desc = rast_desc; s_screenshot_texture = nullptr; - s_screenshot_texture_data = nullptr; } // Kill off all device objects @@ -197,9 +174,6 @@ static void TeardownDeviceObjects() s_screenshot_texture = nullptr; } - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_access_efb_constant_buffer); - s_access_efb_constant_buffer = nullptr; - s_television.Shutdown(); gx_state_cache.Clear(); @@ -224,8 +198,6 @@ void CreateScreenshotTexture() IID_PPV_ARGS(&s_screenshot_texture) ) ); - - CheckHR(s_screenshot_texture->Map(0, nullptr, &s_screenshot_texture_data)); } static D3D12_BOX GetScreenshotSourceBox(const TargetRectangle& target_rc) @@ -394,197 +366,65 @@ void Renderer::SetColorMask() // - GX_PokeZMode (TODO) u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) { - // EXISTINGD3D11TODO: This function currently is broken if anti-aliasing is enabled - - // Convert EFB dimensions to the ones of our render target - EFBRectangle efb_pixel_rc; - efb_pixel_rc.left = x; - efb_pixel_rc.top = y; - efb_pixel_rc.right = x + 1; - efb_pixel_rc.bottom = y + 1; - TargetRectangle target_pixel_rc = Renderer::ConvertEFBRectangle(efb_pixel_rc); - - // Take the mean of the resulting dimensions; TODO: Don't use the center pixel, compute the average color instead - D3D12_RECT rect_to_lock; - if (type == PEEK_COLOR || type == PEEK_Z) + if (type == PEEK_COLOR) { - rect_to_lock.left = (target_pixel_rc.left + target_pixel_rc.right) / 2; - rect_to_lock.top = (target_pixel_rc.top + target_pixel_rc.bottom) / 2; - rect_to_lock.right = rect_to_lock.left + 1; - rect_to_lock.bottom = rect_to_lock.top + 1; - } - else - { - rect_to_lock.left = target_pixel_rc.left; - rect_to_lock.right = target_pixel_rc.right; - rect_to_lock.top = target_pixel_rc.top; - rect_to_lock.bottom = target_pixel_rc.bottom; - } + u32 color = FramebufferManager::ReadEFBColorAccessCopy(x, y); - if (type == PEEK_Z) - { - D3D::command_list_mgr->CPUAccessNotify(); - - // depth buffers can only be completely CopySubresourceRegion'ed, so we're using DrawShadedTexQuad instead - // D3D12TODO: Is above statement true on D3D12? - D3D12_VIEWPORT vp12 = { 0.f, 0.f, 1.f, 1.f, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp12); - - D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_access_efb_constant_buffer->GetGPUVirtualAddress()); - D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); - - FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBDepthReadTexture()->GetRTV12(), FALSE, nullptr); - - D3D::SetPointCopySampler(); - - D3D::DrawShadedTexQuad( - FramebufferManager::GetEFBDepthTexture(), - &rect_to_lock, - Renderer::GetTargetWidth(), - Renderer::GetTargetHeight(), - StaticShaderCache::GetColorCopyPixelShader(true), - StaticShaderCache::GetSimpleVertexShader(), - StaticShaderCache::GetSimpleVertexShaderInputLayout(), - D3D12_SHADER_BYTECODE(), - 1.0f, - 0, - DXGI_FORMAT_R32_FLOAT, - false, - FramebufferManager::GetEFBDepthReadTexture()->GetMultisampled() - ); - - // copy to system memory - D3D12_BOX src_box = CD3DX12_BOX(0, 0, 0, 1, 1, 1); - ID3D12Resource* readback_buffer = FramebufferManager::GetEFBDepthStagingBuffer(); - - D3D12_TEXTURE_COPY_LOCATION dst_location = {}; - dst_location.pResource = readback_buffer; - dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst_location.PlacedFootprint.Offset = 0; - dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT; - dst_location.PlacedFootprint.Footprint.Width = 1; - dst_location.PlacedFootprint.Footprint.Height = 1; - dst_location.PlacedFootprint.Footprint.Depth = 1; - dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - D3D12_TEXTURE_COPY_LOCATION src_location = {}; - src_location.pResource = FramebufferManager::GetEFBDepthReadTexture()->GetTex12(); - src_location.SubresourceIndex = 0; - src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); - D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); - - // Need to wait for the CPU to complete the copy (and all prior operations) before we can read it on the CPU. - D3D::command_list_mgr->ExecuteQueuedWork(true); - - FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE ); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - - // Restores proper viewport/scissor settings. - g_renderer->RestoreAPIState(); - - // read the data from system memory - void* readback_buffer_data = nullptr; - CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data)); - - // depth buffer is inverted in the d3d backend - float val = 1.0f - reinterpret_cast(readback_buffer_data)[0]; - u32 ret = 0; - - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - ret = MathUtil::Clamp(static_cast(val * 65536.0f), 0, 0xFFFF); - } - else - { - ret = MathUtil::Clamp(static_cast(val * 16777216.0f), 0, 0xFFFFFF); - } - - // EXISTINGD3D11TODO: in RE0 this value is often off by one in Video_DX9 (where this code is derived from), which causes lighting to disappear - return ret; - } - else if (type == PEEK_COLOR) - { - D3D::command_list_mgr->CPUAccessNotify(); - - ID3D12Resource* readback_buffer = FramebufferManager::GetEFBColorStagingBuffer(); - - D3D12_BOX src_box = CD3DX12_BOX(rect_to_lock.left, rect_to_lock.top, 0, rect_to_lock.right, rect_to_lock.bottom, 1); - - D3D12_TEXTURE_COPY_LOCATION dst_location = {}; - dst_location.pResource = readback_buffer; - dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - dst_location.PlacedFootprint.Offset = 0; - dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - dst_location.PlacedFootprint.Footprint.Width = 1; - dst_location.PlacedFootprint.Footprint.Height = 1; - dst_location.PlacedFootprint.Footprint.Depth = 1; - dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - D3D12_TEXTURE_COPY_LOCATION src_location = {}; - src_location.pResource = FramebufferManager::GetResolvedEFBColorTexture()->GetTex12(); - src_location.SubresourceIndex = 0; - src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - - FramebufferManager::GetResolvedEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); - D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); - - // Need to wait for the CPU to complete the copy (and all prior operations) before we can read it on the CPU. - D3D::command_list_mgr->ExecuteQueuedWork(true); - - FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); - FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE); - D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12()); - - // Restores proper viewport/scissor settings. - g_renderer->RestoreAPIState(); - - // read the data from system memory - void* readback_buffer_data = nullptr; - CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data)); - - u32 ret = reinterpret_cast(readback_buffer_data)[0]; + // a little-endian value is expected to be returned + color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); // check what to do with the alpha channel (GX_PokeAlphaRead) PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) { - ret = RGBA8ToRGBA6ToRGBA8(ret); + color = RGBA8ToRGBA6ToRGBA8(color); } else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) { - ret = RGBA8ToRGB565ToRGBA8(ret); + color = RGBA8ToRGB565ToRGBA8(color); } if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) { - ret |= 0xFF000000; + color |= 0xFF000000; } if (alpha_read_mode.ReadMode == 2) { - return ret; // GX_READ_NONE + return color; // GX_READ_NONE } else if (alpha_read_mode.ReadMode == 1) { - return (ret | 0xFF000000); // GX_READ_FF + return (color | 0xFF000000); // GX_READ_FF } else /*if(alpha_read_mode.ReadMode == 0)*/ { - return (ret & 0x00FFFFFF); // GX_READ_00 + return (color & 0x00FFFFFF); // GX_READ_00 } } + else // if (type == PEEK_Z) + { + // depth buffer is inverted in the d3d backend + float depth = 1.0f - FramebufferManager::ReadEFBDepthAccessCopy(x, y); + u32 ret = 0; - return 0; + if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + // if Z is in 16 bit format you must return a 16 bit integer + ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); + } + else + { + ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); + } + + return ret; + } } void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) { - D3D12_VIEWPORT vp = { 0.0f, 0.0f, static_cast(GetTargetWidth()), static_cast(GetTargetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; + D3D::SetViewportAndScissor(0, 0, GetTargetWidth(), GetTargetHeight()); if (type == POKE_COLOR) { @@ -595,7 +435,6 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num num_points, &g_reset_blend_desc, &g_reset_depth_desc, - &vp, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), nullptr, FramebufferManager::GetEFBColorTexture()->GetMultisampled() @@ -609,7 +448,6 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num num_points, &s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED], &s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED], - &vp, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), &FramebufferManager::GetEFBDepthTexture()->GetDSV12(), FramebufferManager::GetEFBColorTexture()->GetMultisampled() @@ -689,23 +527,15 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha // Update the view port for clearing the picture TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc); - D3D12_VIEWPORT vp = { - static_cast(target_rc.left), - static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), - static_cast(target_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); - // Color is passed in bgra mode so we need to convert it to rgba u32 rgba_color = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000); + D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight()); D3D::DrawClearQuad(rgba_color, 1.0f - (z & 0xFFFFFF) / 16777216.0f, blend_desc, depth_stencil_desc, FramebufferManager::GetEFBColorTexture()->GetMultisampled()); // Restores proper viewport/scissor settings. g_renderer->RestoreAPIState(); + + FramebufferManager::InvalidateEFBAccessCopies(); } void Renderer::ReinterpretPixelData(unsigned int convtype) @@ -729,16 +559,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) return; } - D3D12_VIEWPORT vp = { - 0.f, - 0.f, - static_cast(g_renderer->GetTargetWidth()), - static_cast(g_renderer->GetTargetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(0, 0, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); FramebufferManager::GetEFBColorTempTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTempTexture()->GetRTV12(), FALSE, nullptr); @@ -852,7 +673,12 @@ bool Renderer::SaveScreenshot(const std::string& filename, const TargetRectangle D3D::command_list_mgr->ExecuteQueuedWork(true); - saved_png = TextureToPng(static_cast(s_screenshot_texture_data), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false); + void* screenshot_texture_map; + CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map)); + + saved_png = TextureToPng(static_cast(screenshot_texture_map), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false); + + s_screenshot_texture->Unmap(0, nullptr); if (saved_png) { @@ -906,6 +732,9 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height return; } + // Invalidate EFB access copies. Not strictly necessary, but this avoids having the buffers mapped when calling Present(). + FramebufferManager::InvalidateEFBAccessCopies(); + // Prepare to copy the XFBs to our backbuffer UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height); TargetRectangle target_rc = GetTargetRectangle(); @@ -916,27 +745,13 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height float clear_color[4] = { 0.f, 0.f, 0.f, 1.f }; D3D::current_command_list->ClearRenderTargetView(D3D::GetBackBuffer()->GetRTV12(), clear_color, 0, nullptr); - // D3D12: Because scissor-testing is always enabled, change scissor rect to backbuffer in case EFB is smaller - // than swap chain back buffer. - D3D12_RECT back_buffer_rect = { 0L, 0L, GetBackbufferWidth(), GetBackbufferHeight() }; - D3D::current_command_list->RSSetScissorRects(1, &back_buffer_rect); - // activate linear filtering for the buffer copies D3D::SetLinearCopySampler(); if (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB) { // EXISTINGD3D11TODO: Television should be used to render Virtual XFB mode as well. - D3D12_VIEWPORT vp12 = { - static_cast(target_rc.left), - static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), - static_cast(target_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp12); + D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight()); s_television.Submit(xfb_addr, fb_stride, fb_width, fb_height); s_television.Render(); @@ -1063,7 +878,12 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height w = s_record_width; h = s_record_height; } - formatBufferDump(static_cast(s_screenshot_texture_data), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch); + + void* screenshot_texture_map; + CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map)); + formatBufferDump(static_cast(screenshot_texture_map), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch); + s_screenshot_texture->Unmap(0, nullptr); + FlipImageData(&frame_data[0], w, h); AVIDump::AddFrame(&frame_data[0], source_width, source_height); } @@ -1084,16 +904,7 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height } // Reset viewport for drawing text - D3D12_VIEWPORT vp = { - 0.0f, - 0.0f, - static_cast(GetBackbufferWidth()), - static_cast(GetBackbufferHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(0, 0, GetBackbufferWidth(), GetBackbufferHeight()); Renderer::DrawDebugText(); @@ -1135,9 +946,16 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0)) { s_last_xfb_mode = g_ActiveConfig.bUseRealXFB; - s_last_multisamples = g_ActiveConfig.iMultisamples; - StaticShaderCache::InvalidateMSAAShaders(); + // Block on any changes until the GPU catches up, so we can free resources safely. + D3D::command_list_mgr->ExecuteQueuedWork(true); + + if (s_last_multisamples != g_ActiveConfig.iMultisamples) + { + s_last_multisamples = g_ActiveConfig.iMultisamples; + StaticShaderCache::InvalidateMSAAShaders(); + gx_state_cache.OnMSAASettingsChanged(); + } if (window_resized) { @@ -1272,6 +1090,9 @@ void Renderer::ApplyState(bool use_dst_alpha) D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, false); } + + // Always called prior to drawing, so we can invalidate the CPU EFB copies here. + FramebufferManager::InvalidateEFBAccessCopies(); } void Renderer::RestoreState() @@ -1509,30 +1330,12 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D TargetRectangle left_rc, right_rc; ConvertStereoRectangle(dst, left_rc, right_rc); - D3D12_VIEWPORT left_vp = { - static_cast(left_rc.left), - static_cast(left_rc.top), - static_cast(left_rc.GetWidth()), - static_cast(left_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D12_VIEWPORT right_vp = { - static_cast(right_rc.left), - static_cast(right_rc.top), - static_cast(right_rc.GetWidth()), - static_cast(right_rc.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - // Swap chain backbuffer is never multisampled.. - D3D::current_command_list->RSSetViewports(1, &left_vp); + D3D::SetViewportAndScissor(left_rc.left, left_rc.top, left_rc.GetWidth(), left_rc.GetHeight()); D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); - D3D::current_command_list->RSSetViewports(1, &right_vp); + D3D::SetViewportAndScissor(right_rc.left, right_rc.top, right_rc.GetWidth(), right_rc.GetHeight()); D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 1, DXGI_FORMAT_R8G8B8A8_UNORM, false, false); } else if (g_ActiveConfig.iStereoMode == STEREO_3DVISION) @@ -1574,8 +1377,7 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D } else { - D3D12_VIEWPORT vp = { static_cast(dst.left), static_cast(dst.top), static_cast(dst.GetWidth()), static_cast(dst.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(dst.left, dst.top, dst.GetWidth(), dst.GetHeight()); D3D::DrawShadedTexQuad( src_texture, diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp index adbe83346b..e2bd65aac9 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.cpp @@ -15,7 +15,7 @@ namespace DX12 static ID3DBlob* s_color_matrix_program_blob[2] = {}; static ID3DBlob* s_color_copy_program_blob[2] = {}; static ID3DBlob* s_depth_matrix_program_blob[2] = {}; -static ID3DBlob* s_depth_copy_program_blob[2] = {}; +static ID3DBlob* s_depth_resolve_to_color_program_blob = {}; static ID3DBlob* s_clear_program_blob = {}; static ID3DBlob* s_anaglyph_program_blob = {}; static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {}; @@ -73,17 +73,6 @@ static constexpr const char s_color_copy_program_hlsl[] = { "}\n" }; -static constexpr const char s_depth_copy_program_hlsl[] = { - "sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - "out float odepth : SV_Depth,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "odepth = Tex0.Sample(samp0,uv0);\n" - "}\n" -}; - // Anaglyph Red-Cyan shader based on Dubois algorithm // Constants taken from the paper: // "Conversion of a Stereo Pair to Anaglyph with @@ -126,19 +115,19 @@ static constexpr const char s_color_copy_program_msaa_hlsl[] = { "}\n" }; -static constexpr const char s_depth_copy_program_msaa_hlsl[] = { +static constexpr const char s_depth_resolve_to_color_program_hlsl[] = { "#define SAMPLES %d\n" "Texture2DMSArray Tex0 : register(t0);\n" "void main(\n" - " out float depth : SV_Depth,\n" + " out float ocol0 : SV_Target,\n" " in float4 pos : SV_Position,\n" " in float3 uv0 : TEXCOORD0)\n" "{\n" " int width, height, slices, samples;\n" " Tex0.GetDimensions(width, height, slices, samples);\n" - " depth = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" + " ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" " for(int i = 1; i < SAMPLES; ++i)\n" - " depth = min(depth, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" + " ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" "}\n" }; @@ -497,25 +486,21 @@ D3D12_SHADER_BYTECODE StaticShaderCache::GetColorCopyPixelShader(bool multisampl return bytecode; } -D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthCopyPixelShader(bool multisampled) +D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthResolveToColorPixelShader() { D3D12_SHADER_BYTECODE bytecode = {}; - if (!multisampled || g_ActiveConfig.iMultisamples == 1) + if (s_depth_resolve_to_color_program_blob) { - bytecode = { s_depth_copy_program_blob[0]->GetBufferPointer(), s_depth_copy_program_blob[0]->GetBufferSize() }; - } - else if (s_depth_copy_program_blob[1]) - { - bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() }; + bytecode = { s_depth_resolve_to_color_program_blob->GetBufferPointer(), s_depth_resolve_to_color_program_blob->GetBufferSize() }; } else { // create MSAA shader for current AA mode - std::string buf = StringFromFormat(s_depth_copy_program_msaa_hlsl, g_ActiveConfig.iMultisamples); + std::string buf = StringFromFormat(s_depth_resolve_to_color_program_hlsl, g_ActiveConfig.iMultisamples); - D3D::CompilePixelShader(buf, &s_depth_copy_program_blob[1]); - bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() }; + D3D::CompilePixelShader(buf, &s_depth_resolve_to_color_program_blob); + bytecode = { s_depth_resolve_to_color_program_blob->GetBufferPointer(), s_depth_resolve_to_color_program_blob->GetBufferSize() }; } return bytecode; @@ -646,7 +631,6 @@ void StaticShaderCache::Init() D3D::CompilePixelShader(s_clear_program_hlsl, &s_clear_program_blob); D3D::CompilePixelShader(s_anaglyph_program_hlsl, &s_anaglyph_program_blob); D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]); - D3D::CompilePixelShader(s_depth_copy_program_hlsl, &s_depth_copy_program_blob[0]); D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]); D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]); @@ -667,6 +651,7 @@ void StaticShaderCache::InvalidateMSAAShaders() SAFE_RELEASE(s_depth_matrix_program_blob[1]); SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[1]); SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[1]); + SAFE_RELEASE(s_depth_resolve_to_color_program_blob); } void StaticShaderCache::Shutdown() @@ -675,6 +660,7 @@ void StaticShaderCache::Shutdown() SAFE_RELEASE(s_clear_program_blob); SAFE_RELEASE(s_anaglyph_program_blob); + SAFE_RELEASE(s_depth_resolve_to_color_program_blob); for (unsigned int i = 0; i < 2; ++i) { diff --git a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h index bbdb37cb9e..4b9f6959a8 100644 --- a/Source/Core/VideoBackends/D3D12/StaticShaderCache.h +++ b/Source/Core/VideoBackends/D3D12/StaticShaderCache.h @@ -18,7 +18,7 @@ public: static D3D12_SHADER_BYTECODE GetColorMatrixPixelShader(bool multisampled); static D3D12_SHADER_BYTECODE GetColorCopyPixelShader(bool multisampled); static D3D12_SHADER_BYTECODE GetDepthMatrixPixelShader(bool multisampled); - static D3D12_SHADER_BYTECODE GetDepthCopyPixelShader(bool multisampled); + static D3D12_SHADER_BYTECODE GetDepthResolveToColorPixelShader(); static D3D12_SHADER_BYTECODE GetClearPixelShader(); static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader(); static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled); diff --git a/Source/Core/VideoBackends/D3D12/TextureCache.cpp b/Source/Core/VideoBackends/D3D12/TextureCache.cpp index d88fce86c7..309e62c6fc 100644 --- a/Source/Core/VideoBackends/D3D12/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D12/TextureCache.cpp @@ -25,10 +25,10 @@ namespace DX12 static std::unique_ptr s_encoder = nullptr; static std::unique_ptr s_efb_copy_stream_buffer = nullptr; +static u32 s_efb_copy_last_cbuf_id = UINT_MAX; static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr; -static void* s_texture_cache_entry_readback_buffer_data = nullptr; -static UINT s_texture_cache_entry_readback_buffer_size = 0; +static size_t s_texture_cache_entry_readback_buffer_size = 0; TextureCache::TCacheEntry::~TCacheEntry() { @@ -42,47 +42,27 @@ void TextureCache::TCacheEntry::Bind(unsigned int stage) bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level) { - // EXISTINGD3D11TODO: Somehow implement this (D3DX11 doesn't support dumping individual LODs) - static bool warn_once = true; - if (level && warn_once) + u32 level_width = std::max(config.width >> level, 1u); + u32 level_height = std::max(config.height >> level, 1u); + size_t level_pitch = D3D::AlignValue(level_width * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + size_t required_readback_buffer_size = level_pitch * level_height; + + // Check if the current readback buffer is large enough + if (required_readback_buffer_size > s_texture_cache_entry_readback_buffer_size) { - WARN_LOG(VIDEO, "Dumping individual LOD not supported by D3D12 backend!"); - warn_once = false; - return false; - } + // Reallocate the buffer with the new size. Safe to immediately release because we're the only user and we block until completion. + if (s_texture_cache_entry_readback_buffer) + s_texture_cache_entry_readback_buffer->Release(); - D3D12_RESOURCE_DESC texture_desc = m_texture->GetTex12()->GetDesc(); - - const unsigned int required_readback_buffer_size = D3D::AlignValue(static_cast(texture_desc.Width) * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - - if (s_texture_cache_entry_readback_buffer_size < required_readback_buffer_size) - { s_texture_cache_entry_readback_buffer_size = required_readback_buffer_size; - - // We know the readback buffer won't be in use right now, since we wait on this thread - // for the GPU to finish execution right after copying to it. - - SAFE_RELEASE(s_texture_cache_entry_readback_buffer); + CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer))); } - if (!s_texture_cache_entry_readback_buffer_size) - { - CheckHR( - D3D::device12->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer) - ) - ); - - CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &s_texture_cache_entry_readback_buffer_data)); - } - - bool saved_png = false; - m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE); D3D12_TEXTURE_COPY_LOCATION dst_location = {}; @@ -90,26 +70,31 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dst_location.PlacedFootprint.Offset = 0; dst_location.PlacedFootprint.Footprint.Depth = 1; - dst_location.PlacedFootprint.Footprint.Format = texture_desc.Format; - dst_location.PlacedFootprint.Footprint.Width = static_cast(texture_desc.Width); - dst_location.PlacedFootprint.Footprint.Height = texture_desc.Height; - dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + dst_location.PlacedFootprint.Footprint.Width = level_width; + dst_location.PlacedFootprint.Footprint.Height = level_height; + dst_location.PlacedFootprint.Footprint.RowPitch = static_cast(level_pitch); - D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0); + D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), level); D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr); D3D::command_list_mgr->ExecuteQueuedWork(true); - saved_png = TextureToPng( - static_cast(s_texture_cache_entry_readback_buffer_data), + // Map readback buffer and save to file. + void* readback_texture_map; + CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &readback_texture_map)); + + bool saved = TextureToPng( + static_cast(readback_texture_map), dst_location.PlacedFootprint.Footprint.RowPitch, filename, dst_location.PlacedFootprint.Footprint.Width, dst_location.PlacedFootprint.Footprint.Height ); - return saved_png; + s_texture_cache_entry_readback_buffer->Unmap(0, nullptr); + return saved; } void TextureCache::TCacheEntry::CopyRectangleFromTexture( @@ -164,15 +149,7 @@ void TextureCache::TCacheEntry::CopyRectangleFromTexture( return; } - const D3D12_VIEWPORT vp = { - float(dst_rect.left), - float(dst_rect.top), - float(dst_rect.GetWidth()), - float(dst_rect.GetHeight()), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - D3D::current_command_list->RSSetViewports(1, &vp); + D3D::SetViewportAndScissor(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight()); m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET); D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr); @@ -272,8 +249,6 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& srcRect, bool scale_by_half, unsigned int cbuf_id, const float* colmat) { - static unsigned int old_cbuf_id = UINT_MAX; - // When copying at half size, in multisampled mode, resolve the color/depth buffer first. // This is because multisampled texture reads go through Load, not Sample, and the linear // filter is ignored. @@ -289,28 +264,19 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat FramebufferManager::GetResolvedEFBColorTexture(); } - // stretch picture with increased internal resolution - const D3D12_VIEWPORT vp = { - 0.f, - 0.f, - static_cast(config.width), - static_cast(config.height), - D3D12_MIN_DEPTH, - D3D12_MAX_DEPTH - }; - - D3D::current_command_list->RSSetViewports(1, &vp); - // set transformation - if (cbuf_id != old_cbuf_id) + if (s_efb_copy_last_cbuf_id != cbuf_id) { s_efb_copy_stream_buffer->AllocateSpaceInBuffer(28 * sizeof(float), 256); memcpy(s_efb_copy_stream_buffer->GetCPUAddressOfCurrentAllocation(), colmat, 28 * sizeof(float)); - old_cbuf_id = cbuf_id; + s_efb_copy_last_cbuf_id = cbuf_id; } + // stretch picture with increased internal resolution + D3D::SetViewportAndScissor(0, 0, config.width, config.height); + D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_efb_copy_stream_buffer->GetGPUAddressOfCurrentAllocation()); D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true); @@ -441,14 +407,13 @@ void main( void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format) { - // stretch picture with increased internal resolution - const D3D12_VIEWPORT vp = { 0.f, 0.f, static_cast(unconverted->config.width), static_cast(unconverted->config.height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH }; - D3D::current_command_list->RSSetViewports(1, &vp); - const unsigned int palette_buffer_allocation_size = 512; m_palette_stream_buffer->AllocateSpaceInBuffer(palette_buffer_allocation_size, 256); memcpy(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation(), palette, palette_buffer_allocation_size); + // stretch picture with increased internal resolution + D3D::SetViewportAndScissor(0, 0, unconverted->config.width, unconverted->config.height); + // D3D12: Because the second SRV slot is occupied by this buffer, and an arbitrary texture occupies the first SRV slot, // we need to allocate temporary space out of our descriptor heap, place the palette SRV in the second slot, then copy the // existing texture's descriptor into the first slot. @@ -554,9 +519,9 @@ TextureCache::TextureCache() s_encoder->Init(); s_efb_copy_stream_buffer = std::make_unique(1024 * 1024, 1024 * 1024, nullptr); + s_efb_copy_last_cbuf_id = UINT_MAX; s_texture_cache_entry_readback_buffer = nullptr; - s_texture_cache_entry_readback_buffer_data = nullptr; s_texture_cache_entry_readback_buffer_size = 0; m_palette_pixel_shaders[GX_TL_IA8] = GetConvertShader12(std::string("IA8")); @@ -606,8 +571,10 @@ TextureCache::~TextureCache() if (s_texture_cache_entry_readback_buffer) { - D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_texture_cache_entry_readback_buffer); + // Safe to destroy the readback buffer immediately, as the only time it's used is blocked until completion. + s_texture_cache_entry_readback_buffer->Release(); s_texture_cache_entry_readback_buffer = nullptr; + s_texture_cache_entry_readback_buffer_size = 0; } D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_palette_uniform_buffer); @@ -635,7 +602,7 @@ void TextureCache::BindTextures() D3D12_GPU_DESCRIPTOR_HANDLE s_group_base_texture_gpu_handle; DX12::D3D::gpu_descriptor_heap_mgr->AllocateGroup(&s_group_base_texture_cpu_handle, 8, &s_group_base_texture_gpu_handle, nullptr, true); - for (unsigned int stage = 0; stage <= last_texture; stage++) + for (unsigned int stage = 0; stage < 8; stage++) { if (bound_textures[stage] != nullptr) {