Merge pull request #3647 from stenzek/d3d12-fences

D3D12: Multiple fixes (texture cache, fences, MSAA, CPU EFB access)
This commit is contained in:
Pierre Bourdon 2016-02-29 16:49:03 +01:00
commit 07ff8379ba
20 changed files with 642 additions and 725 deletions

View File

@ -362,7 +362,6 @@ HRESULT Create(HWND wnd)
IDXGIFactory* factory;
IDXGIAdapter* adapter;
IDXGIOutput* output;
hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory);
if (FAILED(hr))
MessageBox(wnd, _T("Failed to create IDXGIFactory object"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
@ -376,25 +375,6 @@ HRESULT Create(HWND wnd)
MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
}
// TODO: Make this configurable
hr = adapter->EnumOutputs(0, &output);
if (FAILED(hr))
{
// try using the first one
IDXGIAdapter* firstadapter;
hr = factory->EnumAdapters(0, &firstadapter);
if (!FAILED(hr))
hr = firstadapter->EnumOutputs(0, &output);
if (FAILED(hr))
MessageBox(wnd,
_T("Failed to enumerate outputs!\n")
_T("This usually happens when you've set your video adapter to the Nvidia GPU in an Optimus-equipped system.\n")
_T("Set Dolphin to use the high-performance graphics in Nvidia's drivers instead and leave Dolphin's video adapter set to the Intel GPU."),
_T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
SAFE_RELEASE(firstadapter);
}
// get supported AA modes
s_aa_modes = EnumAAModes(adapter);
@ -423,10 +403,8 @@ HRESULT Create(HWND wnd)
swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
swap_chain_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
#if defined(_DEBUG) || defined(DEBUGFAST)
// Creating debug devices can sometimes fail if the user doesn't have the correct
// version of the DirectX SDK. If it does, simply fallback to a non-debug device.
{
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(USE_D3D12_DEBUG_LAYER)
// Enabling the debug layer will fail if the Graphics Tools feature is not installed.
if (SUCCEEDED(hr))
{
ID3D12Debug* debug_controller;
@ -438,38 +416,17 @@ HRESULT Create(HWND wnd)
}
else
{
MessageBox(wnd, _T("Failed to initialize Direct3D debug layer, please make sure it is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
}
hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12));
s_feat_level = D3D_FEATURE_LEVEL_11_0;
MessageBox(wnd, _T("WARNING: Failed to enable D3D12 debug layer, please ensure the Graphics Tools feature is installed."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
}
}
if (FAILED(hr))
#endif
{
if (SUCCEEDED(hr))
{
#ifdef USE_D3D12_DEBUG_LAYER
ID3D12Debug* debug_controller;
hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller));
if (SUCCEEDED(hr))
{
debug_controller->EnableDebugLayer();
debug_controller->Release();
}
else
{
MessageBox(wnd, _T("Failed to initialize Direct3D debug layer."), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
}
#endif
hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12));
s_feat_level = D3D_FEATURE_LEVEL_11_0;
}
}
if (SUCCEEDED(hr))
{
@ -529,11 +486,7 @@ HRESULT Create(HWND wnd)
D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_DEPTHSTENCILVIEW_NOT_SET, // Benign.
D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, // Benign.
D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, // Benign.
D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, // Benign. Probably.
D3D12_MESSAGE_ID_INVALID_SUBRESOURCE_STATE,
D3D12_MESSAGE_ID_MAP_INVALID_NULLRANGE, // Benign.
D3D12_MESSAGE_ID_EXECUTECOMMANDLISTS_GPU_WRITTEN_READBACK_RESOURCE_MAPPED, // Benign.
D3D12_MESSAGE_ID_RESOURCE_BARRIER_BEFORE_AFTER_MISMATCH // Benign. Probably.
};
filter.DenyList.NumIDs = ARRAYSIZE(id_list);
filter.DenyList.pIDList = id_list;
@ -553,7 +506,6 @@ HRESULT Create(HWND wnd)
MessageBox(wnd, _T("Failed to associate the window"), _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
SAFE_RELEASE(factory);
SAFE_RELEASE(output);
SAFE_RELEASE(adapter)
CreateDescriptorHeaps();
@ -738,7 +690,7 @@ void CreateRootSignatures()
void WaitForOutstandingRenderingToComplete()
{
command_list_mgr->ClearQueueAndWaitForCompletionOfInflightWork();
command_list_mgr->ExecuteQueuedWork(true);
}
void Close()
@ -754,8 +706,6 @@ void Close()
D3D::CleanupPersistentD3DTextureResources();
command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction();
SAFE_RELEASE(s_swap_chain);
command_list_mgr.reset();
@ -839,15 +789,15 @@ unsigned int GetMaxTextureSize()
void Reset()
{
command_list_mgr->ExecuteQueuedWork(true);
// release all back buffer references
for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++)
{
SAFE_RELEASE(s_backbuf[i]);
}
D3D::command_list_mgr->ImmediatelyDestroyAllResourcesScheduledForDestruction();
// Block until all commands have finished.
// This will also final-release all pending resources (including the backbuffer above)
command_list_mgr->ExecuteQueuedWork(true);
// resize swapchain buffers
RECT client;

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <queue>
#include <vector>
@ -69,6 +70,9 @@ D3DCommandListManager::D3DCommandListManager(
}
m_current_deferred_destruction_list = 0;
std::fill(m_command_allocator_list_fences.begin(), m_command_allocator_list_fences.end(), 0);
std::fill(m_deferred_destruction_list_fences.begin(), m_deferred_destruction_list_fences.end(), 0);
}
void D3DCommandListManager::SetInitialCommandListState()
@ -109,42 +113,29 @@ void D3DCommandListManager::ExecuteQueuedWork(bool wait_for_gpu_completion)
m_queue_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
CheckHR(m_queued_command_list->Close());
m_queued_command_list->Close();
m_queued_command_list->QueueExecute();
m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value);
ResetCommandListWithIdleCommandAllocator();
m_queued_command_list->ProcessQueuedItems();
m_queued_command_list->ProcessQueuedItems(wait_for_gpu_completion, wait_for_gpu_completion);
#else
CheckHR(m_backing_command_list->Close());
ID3D12CommandList* const commandListsToExecute[1] = { m_backing_command_list };
m_command_queue->ExecuteCommandLists(1, commandListsToExecute);
ID3D12CommandList* const execute_list[1] = { m_backing_command_list };
m_command_queue->ExecuteCommandLists(1, execute_list);
if (wait_for_gpu_completion)
{
CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value));
}
if (m_current_command_allocator == 0)
{
PerformGpuRolloverChecks();
}
ResetCommandListWithIdleCommandAllocator();
#endif
// Notify observers of the fence value for the current work to finish.
for (auto it : m_queue_fence_callbacks)
it.second(it.first, m_queue_fence_value);
SetInitialCommandListState();
if (wait_for_gpu_completion)
{
WaitOnCPUForFence(m_queue_fence, m_queue_fence_value);
}
WaitForGPUCompletion();
// Re-open the command list, using the current allocator.
ResetCommandList();
SetInitialCommandListState();
}
void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags)
@ -152,62 +143,94 @@ void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_cha
m_queue_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
CheckHR(m_queued_command_list->Close());
m_queued_command_list->Close();
m_queued_command_list->QueueExecute();
m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value);
m_queued_command_list->QueuePresent(swap_chain, sync_interval, flags);
m_queued_command_list->ProcessQueuedItems(true);
if (m_current_command_allocator == 0)
{
PerformGpuRolloverChecks();
}
m_current_command_allocator = (m_current_command_allocator + 1) % m_command_allocator_lists[m_current_command_allocator_list].size();
ResetCommandListWithIdleCommandAllocator();
SetInitialCommandListState();
#else
ExecuteQueuedWork();
m_command_queue->Signal(m_queue_fence, m_queue_fence_value);
CheckHR(swap_chain->Present(sync_interval, flags));
#endif
for (auto it : m_queue_fence_callbacks)
it.second(it.first, m_queue_fence_value);
}
void D3DCommandListManager::WaitForQueuedWorkToBeExecutedOnGPU()
{
// Wait for GPU to finish all outstanding work.
m_queue_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
m_queued_command_list->QueueExecute();
m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value);
m_queued_command_list->ProcessQueuedItems(true);
#else
CheckHR(m_backing_command_list->Close());
ID3D12CommandList* const execute_list[1] = { m_backing_command_list };
m_command_queue->ExecuteCommandLists(1, execute_list);
CheckHR(swap_chain->Present(sync_interval, flags));
CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value));
#endif
WaitOnCPUForFence(m_queue_fence, m_queue_fence_value);
// Notify observers of the fence value for the current work to finish.
for (auto it : m_queue_fence_callbacks)
it.second(it.first, m_queue_fence_value);
// Move to the next command allocator, this may mean switching allocator lists.
MoveToNextCommandAllocator();
ResetCommandList();
SetInitialCommandListState();
}
void D3DCommandListManager::PerformGpuRolloverChecks()
void D3DCommandListManager::DestroyAllPendingResources()
{
// Insert fence to measure GPU progress, ensure we aren't using in-use command allocators.
if (m_queue_frame_fence->GetCompletedValue() < m_queue_frame_fence_value)
for (auto& destruction_list : m_deferred_destruction_lists)
{
WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value);
for (auto& resource : destruction_list)
resource->Release();
destruction_list.clear();
}
}
// Calls Reset() on every command allocator in every allocator list, then rewinds the
// allocator, allocator-list and deferred-destruction-list indices back to zero.
void D3DCommandListManager::ResetAllCommandAllocators()
{
	for (size_t list_index = 0; list_index < m_command_allocator_lists.size(); ++list_index)
	{
		auto& allocators = m_command_allocator_lists[list_index];
		for (size_t allocator_index = 0; allocator_index < allocators.size(); ++allocator_index)
			allocators[allocator_index]->Reset();
	}

	// Start over from the first allocator of the first list.
	m_current_deferred_destruction_list = 0;
	m_current_command_allocator_list = 0;
	m_current_command_allocator = 0;
}
// Signals the frame fence with a freshly incremented value, blocks the calling thread until
// the fence reaches that value, then releases every deferred-destruction resource and resets
// all command allocators.
void D3DCommandListManager::WaitForGPUCompletion()
{
// Wait for GPU to finish all outstanding work.
// This method assumes that no command lists are open.
m_queue_frame_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
// Queued path: enqueue the fence signal, then kick the queue. The 'true' argument is
// ProcessQueuedItems' eligible_to_move_to_front_of_queue parameter.
m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value);
m_queued_command_list->ProcessQueuedItems(true);
#else
// Direct path: signal the fence on the command queue itself.
CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value));
#endif
// Block on the CPU until the fence reaches the value signalled above.
WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value);
// GPU is up to date with us. Therefore, it has finished with any pending resources.
DestroyAllPendingResources();
// Command allocators are also up-to-date, so reset these.
ResetAllCommandAllocators();
}
void D3DCommandListManager::PerformGPURolloverChecks()
{
m_queue_frame_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value);
#else
CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value));
#endif
// We now know that the previous 'set' of command lists has completed on GPU, and it is safe to
// release resources / start back at beginning of command allocator list.
// Begin Deferred Resource Destruction
UINT safe_to_delete_deferred_destruction_list = (m_current_deferred_destruction_list - 1) % m_deferred_destruction_lists.size();
WaitOnCPUForFence(m_queue_frame_fence, m_deferred_destruction_list_fences[safe_to_delete_deferred_destruction_list]);
for (UINT i = 0; i < m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].size(); i++)
{
@ -216,30 +239,37 @@ void D3DCommandListManager::PerformGpuRolloverChecks()
m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].clear();
m_deferred_destruction_list_fences[m_current_deferred_destruction_list] = m_queue_frame_fence_value;
m_current_deferred_destruction_list = (m_current_deferred_destruction_list + 1) % m_deferred_destruction_lists.size();
// End Deferred Resource Destruction
// Begin Command Allocator Resets
UINT safe_to_reset_command_allocator_list = (m_current_command_allocator_list - 1) % m_command_allocator_lists.size();
WaitOnCPUForFence(m_queue_frame_fence, m_command_allocator_list_fences[safe_to_reset_command_allocator_list]);
for (UINT i = 0; i < m_command_allocator_lists[safe_to_reset_command_allocator_list].size(); i++)
{
CheckHR(m_command_allocator_lists[safe_to_reset_command_allocator_list][i]->Reset());
}
m_command_allocator_list_fences[m_current_command_allocator_list] = m_queue_frame_fence_value;
m_current_command_allocator_list = (m_current_command_allocator_list + 1) % m_command_allocator_lists.size();
m_current_command_allocator = 0;
// End Command Allocator Resets
m_queue_frame_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value);
#else
CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value));
#endif
}
void D3DCommandListManager::ResetCommandListWithIdleCommandAllocator()
// Advances to the next command allocator of the current allocator list. When the index
// wraps back to zero, PerformGPURolloverChecks() is invoked to move on to the next set.
void D3DCommandListManager::MoveToNextCommandAllocator()
{
	const auto& active_list = m_command_allocator_lists[m_current_command_allocator_list];
	m_current_command_allocator = (m_current_command_allocator + 1) % active_list.size();

	// A non-zero index means we are still inside the same list; nothing more to do.
	if (m_current_command_allocator != 0)
		return;

	PerformGPURolloverChecks();
}
void D3DCommandListManager::ResetCommandList()
{
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
ID3D12QueuedCommandList* command_list = m_queued_command_list;
@ -257,38 +287,18 @@ void D3DCommandListManager::DestroyResourceAfterCurrentCommandListExecuted(ID3D1
m_deferred_destruction_lists[m_current_deferred_destruction_list].push_back(resource);
}
// Releases every resource held in each deferred-destruction list and empties the lists.
void D3DCommandListManager::ImmediatelyDestroyAllResourcesScheduledForDestruction()
{
	for (size_t list_index = 0; list_index < m_deferred_destruction_lists.size(); ++list_index)
	{
		auto& pending = m_deferred_destruction_lists[list_index];

		for (size_t resource_index = 0; resource_index < pending.size(); ++resource_index)
			pending[resource_index]->Release();

		pending.clear();
	}
}
// Increments the queue fence value, signals m_queue_fence with it and blocks the calling
// thread until the fence reaches that value. With USE_D3D12_QUEUED_COMMAND_LISTS defined,
// the queued command list's ClearQueue() is called first.
void D3DCommandListManager::ClearQueueAndWaitForCompletionOfInflightWork()
{
// Wait for GPU to finish all outstanding work.
m_queue_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
m_queued_command_list->ClearQueue(); // Waits for currently-processing work to finish, then clears queue.
// Enqueue the fence signal and kick the queue. The 'true' argument is
// ProcessQueuedItems' eligible_to_move_to_front_of_queue parameter.
m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value);
m_queued_command_list->ProcessQueuedItems(true);
#else
// Direct path: signal the fence on the command queue itself.
CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value));
#endif
// Block on the CPU until the fence reaches the value signalled above.
WaitOnCPUForFence(m_queue_fence, m_queue_fence_value);
}
D3DCommandListManager::~D3DCommandListManager()
{
ImmediatelyDestroyAllResourcesScheduledForDestruction();
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
// Wait for background thread to exit.
m_queued_command_list->Release();
#endif
// The command list will still be open, close it before destroying.
m_backing_command_list->Close();
DestroyAllPendingResources();
m_backing_command_list->Release();
for (auto& allocator_list : m_command_allocator_lists)
@ -305,8 +315,10 @@ D3DCommandListManager::~D3DCommandListManager()
void D3DCommandListManager::WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value)
{
CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event));
if (fence->GetCompletedValue() >= fence_value)
return;
CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event));
WaitForSingleObject(m_wait_on_cpu_fence_event, INFINITE);
}

View File

@ -38,11 +38,7 @@ public:
void ExecuteQueuedWork(bool wait_for_gpu_completion = false);
void ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags);
void WaitForQueuedWorkToBeExecutedOnGPU();
void ClearQueueAndWaitForCompletionOfInflightWork();
void DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource);
void ImmediatelyDestroyAllResourcesScheduledForDestruction();
void SetCommandListDirtyState(unsigned int command_list_state, bool dirty);
bool GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const;
@ -64,9 +60,13 @@ public:
void WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value);
private:
void DestroyAllPendingResources();
void ResetAllCommandAllocators();
void WaitForGPUCompletion();
void PerformGpuRolloverChecks();
void ResetCommandListWithIdleCommandAllocator();
void PerformGPURolloverChecks();
void MoveToNextCommandAllocator();
void ResetCommandList();
unsigned int m_command_list_dirty_state = UINT_MAX;
D3D_PRIMITIVE_TOPOLOGY m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
@ -85,6 +85,7 @@ private:
UINT m_current_command_allocator;
UINT m_current_command_allocator_list;
std::array<std::vector<ID3D12CommandAllocator*>, 2> m_command_allocator_lists;
std::array<UINT64, 2> m_command_allocator_list_fences;
ID3D12GraphicsCommandList* m_backing_command_list;
ID3D12QueuedCommandList* m_queued_command_list;
@ -93,6 +94,7 @@ private:
UINT m_current_deferred_destruction_list;
std::array<std::vector<ID3D12Resource*>, 2> m_deferred_destruction_lists;
std::array<UINT64, 2> m_deferred_destruction_list_fences;
};
} // namespace

View File

@ -14,9 +14,8 @@ constexpr size_t BufferOffsetForQueueItemType()
return sizeof(T) + sizeof(D3DQueueItemType) * 2;
}
DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param)
void ID3D12QueuedCommandList::BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list)
{
ID3D12QueuedCommandList* parent_queued_command_list = static_cast<ID3D12QueuedCommandList*>(param);
ID3D12GraphicsCommandList* command_list = parent_queued_command_list->m_command_list;
byte* queue_array = parent_queued_command_list->m_queue_array;
@ -340,6 +339,7 @@ DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param)
bool eligible_to_move_to_front_of_queue = reinterpret_cast<D3DQueueItem*>(item)->Stop.eligible_to_move_to_front_of_queue;
bool signal_stop_event = reinterpret_cast<D3DQueueItem*>(item)->Stop.signal_stop_event;
bool terminate_worker_thread = reinterpret_cast<D3DQueueItem*>(item)->Stop.terminate_worker_thread;
item += BufferOffsetForQueueItemType<StopArguments>();
@ -353,6 +353,9 @@ DWORD WINAPI ID3D12QueuedCommandList::BackgroundThreadFunction(LPVOID param)
SetEvent(parent_queued_command_list->m_stop_execution_event);
}
if (terminate_worker_thread)
return;
goto exitLoop;
}
}
@ -374,13 +377,14 @@ ID3D12QueuedCommandList::ID3D12QueuedCommandList(ID3D12GraphicsCommandList* back
m_begin_execution_event = CreateSemaphore(nullptr, 0, 256, nullptr);
m_stop_execution_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
m_background_thread = CreateThread(nullptr, 0, BackgroundThreadFunction, this, 0, &m_background_thread_id);
m_background_thread = std::thread(BackgroundThreadFunction, this);
}
ID3D12QueuedCommandList::~ID3D12QueuedCommandList()
{
TerminateThread(m_background_thread, 0);
CloseHandle(m_background_thread);
// Kick worker thread, and tell it to exit.
ProcessQueuedItems(true, true, true);
m_background_thread.join();
CloseHandle(m_begin_execution_event);
CloseHandle(m_stop_execution_event);
@ -461,22 +465,14 @@ void ID3D12QueuedCommandList::QueuePresent(IDXGISwapChain* swap_chain, UINT sync
CheckForOverflow();
}
void ID3D12QueuedCommandList::ClearQueue()
{
// Drain semaphore to ensure no new previously queued work executes (though inflight work may continue).
while (WaitForSingleObject(m_begin_execution_event, 0) != WAIT_TIMEOUT) { }
// Assume that any inflight queued work will complete within 100ms. This is a safe assumption.
Sleep(100);
}
void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop)
void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_of_queue, bool wait_for_stop, bool terminate_worker_thread)
{
D3DQueueItem item = {};
item.Type = D3DQueueItemType::Stop;
item.Stop.eligible_to_move_to_front_of_queue = eligible_to_move_to_front_of_queue;
item.Stop.signal_stop_event = wait_for_stop;
item.Stop.terminate_worker_thread = terminate_worker_thread;
*reinterpret_cast<D3DQueueItem*>(m_queue_array_back) = item;
@ -500,6 +496,7 @@ void ID3D12QueuedCommandList::ProcessQueuedItems(bool eligible_to_move_to_front_
if (wait_for_stop)
{
WaitForSingleObject(m_stop_execution_event, INFINITE);
ResetEvent(m_stop_execution_event);
}
}

View File

@ -6,6 +6,7 @@
#include <atomic>
#include <d3d12.h>
#include <thread>
namespace DX12
{
@ -210,6 +211,7 @@ struct StopArguments
{
bool eligible_to_move_to_front_of_queue;
bool signal_stop_event;
bool terminate_worker_thread;
};
struct D3DQueueItem
@ -254,13 +256,12 @@ public:
ID3D12QueuedCommandList(ID3D12GraphicsCommandList* backing_command_list, ID3D12CommandQueue* backing_command_queue);
void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false);
void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false, bool wait_for_stop = false, bool terminate_worker_thread = false);
void QueueExecute();
void QueueFenceGpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value);
void QueueFenceCpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value);
void QueuePresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags);
void ClearQueue();
// IUnknown methods
@ -612,15 +613,14 @@ private:
void ResetQueueOverflowTracking();
void CheckForOverflow();
static DWORD WINAPI BackgroundThreadFunction(LPVOID param);
static void BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list);
byte m_queue_array[QUEUE_ARRAY_SIZE];
byte* m_queue_array_back = m_queue_array;
byte* m_queue_array_back_at_start_of_frame = m_queue_array_back;
DWORD m_background_thread_id;
HANDLE m_background_thread;
std::thread m_background_thread;
HANDLE m_begin_execution_event;
HANDLE m_stop_execution_event;

View File

@ -461,6 +461,18 @@ HRESULT StateCache::GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D
return S_OK;
}
// Invalidates the small-PSO cache: every cached pipeline state in m_small_pso_map is
// released and the map is emptied, then the sample count used for subsequently created
// PSOs is taken from the active configuration.
void StateCache::OnMSAASettingsChanged()
{
	for (auto entry = m_small_pso_map.begin(); entry != m_small_pso_map.end(); ++entry)
	{
		SAFE_RELEASE(entry->second);
	}
	m_small_pso_map.clear();

	// New PSOs pick up the multisample count from the current settings.
	gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}
void StateCache::Clear()
{
for (auto& it : m_pso_map)

View File

@ -95,6 +95,9 @@ public:
HRESULT GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc, ID3D12PipelineState** pso);
HRESULT GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology, const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid);
// Called when the MSAA count/quality changes. Invalidates all small PSOs.
void OnMSAASettingsChanged();
// Release all cached states and clear hash tables.
void Clear();
@ -126,7 +129,8 @@ private:
lhs.BlendState.RenderTarget[0].DestBlend,
lhs.BlendState.RenderTarget[0].SrcBlend,
lhs.BlendState.RenderTarget[0].RenderTargetWriteMask,
lhs.RTVFormats[0]) ==
lhs.RTVFormats[0],
lhs.SampleDesc.Count) ==
std::tie(rhs.PS.pShaderBytecode, rhs.VS.pShaderBytecode, rhs.GS.pShaderBytecode,
rhs.RasterizerState.CullMode,
rhs.DepthStencilState.DepthEnable,
@ -137,7 +141,8 @@ private:
rhs.BlendState.RenderTarget[0].DestBlend,
rhs.BlendState.RenderTarget[0].SrcBlend,
rhs.BlendState.RenderTarget[0].RenderTargetWriteMask,
rhs.RTVFormats[0]);
rhs.RTVFormats[0],
rhs.SampleDesc.Count);
}
};

View File

@ -39,7 +39,7 @@ D3DStreamBuffer::~D3DStreamBuffer()
// Obviously this is non-performant, so the buffer max_size should be large enough to
// ensure this never happens.
bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment)
bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute)
{
CHECK(allocation_size <= m_buffer_max_size, "Error: Requested allocation size in D3DStreamBuffer is greater than max allowed size of backing buffer.");
@ -75,7 +75,7 @@ bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t align
// Slow path. No room at front, or back, due to the GPU still (possibly) accessing parts of the buffer.
// Resize if possible, else stall.
bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size);
bool command_list_executed = AttemptBufferResizeOrElseStall(allocation_size, allow_execute);
return command_list_executed;
}
@ -113,14 +113,25 @@ void D3DStreamBuffer::AllocateBuffer(size_t size)
CheckHR(m_buffer->Map(0, nullptr, &m_buffer_cpu_address));
m_buffer_gpu_address = m_buffer->GetGPUVirtualAddress();
m_buffer_size = size;
// Start at the beginning of the new buffer.
m_buffer_gpu_completion_offset = 0;
m_buffer_current_allocation_offset = 0;
m_buffer_offset = 0;
// Notify observers.
if (m_buffer_reallocation_notification != nullptr)
*m_buffer_reallocation_notification = true;
// If we had any fences queued, they are no longer relevant.
ClearFences();
}
// Function returns true if current command list executed as a result of current command list
// referencing all of buffer's contents, AND we are already at max_size. No alternative but to
// flush. See comments above AllocateSpaceInBuffer for more details.
bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size)
bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute)
{
// This function will attempt to increase the size of the buffer, in response
// to running out of room. If the buffer is already at its maximum size specified
@ -155,14 +166,7 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size)
if (new_size > m_buffer_size)
{
AllocateBuffer(new_size);
m_buffer_current_allocation_offset = 0;
m_buffer_offset = allocation_size;
if (m_buffer_reallocation_notification != nullptr)
{
*m_buffer_reallocation_notification = true;
}
return false;
}
@ -177,6 +181,14 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size)
return false;
}
// If allow_execute is false, the caller cannot handle command list execution (and the associated reset), so re-allocate the same-sized buffer.
if (!allow_execute)
{
AllocateBuffer(new_size);
m_buffer_offset = allocation_size;
return false;
}
// 4) If we get to this point, that means there is no outstanding queued GPU work, and we're still out of room.
// This is bad - and performance will suffer due to the CPU/GPU serialization, but the show must go on.
@ -188,6 +200,7 @@ bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size)
m_buffer_offset = allocation_size;
m_buffer_current_allocation_offset = 0;
m_buffer_gpu_completion_offset = 0;
ClearFences();
return true;
}
@ -293,13 +306,13 @@ void D3DStreamBuffer::UpdateGPUProgress()
m_queued_fences.pop();
// Has fence gone past this point?
if (fence_value > tracking_information.fence_value)
if (fence_value >= tracking_information.fence_value)
{
m_buffer_gpu_completion_offset = tracking_information.buffer_offset;
}
else
{
// Fences are stored in assending order, so once we hit a fence we haven't yet crossed on GPU, abort search.
// Fences are stored in ascending order, so once we hit a fence we haven't yet crossed on GPU, abort search.
break;
}
}
@ -307,7 +320,24 @@ void D3DStreamBuffer::UpdateGPUProgress()
void D3DStreamBuffer::QueueFenceCallback(void* owning_object, UINT64 fence_value)
{
reinterpret_cast<D3DStreamBuffer*>(owning_object)->QueueFence(fence_value);
D3DStreamBuffer* owning_stream_buffer = reinterpret_cast<D3DStreamBuffer*>(owning_object);
if (owning_stream_buffer->HasBufferOffsetChangedSinceLastFence())
owning_stream_buffer->QueueFence(fence_value);
}
// Discards every fence tracking entry currently held in m_queued_fences.
void D3DStreamBuffer::ClearFences()
{
	// Pop from the front until nothing is left.
	for (; !m_queued_fences.empty(); m_queued_fences.pop())
	{
	}
}
// Returns true when no fence is queued, or when the current buffer offset differs from the
// offset recorded with the most recently queued fence. A false result lets the caller skip
// adding a fence tracking entry for an unchanged offset.
bool D3DStreamBuffer::HasBufferOffsetChangedSinceLastFence() const
{
	return m_queued_fences.empty() || m_queued_fences.back().buffer_offset != m_buffer_offset;
}
void D3DStreamBuffer::QueueFence(UINT64 fence_value)

View File

@ -17,7 +17,7 @@ public:
D3DStreamBuffer(size_t initial_size, size_t max_size, bool* buffer_reallocation_notification);
~D3DStreamBuffer();
bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment);
bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute = true);
void OverrideSizeOfPreviousAllocation(size_t override_allocation_size);
void* GetBaseCPUAddress() const;
@ -32,13 +32,16 @@ public:
private:
void AllocateBuffer(size_t size);
bool AttemptBufferResizeOrElseStall(size_t new_size);
bool AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute);
bool AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(size_t allocation_size);
bool AttemptToFindExistingFenceToStallOn(size_t allocation_size);
void UpdateGPUProgress();
void ClearFences();
bool HasBufferOffsetChangedSinceLastFence() const;
void QueueFence(UINT64 fence_value);
struct FenceTrackingInformation

View File

@ -261,17 +261,6 @@ void D3DTexture2D::TransitionToResourceState(ID3D12GraphicsCommandList* command_
D3DTexture2D::~D3DTexture2D()
{
DX12::D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_tex12);
if (m_srv12_cpu.ptr)
{
D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {};
null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
null_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
DX12::D3D::device12->CreateShaderResourceView(NULL, &null_srv_desc, m_srv12_cpu);
}
}
} // namespace DX12

View File

@ -16,6 +16,7 @@
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
@ -71,7 +72,7 @@ public:
// returns vertex offset to the new data
size_t AppendData(const void* data, size_t size, size_t vertex_size)
{
m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size);
m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false);
memcpy(static_cast<u8*>(m_stream_buffer->GetCPUAddressOfCurrentAllocation()), data, size);
@ -80,7 +81,7 @@ public:
size_t BeginAppendData(void** write_ptr, size_t size, size_t vertex_size)
{
m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size);
m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false);
*write_ptr = m_stream_buffer->GetCPUAddressOfCurrentAllocation();
@ -359,24 +360,7 @@ int CD3DFont::Init()
const unsigned int text_vb_size = s_max_num_vertices * sizeof(FONT2DVERTEX);
CheckHR(
device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(text_vb_size),
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&m_vb12)
)
);
SetDebugObjectName12(m_vb12, "vertex buffer of a CD3DFont object");
m_vb12_view.BufferLocation = m_vb12->GetGPUVirtualAddress();
m_vb12_view.SizeInBytes = text_vb_size;
m_vb12_view.StrideInBytes = sizeof(FONT2DVERTEX);
CheckHR(m_vb12->Map(0, nullptr, &m_vb12_data));
m_vertex_buffer = std::make_unique<D3DStreamBuffer>(text_vb_size * 2, text_vb_size * 16, nullptr);
D3D12_GRAPHICS_PIPELINE_STATE_DESC text_pso_desc = {
default_root_signature, // ID3D12RootSignature *pRootSignature;
@ -409,7 +393,7 @@ int CD3DFont::Init()
int CD3DFont::Shutdown()
{
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_vb12);
m_vertex_buffer.reset();
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_texture12);
return S_OK;
@ -417,7 +401,7 @@ int CD3DFont::Shutdown()
int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dwColor, const std::string& text)
{
if (!m_vb12)
if (!m_vertex_buffer)
return 0;
float scale_x = 1 / static_cast<float>(D3D::GetBackBufferWidth()) * 2.f;
@ -428,10 +412,6 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw
float sx = x * scale_x - 1.f;
float sy = 1.f - y * scale_y;
// Fill vertex buffer
FONT2DVERTEX* vertices12 = static_cast<FONT2DVERTEX*>(m_vb12_data) + m_vb12_offset / sizeof(FONT2DVERTEX);
int num_triangles = 0L;
// set general pipeline state
D3D::current_command_list->SetPipelineState(m_pso);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
@ -441,13 +421,11 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw
D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV, m_texture12_gpu);
// If we are close to running off edge of vertex buffer, jump back to beginning.
if (m_vb12_offset + text.length() * 6 * sizeof(FONT2DVERTEX) >= s_max_num_vertices * sizeof(FONT2DVERTEX))
{
m_vb12_offset = 0;
vertices12 = static_cast<FONT2DVERTEX*>(m_vb12_data);
}
// upper bound is nchars * 6, assuming no spaces
m_vertex_buffer->AllocateSpaceInBuffer(static_cast<u32>(text.length()) * 6 * sizeof(FONT2DVERTEX), sizeof(FONT2DVERTEX), false);
FONT2DVERTEX* vertices12 = reinterpret_cast<FONT2DVERTEX*>(m_vertex_buffer->GetCPUAddressOfCurrentAllocation());
int num_triangles = 0;
float start_x = sx;
for (char c : text)
{
@ -487,13 +465,14 @@ int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dw
// Render the vertex buffer
if (num_triangles > 0)
{
D3D::current_command_list->IASetVertexBuffers(0, 1, &m_vb12_view);
u32 written_size = num_triangles * 3 * sizeof(FONT2DVERTEX);
m_vertex_buffer->OverrideSizeOfPreviousAllocation(written_size);
D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, m_vb12_offset / sizeof(FONT2DVERTEX), 0);
D3D12_VERTEX_BUFFER_VIEW vb_view = { m_vertex_buffer->GetGPUAddressOfCurrentAllocation(), written_size, sizeof(FONT2DVERTEX) };
D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view);
D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, 0, 0);
}
m_vb12_offset += 3 * num_triangles * sizeof(FONT2DVERTEX);
return S_OK;
}
@ -609,6 +588,28 @@ void SetLinearCopySampler()
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true);
}
// Records a viewport and a scissor rectangle covering the same region on the current
// command list. The region starts at (top_left_x, top_left_y) and spans width x height;
// min_depth/max_depth are forwarded as the viewport's depth range.
void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth, float max_depth)
{
	D3D12_VIEWPORT vp;
	vp.TopLeftX = static_cast<float>(top_left_x);
	vp.TopLeftY = static_cast<float>(top_left_y);
	vp.Width = static_cast<float>(width);
	vp.Height = static_cast<float>(height);
	vp.MinDepth = min_depth;
	vp.MaxDepth = max_depth;

	// The scissor rect is expressed as edges (right/bottom exclusive), not as origin + extent.
	D3D12_RECT scissor_rect;
	scissor_rect.left = static_cast<LONG>(top_left_x);
	scissor_rect.top = static_cast<LONG>(top_left_y);
	scissor_rect.right = static_cast<LONG>(top_left_x + width);
	scissor_rect.bottom = static_cast<LONG>(top_left_y + height);

	D3D::current_command_list->RSSetViewports(1, &vp);
	D3D::current_command_list->RSSetScissorRects(1, &scissor_rect);
}
void DrawShadedTexQuad(D3DTexture2D* texture,
const D3D12_RECT* rSource,
int source_width,
@ -621,8 +622,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture,
u32 slice,
DXGI_FORMAT rt_format,
bool inherit_srv_binding,
bool rt_multisampled,
D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override
bool rt_multisampled
)
{
float sw = 1.0f / static_cast<float>(source_width);
@ -685,8 +685,6 @@ void DrawShadedTexQuad(D3DTexture2D* texture,
Renderer::GetResetBlendDesc(), // D3D12_BLEND_DESC BlendState;
UINT_MAX, // UINT SampleMask;
Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState
depth_stencil_desc_override ?
*depth_stencil_desc_override :
Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState
layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES IndexBufferProperties
@ -708,13 +706,6 @@ void DrawShadedTexQuad(D3DTexture2D* texture,
D3D::current_command_list->SetPipelineState(pso);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
// In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled.
// Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid
// dirtying state.
// 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072
D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072));
D3D::current_command_list->DrawInstanced(4, 1, static_cast<UINT>(stq_offset), 0);
g_renderer->RestoreAPIState();
@ -864,13 +855,6 @@ void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH
D3D::current_command_list->SetPipelineState(pso);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
// In D3D11, the 'resetraststate' has ScissorEnable disabled. In D3D12, scissor testing is always enabled.
// Thus, set the scissor rect to the max texture size, then reset it to the current scissor rect to avoid
// dirtying state.
// 2 ^ D3D12_MAX_TEXTURE_DIMENSION_2_TO_EXP = 131072
D3D::current_command_list->RSSetScissorRects(1, &CD3DX12_RECT(0, 0, 131072, 131072));
D3D::current_command_list->DrawInstanced(4, 1, static_cast<UINT>(clearq_offset), 0);
g_renderer->RestoreAPIState();
@ -889,7 +873,6 @@ void DrawEFBPokeQuads(EFBAccessType type,
size_t num_points,
D3D12_BLEND_DESC* blend_desc,
D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc,
D3D12_VIEWPORT* viewport,
D3D12_CPU_DESCRIPTOR_HANDLE* render_target,
D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer,
bool rt_multisampled
@ -949,7 +932,6 @@ void DrawEFBPokeQuads(EFBAccessType type,
// Corresponding dirty flags set outside loop.
D3D::current_command_list->OMSetRenderTargets(1, render_target, FALSE, depth_buffer);
D3D::current_command_list->RSSetViewports(1, viewport);
D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
D3D12_VERTEX_BUFFER_VIEW vb_view = {
@ -989,6 +971,11 @@ void DrawEFBPokeQuads(EFBAccessType type,
InitColVertex(&vertex[3], x1, y2, z, col);
InitColVertex(&vertex[4], x2, y1, z, col);
InitColVertex(&vertex[5], x2, y2, z, col);
if (type == POKE_COLOR)
FramebufferManager::UpdateEFBColorAccessCopy(point->x, point->y, col);
else if (type == POKE_Z)
FramebufferManager::UpdateEFBDepthAccessCopy(point->x, point->y, z);
}
// Issue the draw

View File

@ -5,10 +5,12 @@
#pragma once
#include <d3d11.h>
#include <memory>
#include <string>
#include "Common/MathUtil.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoCommon/RenderBase.h"
@ -48,10 +50,7 @@ private:
D3D12_CPU_DESCRIPTOR_HANDLE m_texture12_cpu = {};
D3D12_GPU_DESCRIPTOR_HANDLE m_texture12_gpu = {};
ID3D12Resource* m_vb12 = nullptr;
D3D12_VERTEX_BUFFER_VIEW m_vb12_view = {};
void* m_vb12_data = nullptr;
unsigned int m_vb12_offset = 0;
std::unique_ptr<D3DStreamBuffer> m_vertex_buffer;
D3D12_INPUT_LAYOUT_DESC m_input_layout12 = {};
D3D12_SHADER_BYTECODE m_pshader12 = {};
@ -75,6 +74,8 @@ void ShutdownUtils();
void SetPointCopySampler();
void SetLinearCopySampler();
void SetViewportAndScissor(u32 top_left_x, u32 top_left_y, u32 width, u32 height, float min_depth = D3D12_MIN_DEPTH, float max_depth = D3D12_MAX_DEPTH);
void DrawShadedTexQuad(D3DTexture2D* texture,
const D3D12_RECT* source,
int source_width,
@ -87,8 +88,7 @@ void DrawShadedTexQuad(D3DTexture2D* texture,
u32 slice = 0,
DXGI_FORMAT rt_format = DXGI_FORMAT_R8G8B8A8_UNORM,
bool inherit_srv_binding = false,
bool rt_multisampled = false,
D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc_override = nullptr
bool rt_multisampled = false
);
void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled);
@ -99,7 +99,6 @@ void DrawEFBPokeQuads(EFBAccessType type,
size_t num_points,
D3D12_BLEND_DESC* blend_desc,
D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc,
D3D12_VIEWPORT* viewport,
D3D12_CPU_DESCRIPTOR_HANDLE* render_target,
D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer,
bool rt_multisampled);

View File

@ -21,14 +21,8 @@ FramebufferManager::Efb FramebufferManager::m_efb;
unsigned int FramebufferManager::m_target_width;
unsigned int FramebufferManager::m_target_height;
D3D12_DEPTH_STENCIL_DESC FramebufferManager::m_depth_resolve_depth_stencil_desc;
D3DTexture2D*& FramebufferManager::GetEFBColorTexture() { return m_efb.color_tex; }
ID3D12Resource*& FramebufferManager::GetEFBColorStagingBuffer() { return m_efb.color_staging_buf; }
D3DTexture2D*& FramebufferManager::GetEFBDepthTexture() { return m_efb.depth_tex; }
D3DTexture2D*& FramebufferManager::GetEFBDepthReadTexture() { return m_efb.depth_read_texture; }
ID3D12Resource*& FramebufferManager::GetEFBDepthStagingBuffer() { return m_efb.depth_staging_buf; }
D3DTexture2D*& FramebufferManager::GetEFBColorTempTexture() { return m_efb.color_temp_tex; }
@ -107,11 +101,6 @@ FramebufferManager::FramebufferManager()
SAFE_RELEASE(buf12);
D3D::SetDebugObjectName12(m_efb.color_temp_tex->GetTex12(), "EFB color temp texture");
// AccessEFB - Sysmem buffer used to retrieve the pixel data from color_tex
texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024);
CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_staging_buf)));
CHECK(hr == S_OK, "create EFB color staging buffer (hr=%#x)", hr);
// EFB depth buffer - primary depth buffer
texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueDSV, IID_PPV_ARGS(&buf12)));
@ -120,24 +109,6 @@ FramebufferManager::FramebufferManager()
SAFE_RELEASE(buf12);
D3D::SetDebugObjectName12(m_efb.depth_tex->GetTex12(), "EFB depth texture");
// Render buffer for AccessEFB (depth data)
texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 1, 1, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);
optimized_clear_valueRTV.Format = DXGI_FORMAT_R32_FLOAT;
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12));
CHECK(hr == S_OK, "create EFB depth read texture (hr=%#x)", hr);
m_efb.depth_read_texture = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON);
SAFE_RELEASE(buf12);
D3D::SetDebugObjectName12(m_efb.depth_read_texture->GetTex12(), "EFB depth read texture (used in Renderer::AccessEFB)");
// AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture
texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(64 * 1024);
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_staging_buf));
CHECK(hr == S_OK, "create EFB depth staging buffer (hr=%#x)", hr);
D3D::SetDebugObjectName12(m_efb.depth_staging_buf, "EFB depth staging texture (used for Renderer::AccessEFB)");
if (g_ActiveConfig.iMultisamples > 1)
{
// Framebuffer resolve textures (color+depth)
@ -148,18 +119,12 @@ FramebufferManager::FramebufferManager()
SAFE_RELEASE(buf12);
D3D::SetDebugObjectName12(m_efb.resolved_color_tex->GetTex12(), "EFB color resolve texture shader resource view");
texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height, m_efb.slices, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12));
CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, m_target_height, hr);
m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON);
m_efb.resolved_depth_tex = new D3DTexture2D(buf12, (D3D11_BIND_FLAG)(D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE), DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON);
SAFE_RELEASE(buf12);
D3D::SetDebugObjectName12(m_efb.resolved_depth_tex->GetTex12(), "EFB depth resolve texture shader resource view");
m_depth_resolve_depth_stencil_desc = {};
m_depth_resolve_depth_stencil_desc.StencilEnable = FALSE;
m_depth_resolve_depth_stencil_desc.DepthEnable = TRUE;
m_depth_resolve_depth_stencil_desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
m_depth_resolve_depth_stencil_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
}
else
{
@ -167,6 +132,8 @@ FramebufferManager::FramebufferManager()
m_efb.resolved_depth_tex = nullptr;
}
InitializeEFBAccessCopies();
s_xfbEncoder.Init();
}
@ -174,17 +141,12 @@ FramebufferManager::~FramebufferManager()
{
s_xfbEncoder.Shutdown();
DestroyEFBAccessCopies();
SAFE_RELEASE(m_efb.color_tex);
SAFE_RELEASE(m_efb.color_temp_tex);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_staging_buf);
SAFE_RELEASE(m_efb.resolved_color_tex);
SAFE_RELEASE(m_efb.depth_tex);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_staging_buf);
SAFE_RELEASE(m_efb.depth_read_texture);
SAFE_RELEASE(m_efb.color_temp_tex);
SAFE_RELEASE(m_efb.resolved_color_tex);
SAFE_RELEASE(m_efb.resolved_depth_tex);
}
@ -211,25 +173,20 @@ void FramebufferManager::ResolveDepthTexture()
{
// ResolveSubresource does not work with depth textures.
// Instead, we use a shader that selects the minimum depth from all samples.
D3D::SetViewportAndScissor(0, 0, m_target_width, m_target_height);
const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast<float>(m_target_width), static_cast<float>(m_target_height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::current_command_list->RSSetViewports(1, &vp12);
m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(0, nullptr, FALSE, &m_efb.resolved_depth_tex->GetDSV12());
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
D3D::SetLinearCopySampler();
// Render a quad covering the entire target, writing SV_Depth.
const D3D12_RECT source_rect = CD3DX12_RECT(0, 0, m_target_width, m_target_height);
D3D::DrawShadedTexQuad(
FramebufferManager::GetEFBDepthTexture(),
&source_rect,
m_target_width,
m_target_height,
StaticShaderCache::GetDepthCopyPixelShader(true),
StaticShaderCache::GetDepthResolveToColorPixelShader(),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(),
StaticShaderCache::GetCopyGeometryShader(),
@ -246,6 +203,215 @@ void FramebufferManager::ResolveDepthTexture()
g_renderer->RestoreAPIState();
}
// Returns the 32-bit value stored for pixel (x, y) in the mapped color readback copy.
// If the copy is not currently mapped, MapEFBColorAccessCopy() is called first.
// x and y are not range-checked here.
u32 FramebufferManager::ReadEFBColorAccessCopy(u32 x, u32 y)
{
	if (m_efb.color_access_readback_map == nullptr)
		MapEFBColorAccessCopy();

	// Rows are color_access_readback_pitch bytes apart; each pixel occupies sizeof(u32) bytes.
	const size_t byte_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32);
	const u8* const pixel = m_efb.color_access_readback_map + byte_offset;

	// memcpy avoids a potentially misaligned/aliasing read through a u32 pointer.
	u32 result;
	memcpy(&result, pixel, sizeof(result));
	return result;
}
// Returns the float stored for pixel (x, y) in the mapped depth readback copy.
// If the copy is not currently mapped, MapEFBDepthAccessCopy() is called first.
// x and y are not range-checked here.
float FramebufferManager::ReadEFBDepthAccessCopy(u32 x, u32 y)
{
	if (m_efb.depth_access_readback_map == nullptr)
		MapEFBDepthAccessCopy();

	// Rows are depth_access_readback_pitch bytes apart; each pixel occupies sizeof(float) bytes.
	const size_t byte_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float);
	const u8* const pixel = m_efb.depth_access_readback_map + byte_offset;

	// memcpy avoids a potentially misaligned/aliasing read through a float pointer.
	float result;
	memcpy(&result, pixel, sizeof(result));
	return result;
}
// Writes `color` into the mapped color readback copy at pixel (x, y).
// Does nothing when the copy is not currently mapped. x and y are not range-checked here.
void FramebufferManager::UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color)
{
	if (m_efb.color_access_readback_map != nullptr)
	{
		// Rows are color_access_readback_pitch bytes apart; each pixel occupies sizeof(u32) bytes.
		const size_t byte_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32);
		u8* const pixel = m_efb.color_access_readback_map + byte_offset;
		memcpy(pixel, &color, sizeof(color));
	}
}
// Writes `depth` into the mapped depth readback copy at pixel (x, y).
// Does nothing when the copy is not currently mapped. x and y are not range-checked here.
void FramebufferManager::UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth)
{
	if (m_efb.depth_access_readback_map != nullptr)
	{
		// Rows are depth_access_readback_pitch bytes apart; each pixel occupies sizeof(float) bytes.
		const size_t byte_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float);
		u8* const pixel = m_efb.depth_access_readback_map + byte_offset;
		memcpy(pixel, &depth, sizeof(depth));
	}
}
void FramebufferManager::InitializeEFBAccessCopies()
{
D3D12_CLEAR_VALUE optimized_color_clear_value = { DXGI_FORMAT_R8G8B8A8_UNORM, { 0.0f, 0.0f, 0.0f, 1.0f } };
D3D12_CLEAR_VALUE optimized_depth_clear_value = { DXGI_FORMAT_R32_FLOAT, { 1.0f } };
CD3DX12_RESOURCE_DESC texdesc12;
ID3D12Resource* buf12;
HRESULT hr;
// EFB access - color resize buffer
texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0);
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_color_clear_value, IID_PPV_ARGS(&buf12));
CHECK(hr == S_OK, "create EFB access color resize buffer (hr=%#x)", hr);
m_efb.color_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM);
D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access color resize buffer");
buf12->Release();
// EFB access - color staging/readback buffer
m_efb.color_access_readback_pitch = D3D::AlignValue(EFB_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.color_access_readback_pitch * EFB_HEIGHT);
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_access_readback_buffer));
D3D::SetDebugObjectName12(m_efb.color_access_readback_buffer, "EFB access color readback buffer");
// EFB access - depth resize buffer
texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, D3D12_TEXTURE_LAYOUT_UNKNOWN, 0);
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COMMON, &optimized_depth_clear_value, IID_PPV_ARGS(&buf12));
CHECK(hr == S_OK, "create EFB access depth resize buffer (hr=%#x)", hr);
m_efb.depth_access_resize_tex = new D3DTexture2D(buf12, D3D11_BIND_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT);
D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(), "EFB access depth resize buffer");
buf12->Release();
// EFB access - depth staging/readback buffer
m_efb.depth_access_readback_pitch = D3D::AlignValue(EFB_WIDTH * sizeof(float), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.depth_access_readback_pitch * EFB_HEIGHT);
hr = D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_access_readback_buffer));
D3D::SetDebugObjectName12(m_efb.color_access_readback_buffer, "EFB access depth readback buffer");
}
// Copies the current EFB color contents into the color readback buffer and maps it, leaving
// m_efb.color_access_readback_map pointing at EFB_WIDTH x EFB_HEIGHT pixels with a row pitch of
// m_efb.color_access_readback_pitch. Submits the queued command list and waits for it, so this
// call stalls the CPU.
void FramebufferManager::MapEFBColorAccessCopy()
{
	D3D::command_list_mgr->CPUAccessNotify();

	ID3D12Resource* src_resource;
	if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT || g_ActiveConfig.iMultisamples > 1)
	{
		// for non-1xIR or multisampled cases, we need to copy to an intermediate texture first
		m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);

		D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT);
		D3D::SetPointCopySampler();
		D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(), FALSE, nullptr);

		// Draw the whole EFB color texture into the EFB_WIDTH x EFB_HEIGHT intermediate target.
		CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height);
		D3D::DrawShadedTexQuad(m_efb.color_tex, &src_rect, m_target_width, m_target_height,
			StaticShaderCache::GetColorCopyPixelShader(true),
			StaticShaderCache::GetSimpleVertexShader(),
			StaticShaderCache::GetSimpleVertexShaderInputLayout(),
			{}, 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false);

		m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
		src_resource = m_efb.color_access_resize_tex->GetTex12();
	}
	else
	{
		// Can source the EFB buffer
		m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
		src_resource = m_efb.color_tex->GetTex12();
	}

	// Copy to staging resource
	D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0, { DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.color_access_readback_pitch } };
	CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.color_access_readback_buffer, dst_footprint);
	CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0);
	D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);

	// Block until completion
	D3D::command_list_mgr->ExecuteQueuedWork(true);

	// Restore EFB resource state if it was sourced from here
	if (src_resource == m_efb.color_tex->GetTex12())
		m_efb.color_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);

	// Restore state after resetting command list
	D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
	g_renderer->RestoreAPIState();

	// Resource copy has finished, so safe to map now.
	// The result is checked: on failure the map pointer would stay null and
	// ReadEFBColorAccessCopy() would dereference it.
	CheckHR(m_efb.color_access_readback_buffer->Map(0, nullptr, reinterpret_cast<void**>(&m_efb.color_access_readback_map)));
}
// Copies the current EFB depth contents into the depth readback buffer and maps it, leaving
// m_efb.depth_access_readback_map pointing at EFB_WIDTH x EFB_HEIGHT floats with a row pitch of
// m_efb.depth_access_readback_pitch. Submits the queued command list and waits for it, so this
// call stalls the CPU.
void FramebufferManager::MapEFBDepthAccessCopy()
{
	D3D::command_list_mgr->CPUAccessNotify();

	ID3D12Resource* src_resource;
	if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT || g_ActiveConfig.iMultisamples > 1)
	{
		// for non-1xIR or multisampled cases, we need to copy to an intermediate texture first
		m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);

		D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT);
		D3D::SetPointCopySampler();
		// Bind the depth resize texture: it is the one transitioned above and copied from below.
		// (Binding the color resize texture here left the copy source unwritten.)
		D3D::current_command_list->OMSetRenderTargets(1, &m_efb.depth_access_resize_tex->GetRTV12(), FALSE, nullptr);

		// Multisampled depth needs the resolve shader; otherwise a plain copy shader is used.
		CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height);
		D3D::DrawShadedTexQuad(m_efb.depth_tex, &src_rect, m_target_width, m_target_height,
			(g_ActiveConfig.iMultisamples > 1) ? StaticShaderCache::GetDepthResolveToColorPixelShader() : StaticShaderCache::GetColorCopyPixelShader(false),
			StaticShaderCache::GetSimpleVertexShader(),
			StaticShaderCache::GetSimpleVertexShaderInputLayout(),
			{}, 1.0f, 0, DXGI_FORMAT_R32_FLOAT, false, false);

		m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
		src_resource = m_efb.depth_access_resize_tex->GetTex12();
	}
	else
	{
		// Can source the EFB buffer
		m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
		src_resource = m_efb.depth_tex->GetTex12();
	}

	// Copy to staging resource
	D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = { 0,{ DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.depth_access_readback_pitch } };
	CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.depth_access_readback_buffer, dst_footprint);
	CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0);
	D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);

	// Block until completion
	D3D::command_list_mgr->ExecuteQueuedWork(true);

	// Restore EFB resource state if it was sourced from here
	if (src_resource == m_efb.depth_tex->GetTex12())
		m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);

	// Restore state after resetting command list
	D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
	g_renderer->RestoreAPIState();

	// Resource copy has finished, so safe to map now.
	// The result is checked: on failure the map pointer would stay null and
	// ReadEFBDepthAccessCopy() would dereference it.
	CheckHR(m_efb.depth_access_readback_buffer->Map(0, nullptr, reinterpret_cast<void**>(&m_efb.depth_access_readback_map)));
}
// Unmaps whichever of the color/depth readback buffers are currently mapped and clears
// their map pointers, so the next Read*AccessCopy() call maps them again.
void FramebufferManager::InvalidateEFBAccessCopies()
{
	// Color copy.
	if (m_efb.color_access_readback_map != nullptr)
	{
		m_efb.color_access_readback_buffer->Unmap(0, nullptr);
		m_efb.color_access_readback_map = nullptr;
	}

	// Depth copy.
	if (m_efb.depth_access_readback_map != nullptr)
	{
		m_efb.depth_access_readback_buffer->Unmap(0, nullptr);
		m_efb.depth_access_readback_map = nullptr;
	}
}
// Tears down the EFB access resources: unmaps any mapped readback buffer, releases the
// resize textures, and hands the readback buffers to the command list manager for
// destruction once the current command list has executed.
void FramebufferManager::DestroyEFBAccessCopies()
{
	// Make sure nothing is left mapped before the buffers go away.
	InvalidateEFBAccessCopies();

	// Color resources.
	SAFE_RELEASE(m_efb.color_access_resize_tex);
	D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.color_access_readback_buffer);
	m_efb.color_access_readback_buffer = nullptr;

	// Depth resources.
	SAFE_RELEASE(m_efb.depth_access_resize_tex);
	D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_efb.depth_access_readback_buffer);
	m_efb.depth_access_readback_buffer = nullptr;
}
void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
// DX12's XFB decoder does not use this function.
@ -255,8 +421,7 @@ void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
void XFBSource::CopyEFB(float gamma)
{
// Copy EFB data to XFB and restore render target again
const D3D12_VIEWPORT vp12 = { 0.f, 0.f, static_cast<float>(texWidth), static_cast<float>(texHeight), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::current_command_list->RSSetViewports(1, &vp12);
D3D::SetViewportAndScissor(0, 0, texWidth, texHeight);
const D3D12_RECT rect = CD3DX12_RECT(0, 0, texWidth, texHeight);

View File

@ -61,11 +61,7 @@ public:
~FramebufferManager();
static D3DTexture2D*& GetEFBColorTexture();
static ID3D12Resource*& GetEFBColorStagingBuffer();
static D3DTexture2D*& GetEFBDepthTexture();
static D3DTexture2D*& GetEFBDepthReadTexture();
static ID3D12Resource*& GetEFBDepthStagingBuffer();
static D3DTexture2D*& GetResolvedEFBColorTexture();
static D3DTexture2D*& GetResolvedEFBDepthTexture();
@ -74,6 +70,17 @@ public:
static void ResolveDepthTexture();
// Access EFB from CPU
static u32 ReadEFBColorAccessCopy(u32 x, u32 y);
static float ReadEFBDepthAccessCopy(u32 x, u32 y);
static void UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color);
static void UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth);
static void InitializeEFBAccessCopies();
static void MapEFBColorAccessCopy();
static void MapEFBDepthAccessCopy();
static void InvalidateEFBAccessCopies();
static void DestroyEFBAccessCopies();
private:
std::unique_ptr<XFBSourceBase> CreateXFBSource(unsigned int target_width, unsigned int target_height, unsigned int layers) override;
void GetTargetSize(unsigned int* width, unsigned int* height) override;
@ -83,25 +90,29 @@ private:
static struct Efb
{
D3DTexture2D* color_tex;
ID3D12Resource* color_staging_buf;
D3DTexture2D* depth_tex;
ID3D12Resource* depth_staging_buf;
D3DTexture2D* depth_read_texture;
D3DTexture2D* color_temp_tex;
D3DTexture2D* resolved_color_tex;
D3DTexture2D* resolved_depth_tex;
D3DTexture2D* color_access_resize_tex;
ID3D12Resource* color_access_readback_buffer;
u8* color_access_readback_map;
u32 color_access_readback_pitch;
D3DTexture2D* depth_access_resize_tex;
ID3D12Resource* depth_access_readback_buffer;
u8* depth_access_readback_map;
u32 depth_access_readback_pitch;
int slices;
} m_efb;
static unsigned int m_target_width;
static unsigned int m_target_height;
static D3D12_DEPTH_STENCIL_DESC m_depth_resolve_depth_stencil_desc;
};
} // namespace DX12

View File

@ -89,8 +89,6 @@ void PSTextureEncoder::Init()
D3D::SetDebugObjectName12(m_out_readback_buffer, "efb encoder output staging buffer");
CheckHR(m_out_readback_buffer->Map(0, nullptr, &m_out_readback_buffer_data));
// Create constant buffer for uploading data to shaders. Need to align to 256 bytes.
unsigned int encode_params_buffer_size = (sizeof(EFBEncodeParams) + 0xff) & ~0xff;
@ -152,8 +150,7 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p
const u32 words_per_row = bytes_per_row / sizeof(u32);
D3D12_VIEWPORT vp = { 0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::current_command_list->RSSetViewports(1, &vp);
D3D::SetViewportAndScissor(0, 0, words_per_row, num_blocks_y);
constexpr EFBRectangle full_src_rect(0, 0, EFB_WIDTH, EFB_HEIGHT);
@ -221,8 +218,10 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p
D3D::command_list_mgr->ExecuteQueuedWork(true);
// Transfer staging buffer to GameCube/Wii RAM
void* readback_data_map;
CheckHR(m_out_readback_buffer->Map(0, nullptr, &readback_data_map));
u8* src = static_cast<u8*>(m_out_readback_buffer_data);
u8* src = static_cast<u8*>(readback_data_map);
u32 read_stride = std::min(bytes_per_row, dst_location.PlacedFootprint.Footprint.RowPitch);
for (unsigned int y = 0; y < num_blocks_y; ++y)
{
@ -232,6 +231,8 @@ void PSTextureEncoder::Encode(u8* dst, u32 format, u32 native_width, u32 bytes_p
src += dst_location.PlacedFootprint.Footprint.RowPitch;
}
m_out_readback_buffer->Unmap(0, nullptr);
// Restores proper viewport/scissor settings.
g_renderer->RestoreAPIState();

View File

@ -29,7 +29,6 @@ private:
D3D12_CPU_DESCRIPTOR_HANDLE m_out_rtv_cpu = {};
ID3D12Resource* m_out_readback_buffer = nullptr;
void* m_out_readback_buffer_data = nullptr;
ID3D12Resource* m_encode_params_buffer = nullptr;
void* m_encode_params_buffer_data = nullptr;

View File

@ -51,8 +51,6 @@ static bool s_last_xfb_mode = false;
static Television s_television;
static ID3D12Resource* s_access_efb_constant_buffer = nullptr;
enum CLEAR_BLEND_DESC
{
CLEAR_BLEND_DESC_ALL_CHANNELS_ENABLED = 0,
@ -78,7 +76,6 @@ D3D12_DEPTH_STENCIL_DESC g_reset_depth_desc = {};
D3D12_RASTERIZER_DESC g_reset_rast_desc = {};
static ID3D12Resource* s_screenshot_texture = nullptr;
static void* s_screenshot_texture_data = nullptr;
// Nvidia stereo blitting struct defined in "nvstereo.h" from the Nvidia SDK
typedef struct _Nv_Stereo_Image_Header
@ -110,25 +107,6 @@ static void SetupDeviceObjects()
g_framebuffer_manager = std::make_unique<FramebufferManager>();
float colmat[20] = { 0.0f };
colmat[0] = colmat[5] = colmat[10] = 1.0f;
CheckHR(
D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(sizeof(colmat)),
D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr,
IID_PPV_ARGS(&s_access_efb_constant_buffer)
)
);
// Copy inital data to access_efb_cbuf12.
void* access_efb_constant_buffer_data = nullptr;
CheckHR(s_access_efb_constant_buffer->Map(0, nullptr, &access_efb_constant_buffer_data));
memcpy(access_efb_constant_buffer_data, colmat, sizeof(colmat));
D3D12_DEPTH_STENCIL_DESC depth_desc;
depth_desc.DepthEnable = FALSE;
depth_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
@ -183,7 +161,6 @@ static void SetupDeviceObjects()
g_reset_rast_desc = rast_desc;
s_screenshot_texture = nullptr;
s_screenshot_texture_data = nullptr;
}
// Kill off all device objects
@ -197,9 +174,6 @@ static void TeardownDeviceObjects()
s_screenshot_texture = nullptr;
}
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_access_efb_constant_buffer);
s_access_efb_constant_buffer = nullptr;
s_television.Shutdown();
gx_state_cache.Clear();
@ -224,8 +198,6 @@ void CreateScreenshotTexture()
IID_PPV_ARGS(&s_screenshot_texture)
)
);
CheckHR(s_screenshot_texture->Map(0, nullptr, &s_screenshot_texture_data));
}
static D3D12_BOX GetScreenshotSourceBox(const TargetRectangle& target_rc)
@ -394,197 +366,65 @@ void Renderer::SetColorMask()
// - GX_PokeZMode (TODO)
// Reads one EFB value back for the CPU.
//   type      - selects the color path (PEEK_COLOR) or the depth path (PEEK_Z).
//   x, y      - EFB coordinates; forwarded to FramebufferManager::ReadEFBColorAccessCopy /
//               ReadEFBDepthAccessCopy and used to build the one-pixel EFB rectangle.
//   poke_data - not referenced in the lines shown here.
// Returns, for color, the pixel with its alpha chosen by PixelEngine::GetAlphaReadMode().ReadMode
// (2: left as-is, 1: forced to 0xFF, otherwise cleared); for depth, a value clamped to 16 bits
// when the pixel format is RGB565_Z16 and to 24 bits otherwise.
// NOTE(review): this span lies inside a diff hunk (-394,197 +366,65) whose +/- markers are not
// present, so superseded statements and their replacements appear side by side (e.g. paired
// `ret = ...` / `color = ...` lines and consecutive `return` statements). Confirm against the
// real file before relying on statement order.
u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
{
// EXISTINGD3D11TODO: This function currently is broken if anti-aliasing is enabled
// Convert EFB dimensions to the ones of our render target
EFBRectangle efb_pixel_rc;
efb_pixel_rc.left = x;
efb_pixel_rc.top = y;
efb_pixel_rc.right = x + 1;
efb_pixel_rc.bottom = y + 1;
TargetRectangle target_pixel_rc = Renderer::ConvertEFBRectangle(efb_pixel_rc);
// Take the mean of the resulting dimensions; TODO: Don't use the center pixel, compute the average color instead
D3D12_RECT rect_to_lock;
if (type == PEEK_COLOR || type == PEEK_Z)
if (type == PEEK_COLOR)
{
rect_to_lock.left = (target_pixel_rc.left + target_pixel_rc.right) / 2;
rect_to_lock.top = (target_pixel_rc.top + target_pixel_rc.bottom) / 2;
rect_to_lock.right = rect_to_lock.left + 1;
rect_to_lock.bottom = rect_to_lock.top + 1;
}
else
{
rect_to_lock.left = target_pixel_rc.left;
rect_to_lock.right = target_pixel_rc.right;
rect_to_lock.top = target_pixel_rc.top;
rect_to_lock.bottom = target_pixel_rc.bottom;
}
u32 color = FramebufferManager::ReadEFBColorAccessCopy(x, y);
if (type == PEEK_Z)
{
D3D::command_list_mgr->CPUAccessNotify();
// depth buffers can only be completely CopySubresourceRegion'ed, so we're using DrawShadedTexQuad instead
// D3D12TODO: Is above statement true on D3D12?
D3D12_VIEWPORT vp12 = { 0.f, 0.f, 1.f, 1.f, D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::current_command_list->RSSetViewports(1, &vp12);
D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_access_efb_constant_buffer->GetGPUVirtualAddress());
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBDepthReadTexture()->GetRTV12(), FALSE, nullptr);
D3D::SetPointCopySampler();
D3D::DrawShadedTexQuad(
FramebufferManager::GetEFBDepthTexture(),
&rect_to_lock,
Renderer::GetTargetWidth(),
Renderer::GetTargetHeight(),
StaticShaderCache::GetColorCopyPixelShader(true),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(),
D3D12_SHADER_BYTECODE(),
1.0f,
0,
DXGI_FORMAT_R32_FLOAT,
false,
FramebufferManager::GetEFBDepthReadTexture()->GetMultisampled()
);
// copy to system memory
D3D12_BOX src_box = CD3DX12_BOX(0, 0, 0, 1, 1, 1);
ID3D12Resource* readback_buffer = FramebufferManager::GetEFBDepthStagingBuffer();
D3D12_TEXTURE_COPY_LOCATION dst_location = {};
dst_location.pResource = readback_buffer;
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_location.PlacedFootprint.Offset = 0;
dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R32_FLOAT;
dst_location.PlacedFootprint.Footprint.Width = 1;
dst_location.PlacedFootprint.Footprint.Height = 1;
dst_location.PlacedFootprint.Footprint.Depth = 1;
dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
D3D12_TEXTURE_COPY_LOCATION src_location = {};
src_location.pResource = FramebufferManager::GetEFBDepthReadTexture()->GetTex12();
src_location.SubresourceIndex = 0;
src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
FramebufferManager::GetEFBDepthReadTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
// Need to wait for the GPU to complete the copy (and all prior operations) before we can read it on the CPU.
D3D::command_list_mgr->ExecuteQueuedWork(true);
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE );
D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
// Restores proper viewport/scissor settings.
g_renderer->RestoreAPIState();
// read the data from system memory
void* readback_buffer_data = nullptr;
CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data));
// depth buffer is inverted in the d3d backend
float val = 1.0f - reinterpret_cast<float*>(readback_buffer_data)[0];
u32 ret = 0;
if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
{
// if Z is in 16 bit format you must return a 16 bit integer
ret = MathUtil::Clamp<u32>(static_cast<u32>(val * 65536.0f), 0, 0xFFFF);
}
else
{
ret = MathUtil::Clamp<u32>(static_cast<u32>(val * 16777216.0f), 0, 0xFFFFFF);
}
// EXISTINGD3D11TODO: in RE0 this value is often off by one in Video_DX9 (where this code is derived from), which causes lighting to disappear
return ret;
}
else if (type == PEEK_COLOR)
{
D3D::command_list_mgr->CPUAccessNotify();
ID3D12Resource* readback_buffer = FramebufferManager::GetEFBColorStagingBuffer();
D3D12_BOX src_box = CD3DX12_BOX(rect_to_lock.left, rect_to_lock.top, 0, rect_to_lock.right, rect_to_lock.bottom, 1);
D3D12_TEXTURE_COPY_LOCATION dst_location = {};
dst_location.pResource = readback_buffer;
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_location.PlacedFootprint.Offset = 0;
dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
dst_location.PlacedFootprint.Footprint.Width = 1;
dst_location.PlacedFootprint.Footprint.Height = 1;
dst_location.PlacedFootprint.Footprint.Depth = 1;
dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
D3D12_TEXTURE_COPY_LOCATION src_location = {};
src_location.pResource = FramebufferManager::GetResolvedEFBColorTexture()->GetTex12();
src_location.SubresourceIndex = 0;
src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
FramebufferManager::GetResolvedEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
// Need to wait for the GPU to complete the copy (and all prior operations) before we can read it on the CPU.
D3D::command_list_mgr->ExecuteQueuedWork(true);
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE, &FramebufferManager::GetEFBDepthTexture()->GetDSV12());
// Restores proper viewport/scissor settings.
g_renderer->RestoreAPIState();
// read the data from system memory
void* readback_buffer_data = nullptr;
CheckHR(readback_buffer->Map(0, nullptr, &readback_buffer_data));
u32 ret = reinterpret_cast<u32*>(readback_buffer_data)[0];
// a little-endian value is expected to be returned
// (the expression below exchanges byte 0 with byte 2 and keeps bytes 1 and 3 in place)
color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000));
// check what to do with the alpha channel (GX_PokeAlphaRead)
PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode();
if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24)
{
ret = RGBA8ToRGBA6ToRGBA8(ret);
color = RGBA8ToRGBA6ToRGBA8(color);
}
else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
{
ret = RGBA8ToRGB565ToRGBA8(ret);
color = RGBA8ToRGB565ToRGBA8(color);
}
// Every pixel format other than RGBA6_Z24 gets its top byte (alpha) forced to 0xFF.
if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24)
{
ret |= 0xFF000000;
color |= 0xFF000000;
}
if (alpha_read_mode.ReadMode == 2)
{
return ret; // GX_READ_NONE
return color; // GX_READ_NONE
}
else if (alpha_read_mode.ReadMode == 1)
{
return (ret | 0xFF000000); // GX_READ_FF
return (color | 0xFF000000); // GX_READ_FF
}
else /*if(alpha_read_mode.ReadMode == 0)*/
{
return (ret & 0x00FFFFFF); // GX_READ_00
return (color & 0x00FFFFFF); // GX_READ_00
}
}
else // if (type == PEEK_Z)
{
// depth buffer is inverted in the d3d backend
float depth = 1.0f - FramebufferManager::ReadEFBDepthAccessCopy(x, y);
u32 ret = 0;
if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16)
{
// if Z is in 16 bit format you must return a 16 bit integer
ret = MathUtil::Clamp<u32>(static_cast<u32>(depth * 65536.0f), 0, 0xFFFF);
}
else
{
// otherwise the depth is scaled and clamped to 24 bits
ret = MathUtil::Clamp<u32>(static_cast<u32>(depth * 16777216.0f), 0, 0xFFFFFF);
}
return 0;
return ret;
}
}
void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points)
{
D3D12_VIEWPORT vp = { 0.0f, 0.0f, static_cast<float>(GetTargetWidth()), static_cast<float>(GetTargetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::SetViewportAndScissor(0, 0, GetTargetWidth(), GetTargetHeight());
if (type == POKE_COLOR)
{
@ -595,7 +435,6 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num
num_points,
&g_reset_blend_desc,
&g_reset_depth_desc,
&vp,
&FramebufferManager::GetEFBColorTexture()->GetRTV12(),
nullptr,
FramebufferManager::GetEFBColorTexture()->GetMultisampled()
@ -609,7 +448,6 @@ void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num
num_points,
&s_clear_blend_descs[CLEAR_BLEND_DESC_ALL_CHANNELS_DISABLED],
&s_clear_depth_descs[CLEAR_DEPTH_DESC_DEPTH_ENABLED_WRITES_ENABLED],
&vp,
&FramebufferManager::GetEFBColorTexture()->GetRTV12(),
&FramebufferManager::GetEFBDepthTexture()->GetDSV12(),
FramebufferManager::GetEFBColorTexture()->GetMultisampled()
@ -689,23 +527,15 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha
// Update the view port for clearing the picture
TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc);
D3D12_VIEWPORT vp = {
static_cast<float>(target_rc.left),
static_cast<float>(target_rc.top),
static_cast<float>(target_rc.GetWidth()),
static_cast<float>(target_rc.GetHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D::current_command_list->RSSetViewports(1, &vp);
// Color is passed in bgra mode so we need to convert it to rgba
u32 rgba_color = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000);
D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight());
D3D::DrawClearQuad(rgba_color, 1.0f - (z & 0xFFFFFF) / 16777216.0f, blend_desc, depth_stencil_desc, FramebufferManager::GetEFBColorTexture()->GetMultisampled());
// Restores proper viewport/scissor settings.
g_renderer->RestoreAPIState();
FramebufferManager::InvalidateEFBAccessCopies();
}
void Renderer::ReinterpretPixelData(unsigned int convtype)
@ -729,16 +559,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
return;
}
D3D12_VIEWPORT vp = {
0.f,
0.f,
static_cast<float>(g_renderer->GetTargetWidth()),
static_cast<float>(g_renderer->GetTargetHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D::current_command_list->RSSetViewports(1, &vp);
D3D::SetViewportAndScissor(0, 0, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight());
FramebufferManager::GetEFBColorTempTexture()->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTempTexture()->GetRTV12(), FALSE, nullptr);
@ -852,7 +673,12 @@ bool Renderer::SaveScreenshot(const std::string& filename, const TargetRectangle
D3D::command_list_mgr->ExecuteQueuedWork(true);
saved_png = TextureToPng(static_cast<u8*>(s_screenshot_texture_data), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false);
void* screenshot_texture_map;
CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map));
saved_png = TextureToPng(static_cast<u8*>(screenshot_texture_map), dst_location.PlacedFootprint.Footprint.RowPitch, filename, source_box.right - source_box.left, source_box.bottom - source_box.top, false);
s_screenshot_texture->Unmap(0, nullptr);
if (saved_png)
{
@ -906,6 +732,9 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
return;
}
// Invalidate EFB access copies. Not strictly necessary, but this avoids having the buffers mapped when calling Present().
FramebufferManager::InvalidateEFBAccessCopies();
// Prepare to copy the XFBs to our backbuffer
UpdateDrawRectangle(s_backbuffer_width, s_backbuffer_height);
TargetRectangle target_rc = GetTargetRectangle();
@ -916,27 +745,13 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
float clear_color[4] = { 0.f, 0.f, 0.f, 1.f };
D3D::current_command_list->ClearRenderTargetView(D3D::GetBackBuffer()->GetRTV12(), clear_color, 0, nullptr);
// D3D12: Because scissor-testing is always enabled, change scissor rect to backbuffer in case EFB is smaller
// than swap chain back buffer.
D3D12_RECT back_buffer_rect = { 0L, 0L, GetBackbufferWidth(), GetBackbufferHeight() };
D3D::current_command_list->RSSetScissorRects(1, &back_buffer_rect);
// activate linear filtering for the buffer copies
D3D::SetLinearCopySampler();
if (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB)
{
// EXISTINGD3D11TODO: Television should be used to render Virtual XFB mode as well.
D3D12_VIEWPORT vp12 = {
static_cast<float>(target_rc.left),
static_cast<float>(target_rc.top),
static_cast<float>(target_rc.GetWidth()),
static_cast<float>(target_rc.GetHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D::current_command_list->RSSetViewports(1, &vp12);
D3D::SetViewportAndScissor(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight());
s_television.Submit(xfb_addr, fb_stride, fb_width, fb_height);
s_television.Render();
@ -1063,7 +878,12 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
w = s_record_width;
h = s_record_height;
}
formatBufferDump(static_cast<u8*>(s_screenshot_texture_data), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch);
void* screenshot_texture_map;
CheckHR(s_screenshot_texture->Map(0, nullptr, &screenshot_texture_map));
formatBufferDump(static_cast<u8*>(screenshot_texture_map), &frame_data[0], source_width, source_height, dst_location.PlacedFootprint.Footprint.RowPitch);
s_screenshot_texture->Unmap(0, nullptr);
FlipImageData(&frame_data[0], w, h);
AVIDump::AddFrame(&frame_data[0], source_width, source_height);
}
@ -1084,16 +904,7 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
}
// Reset viewport for drawing text
D3D12_VIEWPORT vp = {
0.0f,
0.0f,
static_cast<float>(GetBackbufferWidth()),
static_cast<float>(GetBackbufferHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D::current_command_list->RSSetViewports(1, &vp);
D3D::SetViewportAndScissor(0, 0, GetBackbufferWidth(), GetBackbufferHeight());
Renderer::DrawDebugText();
@ -1135,9 +946,16 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
s_last_stereo_mode != (g_ActiveConfig.iStereoMode > 0))
{
s_last_xfb_mode = g_ActiveConfig.bUseRealXFB;
s_last_multisamples = g_ActiveConfig.iMultisamples;
// Block on any changes until the GPU catches up, so we can free resources safely.
D3D::command_list_mgr->ExecuteQueuedWork(true);
if (s_last_multisamples != g_ActiveConfig.iMultisamples)
{
s_last_multisamples = g_ActiveConfig.iMultisamples;
StaticShaderCache::InvalidateMSAAShaders();
gx_state_cache.OnMSAASettingsChanged();
}
if (window_resized)
{
@ -1272,6 +1090,9 @@ void Renderer::ApplyState(bool use_dst_alpha)
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, false);
}
// Always called prior to drawing, so we can invalidate the CPU EFB copies here.
FramebufferManager::InvalidateEFBAccessCopies();
}
void Renderer::RestoreState()
@ -1509,30 +1330,12 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D
TargetRectangle left_rc, right_rc;
ConvertStereoRectangle(dst, left_rc, right_rc);
D3D12_VIEWPORT left_vp = {
static_cast<float>(left_rc.left),
static_cast<float>(left_rc.top),
static_cast<float>(left_rc.GetWidth()),
static_cast<float>(left_rc.GetHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D12_VIEWPORT right_vp = {
static_cast<float>(right_rc.left),
static_cast<float>(right_rc.top),
static_cast<float>(right_rc.GetWidth()),
static_cast<float>(right_rc.GetHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
// Swap chain backbuffer is never multisampled..
D3D::current_command_list->RSSetViewports(1, &left_vp);
D3D::SetViewportAndScissor(left_rc.left, left_rc.top, left_rc.GetWidth(), left_rc.GetHeight());
D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false, false);
D3D::current_command_list->RSSetViewports(1, &right_vp);
D3D::SetViewportAndScissor(right_rc.left, right_rc.top, right_rc.GetWidth(), right_rc.GetHeight());
D3D::DrawShadedTexQuad(src_texture, src.AsRECT(), src_width, src_height, StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(), StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), gamma, 1, DXGI_FORMAT_R8G8B8A8_UNORM, false, false);
}
else if (g_ActiveConfig.iStereoMode == STEREO_3DVISION)
@ -1574,8 +1377,7 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D
}
else
{
D3D12_VIEWPORT vp = { static_cast<float>(dst.left), static_cast<float>(dst.top), static_cast<float>(dst.GetWidth()), static_cast<float>(dst.GetHeight()), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::current_command_list->RSSetViewports(1, &vp);
D3D::SetViewportAndScissor(dst.left, dst.top, dst.GetWidth(), dst.GetHeight());
D3D::DrawShadedTexQuad(
src_texture,

View File

@ -15,7 +15,7 @@ namespace DX12
static ID3DBlob* s_color_matrix_program_blob[2] = {};
static ID3DBlob* s_color_copy_program_blob[2] = {};
static ID3DBlob* s_depth_matrix_program_blob[2] = {};
static ID3DBlob* s_depth_copy_program_blob[2] = {};
static ID3DBlob* s_depth_resolve_to_color_program_blob = {};
static ID3DBlob* s_clear_program_blob = {};
static ID3DBlob* s_anaglyph_program_blob = {};
static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {};
@ -73,17 +73,6 @@ static constexpr const char s_color_copy_program_hlsl[] = {
"}\n"
};
// HLSL pixel shader source: samples the Texture2DArray Tex0 through samp0 at uv0
// and writes the sampled value to the SV_Depth output.
static constexpr const char s_depth_copy_program_hlsl[] = {
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"void main(\n"
"out float odepth : SV_Depth,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"odepth = Tex0.Sample(samp0,uv0);\n"
"}\n"
};
// Anaglyph Red-Cyan shader based on Dubois algorithm
// Constants taken from the paper:
// "Conversion of a Stereo Pair to Anaglyph with
@ -126,19 +115,19 @@ static constexpr const char s_color_copy_program_msaa_hlsl[] = {
"}\n"
};
static constexpr const char s_depth_copy_program_msaa_hlsl[] = {
static constexpr const char s_depth_resolve_to_color_program_hlsl[] = {
"#define SAMPLES %d\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"void main(\n"
" out float depth : SV_Depth,\n"
" out float ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0)\n"
"{\n"
" int width, height, slices, samples;\n"
" Tex0.GetDimensions(width, height, slices, samples);\n"
" depth = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n"
" ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n"
" for(int i = 1; i < SAMPLES; ++i)\n"
" depth = min(depth, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n"
" ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n"
"}\n"
};
@ -497,25 +486,21 @@ D3D12_SHADER_BYTECODE StaticShaderCache::GetColorCopyPixelShader(bool multisampl
return bytecode;
}
D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthCopyPixelShader(bool multisampled)
D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthResolveToColorPixelShader()
{
D3D12_SHADER_BYTECODE bytecode = {};
if (!multisampled || g_ActiveConfig.iMultisamples == 1)
if (s_depth_resolve_to_color_program_blob)
{
bytecode = { s_depth_copy_program_blob[0]->GetBufferPointer(), s_depth_copy_program_blob[0]->GetBufferSize() };
}
else if (s_depth_copy_program_blob[1])
{
bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() };
bytecode = { s_depth_resolve_to_color_program_blob->GetBufferPointer(), s_depth_resolve_to_color_program_blob->GetBufferSize() };
}
else
{
// create MSAA shader for current AA mode
std::string buf = StringFromFormat(s_depth_copy_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
std::string buf = StringFromFormat(s_depth_resolve_to_color_program_hlsl, g_ActiveConfig.iMultisamples);
D3D::CompilePixelShader(buf, &s_depth_copy_program_blob[1]);
bytecode = { s_depth_copy_program_blob[1]->GetBufferPointer(), s_depth_copy_program_blob[1]->GetBufferSize() };
D3D::CompilePixelShader(buf, &s_depth_resolve_to_color_program_blob);
bytecode = { s_depth_resolve_to_color_program_blob->GetBufferPointer(), s_depth_resolve_to_color_program_blob->GetBufferSize() };
}
return bytecode;
@ -646,7 +631,6 @@ void StaticShaderCache::Init()
D3D::CompilePixelShader(s_clear_program_hlsl, &s_clear_program_blob);
D3D::CompilePixelShader(s_anaglyph_program_hlsl, &s_anaglyph_program_blob);
D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]);
D3D::CompilePixelShader(s_depth_copy_program_hlsl, &s_depth_copy_program_blob[0]);
D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]);
D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]);
@ -667,6 +651,7 @@ void StaticShaderCache::InvalidateMSAAShaders()
SAFE_RELEASE(s_depth_matrix_program_blob[1]);
SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[1]);
SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[1]);
SAFE_RELEASE(s_depth_resolve_to_color_program_blob);
}
void StaticShaderCache::Shutdown()
@ -675,6 +660,7 @@ void StaticShaderCache::Shutdown()
SAFE_RELEASE(s_clear_program_blob);
SAFE_RELEASE(s_anaglyph_program_blob);
SAFE_RELEASE(s_depth_resolve_to_color_program_blob);
for (unsigned int i = 0; i < 2; ++i)
{

View File

@ -18,7 +18,7 @@ public:
static D3D12_SHADER_BYTECODE GetColorMatrixPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetColorCopyPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetDepthMatrixPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetDepthCopyPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetDepthResolveToColorPixelShader();
static D3D12_SHADER_BYTECODE GetClearPixelShader();
static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader();
static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled);

View File

@ -25,10 +25,10 @@ namespace DX12
static std::unique_ptr<TextureEncoder> s_encoder = nullptr;
static std::unique_ptr<D3DStreamBuffer> s_efb_copy_stream_buffer = nullptr;
static u32 s_efb_copy_last_cbuf_id = UINT_MAX;
static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr;
static void* s_texture_cache_entry_readback_buffer_data = nullptr;
static UINT s_texture_cache_entry_readback_buffer_size = 0;
static size_t s_texture_cache_entry_readback_buffer_size = 0;
TextureCache::TCacheEntry::~TCacheEntry()
{
@ -42,47 +42,27 @@ void TextureCache::TCacheEntry::Bind(unsigned int stage)
bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level)
{
// EXISTINGD3D11TODO: Somehow implement this (D3DX11 doesn't support dumping individual LODs)
static bool warn_once = true;
if (level && warn_once)
u32 level_width = std::max(config.width >> level, 1u);
u32 level_height = std::max(config.height >> level, 1u);
size_t level_pitch = D3D::AlignValue(level_width * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
size_t required_readback_buffer_size = level_pitch * level_height;
// Check if the current readback buffer is large enough
if (required_readback_buffer_size > s_texture_cache_entry_readback_buffer_size)
{
WARN_LOG(VIDEO, "Dumping individual LOD not supported by D3D12 backend!");
warn_once = false;
return false;
}
// Reallocate the buffer with the new size. Safe to immediately release because we're the only user and we block until completion.
if (s_texture_cache_entry_readback_buffer)
s_texture_cache_entry_readback_buffer->Release();
D3D12_RESOURCE_DESC texture_desc = m_texture->GetTex12()->GetDesc();
const unsigned int required_readback_buffer_size = D3D::AlignValue(static_cast<unsigned int>(texture_desc.Width) * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
if (s_texture_cache_entry_readback_buffer_size < required_readback_buffer_size)
{
s_texture_cache_entry_readback_buffer_size = required_readback_buffer_size;
// We know the readback buffer won't be in use right now, since we wait on this thread
// for the GPU to finish execution right after copying to it.
SAFE_RELEASE(s_texture_cache_entry_readback_buffer);
}
if (!s_texture_cache_entry_readback_buffer_size)
{
CheckHR(
D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size),
D3D12_RESOURCE_STATE_COPY_DEST,
nullptr,
IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer)
)
);
CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &s_texture_cache_entry_readback_buffer_data));
IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer)));
}
bool saved_png = false;
m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
D3D12_TEXTURE_COPY_LOCATION dst_location = {};
@ -90,26 +70,31 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_location.PlacedFootprint.Offset = 0;
dst_location.PlacedFootprint.Footprint.Depth = 1;
dst_location.PlacedFootprint.Footprint.Format = texture_desc.Format;
dst_location.PlacedFootprint.Footprint.Width = static_cast<UINT>(texture_desc.Width);
dst_location.PlacedFootprint.Footprint.Height = texture_desc.Height;
dst_location.PlacedFootprint.Footprint.RowPitch = D3D::AlignValue(dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
dst_location.PlacedFootprint.Footprint.Width = level_width;
dst_location.PlacedFootprint.Footprint.Height = level_height;
dst_location.PlacedFootprint.Footprint.RowPitch = static_cast<UINT>(level_pitch);
D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0);
D3D12_TEXTURE_COPY_LOCATION src_location = CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), level);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);
D3D::command_list_mgr->ExecuteQueuedWork(true);
saved_png = TextureToPng(
static_cast<u8*>(s_texture_cache_entry_readback_buffer_data),
// Map readback buffer and save to file.
void* readback_texture_map;
CheckHR(s_texture_cache_entry_readback_buffer->Map(0, nullptr, &readback_texture_map));
bool saved = TextureToPng(
static_cast<u8*>(readback_texture_map),
dst_location.PlacedFootprint.Footprint.RowPitch,
filename,
dst_location.PlacedFootprint.Footprint.Width,
dst_location.PlacedFootprint.Footprint.Height
);
return saved_png;
s_texture_cache_entry_readback_buffer->Unmap(0, nullptr);
return saved;
}
void TextureCache::TCacheEntry::CopyRectangleFromTexture(
@ -164,15 +149,7 @@ void TextureCache::TCacheEntry::CopyRectangleFromTexture(
return;
}
const D3D12_VIEWPORT vp = {
float(dst_rect.left),
float(dst_rect.top),
float(dst_rect.GetWidth()),
float(dst_rect.GetHeight()),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D::current_command_list->RSSetViewports(1, &vp);
D3D::SetViewportAndScissor(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight());
m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr);
@ -272,8 +249,6 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry
void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat src_format, const EFBRectangle& srcRect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat)
{
static unsigned int old_cbuf_id = UINT_MAX;
// When copying at half size, in multisampled mode, resolve the color/depth buffer first.
// This is because multisampled texture reads go through Load, not Sample, and the linear
// filter is ignored.
@ -289,28 +264,19 @@ void TextureCache::TCacheEntry::FromRenderTarget(u8* dst, PEControl::PixelFormat
FramebufferManager::GetResolvedEFBColorTexture();
}
// stretch picture with increased internal resolution
const D3D12_VIEWPORT vp = {
0.f,
0.f,
static_cast<float>(config.width),
static_cast<float>(config.height),
D3D12_MIN_DEPTH,
D3D12_MAX_DEPTH
};
D3D::current_command_list->RSSetViewports(1, &vp);
// set transformation
if (cbuf_id != old_cbuf_id)
if (s_efb_copy_last_cbuf_id != cbuf_id)
{
s_efb_copy_stream_buffer->AllocateSpaceInBuffer(28 * sizeof(float), 256);
memcpy(s_efb_copy_stream_buffer->GetCPUAddressOfCurrentAllocation(), colmat, 28 * sizeof(float));
old_cbuf_id = cbuf_id;
s_efb_copy_last_cbuf_id = cbuf_id;
}
// stretch picture with increased internal resolution
D3D::SetViewportAndScissor(0, 0, config.width, config.height);
D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVONE, s_efb_copy_stream_buffer->GetGPUAddressOfCurrentAllocation());
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
@ -441,14 +407,13 @@ void main(
void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, TlutFormat format)
{
// stretch picture with increased internal resolution
const D3D12_VIEWPORT vp = { 0.f, 0.f, static_cast<float>(unconverted->config.width), static_cast<float>(unconverted->config.height), D3D12_MIN_DEPTH, D3D12_MAX_DEPTH };
D3D::current_command_list->RSSetViewports(1, &vp);
const unsigned int palette_buffer_allocation_size = 512;
m_palette_stream_buffer->AllocateSpaceInBuffer(palette_buffer_allocation_size, 256);
memcpy(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation(), palette, palette_buffer_allocation_size);
// stretch picture with increased internal resolution
D3D::SetViewportAndScissor(0, 0, unconverted->config.width, unconverted->config.height);
// D3D12: Because the second SRV slot is occupied by this buffer, and an arbitrary texture occupies the first SRV slot,
// we need to allocate temporary space out of our descriptor heap, place the palette SRV in the second slot, then copy the
// existing texture's descriptor into the first slot.
@ -554,9 +519,9 @@ TextureCache::TextureCache()
s_encoder->Init();
s_efb_copy_stream_buffer = std::make_unique<D3DStreamBuffer>(1024 * 1024, 1024 * 1024, nullptr);
s_efb_copy_last_cbuf_id = UINT_MAX;
s_texture_cache_entry_readback_buffer = nullptr;
s_texture_cache_entry_readback_buffer_data = nullptr;
s_texture_cache_entry_readback_buffer_size = 0;
m_palette_pixel_shaders[GX_TL_IA8] = GetConvertShader12(std::string("IA8"));
@ -606,8 +571,10 @@ TextureCache::~TextureCache()
if (s_texture_cache_entry_readback_buffer)
{
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_texture_cache_entry_readback_buffer);
// Safe to destroy the readback buffer immediately, as the only time it's used is blocked until completion.
s_texture_cache_entry_readback_buffer->Release();
s_texture_cache_entry_readback_buffer = nullptr;
s_texture_cache_entry_readback_buffer_size = 0;
}
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_palette_uniform_buffer);
@ -635,7 +602,7 @@ void TextureCache::BindTextures()
D3D12_GPU_DESCRIPTOR_HANDLE s_group_base_texture_gpu_handle;
DX12::D3D::gpu_descriptor_heap_mgr->AllocateGroup(&s_group_base_texture_cpu_handle, 8, &s_group_base_texture_gpu_handle, nullptr, true);
for (unsigned int stage = 0; stage <= last_texture; stage++)
for (unsigned int stage = 0; stage < 8; stage++)
{
if (bound_textures[stage] != nullptr)
{