Merge pull request #4424 from Helios747/remove_more_features

Remove D3D12
This commit is contained in:
Mat M 2017-05-18 20:04:40 -04:00 committed by GitHub
commit 1880a97bb0
53 changed files with 7 additions and 12500 deletions

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -218,9 +218,6 @@
<ProjectReference Include="..\..\..\Languages\Languages.vcxproj">
<Project>{0e033be3-2e08-428e-9ae9-bc673efa12b5}</Project>
</ProjectReference>
<ProjectReference Include="..\VideoBackends\D3D12\D3D12.vcxproj">
<Project>{570215b7-e32f-4438-95ae-c8d955f9fca3}</Project>
</ProjectReference>
<ProjectReference Include="..\VideoBackends\Vulkan\Vulkan.vcxproj">
<Project>{29f29a19-f141-45ad-9679-5a2923b49da3}</Project>
</ProjectReference>

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -284,9 +284,6 @@
<ProjectReference Include="$(CoreDir)VideoCommon\VideoCommon.vcxproj">
<Project>{3de9ee35-3e91-4f27-a014-2866ad8c3fe3}</Project>
</ProjectReference>
<ProjectReference Include="$(CoreDir)VideoBackends\D3D12\D3D12.vcxproj">
<Project>{570215b7-e32f-4438-95ae-c8d955f9fca3}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

View File

@ -1,161 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <memory>
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "VideoBackends/D3D12/BoundingBox.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// Size in bytes of the bounding box buffer: four ints (the UAV created in BBox::Init() exposes
// four R32_SINT elements).
constexpr size_t BBOX_BUFFER_SIZE = 4 * sizeof(int);
// Size in bytes of the stream buffer that stages BBox::Set() writes: 128 times the bounding box
// buffer size.
constexpr size_t BBOX_STREAM_BUFFER_SIZE = 128 * BBOX_BUFFER_SIZE;

// Buffer created with unordered-access allowed; shaders reach it through the UAV descriptor below.
static ID3D12Resource* s_bbox_buffer = nullptr;
// Readback-heap copy of s_bbox_buffer, filled on demand by BBox::Get().
static ID3D12Resource* s_bbox_staging_buffer = nullptr;
// CPU pointer to the mapped staging buffer; null whenever the staging buffer is not mapped.
static void* s_bbox_staging_buffer_map = nullptr;
// Source of the temporary upload allocations used by BBox::Set().
static std::unique_ptr<D3DStreamBuffer> s_bbox_stream_buffer;
// GPU descriptor handle of the bounding box UAV, bound by BBox::Bind().
static D3D12_GPU_DESCRIPTOR_HANDLE s_bbox_descriptor_handle = {};
void BBox::Init()
{
CD3DX12_RESOURCE_DESC buffer_desc(CD3DX12_RESOURCE_DESC::Buffer(
BBOX_BUFFER_SIZE, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, 0));
CD3DX12_RESOURCE_DESC staging_buffer_desc(
CD3DX12_RESOURCE_DESC::Buffer(BBOX_BUFFER_SIZE, D3D12_RESOURCE_FLAG_NONE, 0));
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &buffer_desc,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, IID_PPV_ARGS(&s_bbox_buffer)));
CheckHR(D3D::device12->CreateCommittedResource(&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
D3D12_HEAP_FLAG_NONE, &staging_buffer_desc,
D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
IID_PPV_ARGS(&s_bbox_staging_buffer)));
s_bbox_stream_buffer =
std::make_unique<D3DStreamBuffer>(BBOX_STREAM_BUFFER_SIZE, BBOX_STREAM_BUFFER_SIZE, nullptr);
// D3D12 root signature UAV must be raw or structured buffers, not typed. Since we used a typed
// buffer,
// we have to use a descriptor table. Luckily, we only have to allocate this once, and it never
// changes.
D3D12_CPU_DESCRIPTOR_HANDLE cpu_descriptor_handle;
if (!D3D::gpu_descriptor_heap_mgr->Allocate(&cpu_descriptor_handle, &s_bbox_descriptor_handle,
nullptr, false))
PanicAlert("Failed to create bounding box UAV descriptor");
D3D12_UNORDERED_ACCESS_VIEW_DESC view_desc = {DXGI_FORMAT_R32_SINT, D3D12_UAV_DIMENSION_BUFFER};
view_desc.Buffer.FirstElement = 0;
view_desc.Buffer.NumElements = 4;
view_desc.Buffer.StructureByteStride = 0;
view_desc.Buffer.CounterOffsetInBytes = 0;
view_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
D3D::device12->CreateUnorderedAccessView(s_bbox_buffer, nullptr, &view_desc,
cpu_descriptor_handle);
Bind();
}
// Sets the bounding box UAV descriptor table (DESCRIPTOR_TABLE_PS_UAV) on the current command
// list.
void BBox::Bind()
{
  auto* const command_list = D3D::current_command_list;
  command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_UAV, s_bbox_descriptor_handle);
}
void BBox::Invalidate()
{
if (!s_bbox_staging_buffer_map)
return;
D3D12_RANGE write_range = {};
s_bbox_staging_buffer->Unmap(0, &write_range);
s_bbox_staging_buffer_map = nullptr;
}
void BBox::Shutdown()
{
Invalidate();
if (s_bbox_buffer)
{
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_bbox_buffer);
s_bbox_buffer = nullptr;
}
if (s_bbox_staging_buffer)
{
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(s_bbox_staging_buffer);
s_bbox_staging_buffer = nullptr;
}
s_bbox_stream_buffer.reset();
}
void BBox::Set(int index, int value)
{
// If the buffer is currently mapped, compare the value, and update the staging buffer.
if (s_bbox_staging_buffer_map)
{
int current_value;
memcpy(&current_value, reinterpret_cast<u8*>(s_bbox_staging_buffer_map) + (index * sizeof(int)),
sizeof(int));
if (current_value == value)
{
// Value hasn't changed. So skip updating completely.
return;
}
memcpy(reinterpret_cast<u8*>(s_bbox_staging_buffer_map) + (index * sizeof(int)), &value,
sizeof(int));
}
s_bbox_stream_buffer->AllocateSpaceInBuffer(sizeof(int), sizeof(int));
// Allocate temporary bytes in upload buffer, then copy to real buffer.
memcpy(s_bbox_stream_buffer->GetCPUAddressOfCurrentAllocation(), &value, sizeof(int));
D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST, 0);
D3D::current_command_list->CopyBufferRegion(
s_bbox_buffer, index * sizeof(int), s_bbox_stream_buffer->GetBuffer(),
s_bbox_stream_buffer->GetOffsetOfCurrentAllocation(), sizeof(int));
D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer, D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0);
}
int BBox::Get(int index)
{
if (!s_bbox_staging_buffer_map)
{
D3D::command_list_mgr->CPUAccessNotify();
// Copy from real buffer to staging buffer, then block until we have the results.
D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE,
0);
D3D::current_command_list->CopyBufferRegion(s_bbox_staging_buffer, 0, s_bbox_buffer, 0,
BBOX_BUFFER_SIZE);
D3D::ResourceBarrier(D3D::current_command_list, s_bbox_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0);
D3D::command_list_mgr->ExecuteQueuedWork(true);
D3D12_RANGE read_range = {0, BBOX_BUFFER_SIZE};
CheckHR(s_bbox_staging_buffer->Map(0, &read_range, &s_bbox_staging_buffer_map));
}
int value;
memcpy(&value, &reinterpret_cast<int*>(s_bbox_staging_buffer_map)[index], sizeof(int));
return value;
}
};

View File

@ -1,21 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "VideoBackends/D3D12/D3DBase.h"
namespace DX12
{
// Static interface to the bounding box buffer of the D3D12 backend.
class BBox
{
public:
  // Lifetime: create and release the underlying resources.
  static void Init();
  static void Shutdown();

  // Command list state: bind the buffer for shader access / discard the CPU-side copy.
  static void Bind();
  static void Invalidate();

  // Element access by index.
  static int Get(int index);
  static void Set(int index, int value);
};
};

View File

@ -1,54 +0,0 @@
# Source and header files that make up the D3D12 video backend library.
set(SRCS
BoundingBox.cpp
BoundingBox.h
D3DBase.cpp
D3DBase.h
D3DCommandListManager.cpp
D3DCommandListManager.h
D3DDescriptorHeapManager.cpp
D3DDescriptorHeapManager.h
D3DQueuedCommandList.cpp
D3DQueuedCommandList.h
D3DShader.cpp
D3DShader.h
D3DState.cpp
D3DState.h
D3DStreamBuffer.cpp
D3DStreamBuffer.h
D3DTexture.cpp
D3DTexture.h
D3DUtil.cpp
D3DUtil.h
FramebufferManager.cpp
FramebufferManager.h
main.cpp
NativeVertexFormat.cpp
NativeVertexFormat.h
PerfQuery.cpp
PerfQuery.h
PSTextureEncoder.cpp
PSTextureEncoder.h
Render.cpp
Render.h
ShaderCache.cpp
ShaderCache.h
ShaderConstantsManager.cpp
ShaderConstantsManager.h
StaticShaderCache.cpp
StaticShaderCache.h
TextureCache.cpp
TextureCache.h
VertexManager.cpp
VertexManager.h
VideoBackend.h
XFBEncoder.cpp
XFBEncoder.h
)
# Libraries passed to add_dolphin_library() alongside the sources.
set(LIBS
videocommon
SOIL
common
)
# Declares the videod3d12 library target from the lists above.
add_dolphin_library(videod3d12 "${SRCS}" "${LIBS}")

View File

@ -1,106 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{570215B7-E32F-4438-95AE-C8D955F9FCA3}</ProjectGuid>
<WindowsTargetPlatformVersion>10.0.10586.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<PlatformToolset>v140</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="..\..\..\VSProps\Base.props" />
<Import Project="..\..\..\VSProps\PCHUse.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<ForcedIncludeFiles />
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<ForcedIncludeFiles />
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="BoundingBox.cpp" />
<ClCompile Include="D3DBase.cpp" />
<ClCompile Include="D3DCommandListManager.cpp" />
<ClCompile Include="D3DDescriptorHeapManager.cpp" />
<ClCompile Include="D3DQueuedCommandList.cpp" />
<ClCompile Include="D3DShader.cpp" />
<ClCompile Include="D3DState.cpp" />
<ClCompile Include="D3DStreamBuffer.cpp" />
<ClCompile Include="D3DTexture.cpp" />
<ClCompile Include="D3DUtil.cpp" />
<ClCompile Include="FramebufferManager.cpp" />
<ClCompile Include="main.cpp" />
<ClCompile Include="NativeVertexFormat.cpp" />
<ClCompile Include="PerfQuery.cpp" />
<ClCompile Include="PSTextureEncoder.cpp" />
<ClCompile Include="Render.cpp" />
<ClCompile Include="ShaderCache.cpp" />
<ClCompile Include="ShaderConstantsManager.cpp" />
<ClCompile Include="StaticShaderCache.cpp" />
<ClCompile Include="TextureCache.cpp" />
<ClCompile Include="VertexManager.cpp" />
<ClCompile Include="XFBEncoder.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="BoundingBox.h" />
<ClInclude Include="D3DBase.h" />
<ClInclude Include="D3DCommandListManager.h" />
<ClInclude Include="D3DDescriptorHeapManager.h" />
<ClInclude Include="D3DQueuedCommandList.h" />
<ClInclude Include="D3DShader.h" />
<ClInclude Include="D3DState.h" />
<ClInclude Include="D3DStreamBuffer.h" />
<ClInclude Include="D3DTexture.h" />
<ClInclude Include="D3DUtil.h" />
<ClInclude Include="FramebufferManager.h" />
<ClInclude Include="NativeVertexFormat.h" />
<ClInclude Include="PerfQuery.h" />
<ClInclude Include="PSTextureEncoder.h" />
<ClInclude Include="Render.h" />
<ClInclude Include="ShaderCache.h" />
<ClInclude Include="ShaderConstantsManager.h" />
<ClInclude Include="StaticShaderCache.h" />
<ClInclude Include="TextureCache.h" />
<ClInclude Include="VertexManager.h" />
<ClInclude Include="VideoBackend.h" />
<ClInclude Include="XFBEncoder.h" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="$(CoreDir)VideoCommon\VideoCommon.vcxproj">
<Project>{3de9ee35-3e91-4f27-a014-2866ad8c3fe3}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -1,143 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Render">
<UniqueIdentifier>{3683d29b-19f6-4e7a-803f-4ac70b1d49fd}</UniqueIdentifier>
</Filter>
<Filter Include="D3D12">
<UniqueIdentifier>{ae700f7e-33c8-45b5-b7ee-a0ded3630549}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="D3DBase.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="D3DShader.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="D3DTexture.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="D3DUtil.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="D3DState.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="FramebufferManager.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="NativeVertexFormat.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="PerfQuery.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="PSTextureEncoder.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Render.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="TextureCache.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="VertexManager.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="XFBEncoder.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="main.cpp" />
<ClCompile Include="BoundingBox.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="D3DCommandListManager.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="D3DDescriptorHeapManager.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="D3DQueuedCommandList.cpp">
<Filter>D3D12</Filter>
</ClCompile>
<ClCompile Include="StaticShaderCache.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="ShaderCache.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="ShaderConstantsManager.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="D3DStreamBuffer.cpp">
<Filter>D3D12</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="D3DBase.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="D3DShader.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="D3DTexture.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="D3DUtil.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="D3DState.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="FramebufferManager.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="PerfQuery.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="PSTextureEncoder.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Render.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="TextureCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="VertexManager.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="XFBEncoder.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="VideoBackend.h" />
<ClInclude Include="BoundingBox.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="D3DCommandListManager.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="D3DDescriptorHeapManager.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="NativeVertexFormat.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="D3DQueuedCommandList.h">
<Filter>D3D12</Filter>
</ClInclude>
<ClInclude Include="StaticShaderCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="ShaderCache.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="ShaderConstantsManager.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="D3DStreamBuffer.h">
<Filter>D3D12</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@ -1,900 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/VideoConfig.h"
// Number of buffers requested for the swap chain; also the size of the s_backbuf array.
static const unsigned int SWAP_CHAIN_BUFFER_COUNT = 4;
namespace DX12
{
// Each group below holds a module handle, a reference count maintained by the matching
// Load*/Unload* function pair, and the entry points resolved with GetProcAddress.
// d3dcompiler_*.dll exports
static HINSTANCE s_d3d_compiler_dll = nullptr;
static int s_d3d_compiler_dll_ref = 0;
D3DREFLECT d3d_reflect = nullptr;
D3DCREATEBLOB d3d_create_blob = nullptr;
pD3DCompile d3d_compile = nullptr;
// dxgi.dll exports
static HINSTANCE s_dxgi_dll = nullptr;
static int s_dxgi_dll_ref = 0;
CREATEDXGIFACTORY create_dxgi_factory = nullptr;
// d3d12.dll exports
static HINSTANCE s_d3d12_dll = nullptr;
static int s_d3d12_dll_ref = 0;
D3D12CREATEDEVICE d3d12_create_device = nullptr;
D3D12SERIALIZEROOTSIGNATURE d3d12_serialize_root_signature = nullptr;
D3D12GETDEBUGINTERFACE d3d12_get_debug_interface = nullptr;
namespace D3D
{
// Begin extern'd variables.
// Device and command queue, both created in Create().
ID3D12Device* device12 = nullptr;
ID3D12CommandQueue* command_queue = nullptr;
std::unique_ptr<D3DCommandListManager> command_list_mgr;
// Command list obtained from command_list_mgr in Create().
ID3D12GraphicsCommandList* current_command_list = nullptr;
ID3D12RootSignature* default_root_signature = nullptr;
// Null SRV descriptor allocated in CreateDescriptorHeaps().
D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu = {};
D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu_shadow = {};
// Descriptor handle increment sizes queried from the device in CreateDescriptorHeaps().
unsigned int resource_descriptor_size = 0;
unsigned int sampler_descriptor_size = 0;
// Descriptor heap managers, one per heap type, created in CreateDescriptorHeaps().
std::unique_ptr<D3DDescriptorHeapManager> gpu_descriptor_heap_mgr;
std::unique_ptr<D3DDescriptorHeapManager> sampler_descriptor_heap_mgr;
std::unique_ptr<D3DDescriptorHeapManager> dsv_descriptor_heap_mgr;
std::unique_ptr<D3DDescriptorHeapManager> rtv_descriptor_heap_mgr;
// [0] is the CBV/SRV/UAV heap, [1] is the sampler heap (both shader-visible).
std::array<ID3D12DescriptorHeap*, 2> gpu_descriptor_heaps;
// Window passed to Create().
HWND hWnd;
// End extern'd variables.
static IDXGISwapChain* s_swap_chain = nullptr;
// Display frequency read via EnumDisplaySettings in Create(); 60 if that query fails.
static unsigned int s_monitor_refresh_rate = 0;
// Filled by QueryPerformanceFrequency in Create().
static LARGE_INTEGER s_qpc_frequency;
// Obtained in Create() only when the device exposes ID3D12InfoQueue.
static ID3D12DebugDevice* s_debug_device12 = nullptr;
// Wrappers around the swap chain's buffers, plus the index of the one currently rendered to.
static D3DTexture2D* s_backbuf[SWAP_CHAIN_BUFFER_COUNT];
static unsigned int s_current_back_buf = 0;
// Client-area size of the window, captured in Create().
static unsigned int s_xres = 0;
static unsigned int s_yres = 0;
static bool s_frame_in_progress = false;
// Loads dxgi.dll and resolves CreateDXGIFactory into create_dxgi_factory.
//
// Calls are reference counted: only the first call loads the library, and every successful call
// must be balanced by UnloadDXGI().
//
// Returns S_OK on success. Returns E_FAIL, with the reference count restored and the library
// released, if the DLL cannot be loaded or does not export CreateDXGIFactory. Callers invoke
// create_dxgi_factory unconditionally after a successful load, so a missing export must be
// reported as a failure rather than leaving a null entry point behind.
HRESULT LoadDXGI()
{
  if (s_dxgi_dll_ref++ > 0)
    return S_OK;

  if (s_dxgi_dll)
    return S_OK;

  s_dxgi_dll = LoadLibraryA("dxgi.dll");
  if (!s_dxgi_dll)
  {
    MessageBoxA(nullptr, "Failed to load dxgi.dll", "Critical error", MB_OK | MB_ICONERROR);
    --s_dxgi_dll_ref;
    return E_FAIL;
  }

  create_dxgi_factory = (CREATEDXGIFACTORY)GetProcAddress(s_dxgi_dll, "CreateDXGIFactory");
  if (create_dxgi_factory == nullptr)
  {
    MessageBoxA(nullptr, "GetProcAddress failed for CreateDXGIFactory!", "Critical error",
                MB_OK | MB_ICONERROR);

    // Undo the partial load so that a later call starts from a clean state.
    FreeLibrary(s_dxgi_dll);
    s_dxgi_dll = nullptr;
    --s_dxgi_dll_ref;
    return E_FAIL;
  }

  return S_OK;
}
// Loads d3d12.dll and resolves D3D12CreateDevice, D3D12SerializeRootSignature and
// D3D12GetDebugInterface.
//
// Calls are reference counted: only the first call loads the library, and every successful call
// must be balanced by UnloadD3D().
//
// Returns S_OK on success. Returns E_FAIL if the DLL cannot be loaded or any of the three exports
// is missing. On failure the library is released, the entry points are cleared and the reference
// count is restored, so a later call retries the load instead of reporting success with null
// entry points.
HRESULT LoadD3D()
{
  if (s_d3d12_dll_ref++ > 0)
    return S_OK;

  s_d3d12_dll = LoadLibraryA("d3d12.dll");
  if (!s_d3d12_dll)
  {
    MessageBoxA(nullptr, "Failed to load d3d12.dll", "Critical error", MB_OK | MB_ICONERROR);
    --s_d3d12_dll_ref;
    return E_FAIL;
  }

  // Reports a missing export and rolls back everything done so far.
  const auto fail_missing_export = [](const char* message) {
    MessageBoxA(nullptr, message, "Critical error", MB_OK | MB_ICONERROR);

    d3d12_create_device = nullptr;
    d3d12_serialize_root_signature = nullptr;
    d3d12_get_debug_interface = nullptr;

    FreeLibrary(s_d3d12_dll);
    s_d3d12_dll = nullptr;
    --s_d3d12_dll_ref;
    return E_FAIL;
  };

  d3d12_create_device = (D3D12CREATEDEVICE)GetProcAddress(s_d3d12_dll, "D3D12CreateDevice");
  if (d3d12_create_device == nullptr)
    return fail_missing_export("GetProcAddress failed for D3D12CreateDevice!");

  d3d12_serialize_root_signature =
      (D3D12SERIALIZEROOTSIGNATURE)GetProcAddress(s_d3d12_dll, "D3D12SerializeRootSignature");
  if (d3d12_serialize_root_signature == nullptr)
    return fail_missing_export("GetProcAddress failed for D3D12SerializeRootSignature!");

  d3d12_get_debug_interface =
      (D3D12GETDEBUGINTERFACE)GetProcAddress(s_d3d12_dll, "D3D12GetDebugInterface");
  if (d3d12_get_debug_interface == nullptr)
    return fail_missing_export("GetProcAddress failed for D3D12GetDebugInterface!");

  return S_OK;
}
// Loads the D3D compiler DLL and resolves D3DReflect, D3DCreateBlob and D3DCompile.
//
// Calls are reference counted: only the first call loads the library, and every successful call
// must be balanced by UnloadD3DCompiler().
//
// Returns S_OK once a compiler DLL is loaded. A missing export is only reported to the user; the
// corresponding function pointer stays null and S_OK is still returned. Returns E_FAIL, with the
// reference count restored, if neither DLL can be loaded.
HRESULT LoadD3DCompiler()
{
  if (s_d3d_compiler_dll_ref++ > 0)
    return S_OK;

  if (s_d3d_compiler_dll)
    return S_OK;

  // try to load D3DCompiler first to check whether we have proper runtime support
  // try to use the dll the backend was compiled against first - don't bother about debug runtimes
  s_d3d_compiler_dll = LoadLibraryA(D3DCOMPILER_DLL_A);
  if (!s_d3d_compiler_dll)
  {
    // if that fails, use the dll which should be available in every SDK which officially supports
    // DX12.
    s_d3d_compiler_dll = LoadLibraryA("D3DCompiler_42.dll");
    if (!s_d3d_compiler_dll)
    {
      MessageBoxA(nullptr, "Failed to load D3DCompiler_42.dll, update your DX12 runtime, please",
                  "Critical error", MB_OK | MB_ICONERROR);

      // Nothing was loaded, so give back the reference taken above. Otherwise the next call
      // would see a non-zero count and report success without a compiler DLL.
      --s_d3d_compiler_dll_ref;
      return E_FAIL;
    }
    else
    {
      NOTICE_LOG(VIDEO, "Successfully loaded D3DCompiler_42.dll. If you're having trouble, try "
                        "updating your DX runtime first.");
    }
  }

  d3d_reflect = (D3DREFLECT)GetProcAddress(s_d3d_compiler_dll, "D3DReflect");
  if (d3d_reflect == nullptr)
    MessageBoxA(nullptr, "GetProcAddress failed for D3DReflect!", "Critical error",
                MB_OK | MB_ICONERROR);

  d3d_create_blob = (D3DCREATEBLOB)GetProcAddress(s_d3d_compiler_dll, "D3DCreateBlob");
  if (d3d_create_blob == nullptr)
    MessageBoxA(nullptr, "GetProcAddress failed for D3DCreateBlob!", "Critical error",
                MB_OK | MB_ICONERROR);

  d3d_compile = (pD3DCompile)GetProcAddress(s_d3d_compiler_dll, "D3DCompile");
  if (d3d_compile == nullptr)
    MessageBoxA(nullptr, "GetProcAddress failed for D3DCompile!", "Critical error",
                MB_OK | MB_ICONERROR);

  return S_OK;
}
// Drops one reference taken by LoadDXGI(). When the last reference goes away, dxgi.dll is
// released and create_dxgi_factory is cleared. Does nothing if no reference is held.
void UnloadDXGI()
{
  // Either nothing is loaded, or other users still hold references after this release.
  if (s_dxgi_dll_ref == 0 || --s_dxgi_dll_ref != 0)
    return;

  if (s_dxgi_dll != nullptr)
    FreeLibrary(s_dxgi_dll);

  create_dxgi_factory = nullptr;
  s_dxgi_dll = nullptr;
}
void UnloadD3D()
{
if (!s_d3d12_dll_ref)
return;
if (--s_d3d12_dll_ref != 0)
return;
if (s_d3d12_dll)
FreeLibrary(s_d3d12_dll);
s_d3d12_dll = nullptr;
d3d12_create_device = nullptr;
d3d12_serialize_root_signature = nullptr;
}
// Drops one reference taken by LoadD3DCompiler(). When the last reference goes away, the
// compiler DLL is released and its entry points are cleared. Does nothing if no reference is held.
void UnloadD3DCompiler()
{
  // Either nothing is loaded, or other users still hold references after this release.
  if (s_d3d_compiler_dll_ref == 0 || --s_d3d_compiler_dll_ref != 0)
    return;

  if (s_d3d_compiler_dll != nullptr)
    FreeLibrary(s_d3d_compiler_dll);
  s_d3d_compiler_dll = nullptr;

  d3d_reflect = nullptr;
  d3d_create_blob = nullptr;
  d3d_compile = nullptr;
}
// Returns the multisample modes for which `device` reports at least one quality level on
// DXGI_FORMAT_R8G8B8A8_UNORM. Every returned entry has Quality set to 0. Sample counts from 0 up
// to (but not including) D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT are probed.
std::vector<DXGI_SAMPLE_DESC> EnumAAModes(ID3D12Device* device)
{
  std::vector<DXGI_SAMPLE_DESC> supported_modes;

  for (int sample_count = 0; sample_count < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; ++sample_count)
  {
    D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS query = {};
    query.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    query.SampleCount = sample_count;

    // The HRESULT is not checked; the query is zero-initialized above, and the quality-level
    // count it holds afterwards decides whether the mode is kept.
    device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &query, sizeof(query));
    if (query.NumQualityLevels == 0)
      continue;

    DXGI_SAMPLE_DESC mode;
    mode.Count = sample_count;
    mode.Quality = 0;
    supported_modes.push_back(mode);
  }

  return supported_modes;
}
// Returns true only if `device` reports 2D texture support for all of BC1, BC2 and BC3. A failed
// support query counts as "unsupported", and checking stops at the first unsupported format.
static bool SupportsS3TCTextures(ID3D12Device* device)
{
  const DXGI_FORMAT required_formats[] = {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC2_UNORM,
                                          DXGI_FORMAT_BC3_UNORM};

  for (const DXGI_FORMAT format : required_formats)
  {
    D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {format};
    if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support,
                                           sizeof(support))))
      return false;

    if ((support.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D) == 0)
      return false;
  }

  return true;
}
// Initializes the D3D12 backend for window `wnd`.
//
// Loads dxgi.dll, d3d12.dll and the D3D compiler, picks the configured adapter (falling back to
// adapter 0), creates the device, command queue and swap chain, sets up descriptor heaps, root
// signatures and the command list manager, and wraps the swap chain buffers as render targets.
//
// Returns S_OK on success. If a library fails to load, or the factory, adapter or device cannot
// be obtained, the failing HRESULT is returned after releasing the DXGI objects acquired so far
// and unloading the libraries again. Later steps report failures through CheckHR/CHECK.
HRESULT Create(HWND wnd)
{
  hWnd = wnd;
  HRESULT hr;

  // The swap chain is created at the size of the window's client area.
  RECT client;
  GetClientRect(hWnd, &client);
  s_xres = client.right - client.left;
  s_yres = client.bottom - client.top;

  hr = LoadDXGI();
  if (FAILED(hr))
    return hr;

  hr = LoadD3D();
  if (FAILED(hr))
  {
    UnloadDXGI();
    return hr;
  }

  hr = LoadD3DCompiler();
  if (FAILED(hr))
  {
    UnloadD3D();
    UnloadDXGI();
    return hr;
  }

  IDXGIFactory* factory;
  IDXGIAdapter* adapter;
  hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory);
  if (FAILED(hr))
  {
    MessageBox(wnd, _T("Failed to create IDXGIFactory object"), _T("Dolphin Direct3D 12 backend"),
               MB_OK | MB_ICONERROR);
    UnloadD3DCompiler();
    UnloadD3D();
    UnloadDXGI();
    return hr;
  }

  hr = factory->EnumAdapters(g_ActiveConfig.iAdapter, &adapter);
  if (FAILED(hr))
  {
    // try using the first one
    hr = factory->EnumAdapters(0, &adapter);
    if (FAILED(hr))
    {
      MessageBox(wnd, _T("Failed to enumerate adapters"), _T("Dolphin Direct3D 12 backend"),
                 MB_OK | MB_ICONERROR);
      // Release the factory before unloading the DLL that implements it.
      SAFE_RELEASE(factory);
      UnloadD3DCompiler();
      UnloadD3D();
      UnloadDXGI();
      return hr;
    }
  }

  DXGI_SWAP_CHAIN_DESC swap_chain_desc = {};
  swap_chain_desc.BufferCount = SWAP_CHAIN_BUFFER_COUNT;
  swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
  swap_chain_desc.OutputWindow = wnd;
  swap_chain_desc.SampleDesc.Count = 1;
  swap_chain_desc.SampleDesc.Quality = 0;
  swap_chain_desc.Windowed = true;
  swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
  swap_chain_desc.Flags = 0;
  swap_chain_desc.BufferDesc.Width = s_xres;
  swap_chain_desc.BufferDesc.Height = s_yres;
  swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
  swap_chain_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;

#if defined(_DEBUG) || defined(DEBUGFAST) || defined(USE_D3D12_DEBUG_LAYER)
  // Enabling the debug layer will fail if the Graphics Tools feature is not installed.
  ID3D12Debug* debug_controller;
  hr = d3d12_get_debug_interface(IID_PPV_ARGS(&debug_controller));
  if (SUCCEEDED(hr))
  {
    debug_controller->EnableDebugLayer();
    debug_controller->Release();
  }
  else
  {
    MessageBox(wnd, _T("WARNING: Failed to enable D3D12 debug layer, please ensure the Graphics ")
                    _T("Tools feature is installed."),
               _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
  }
#endif

  hr = d3d12_create_device(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&device12));
  if (FAILED(hr))
  {
    MessageBox(wnd, _T("Failed to initialize Direct3D.\nMake sure your video card supports ")
                    _T("Direct3D 12 and your drivers are up-to-date."),
               _T("Dolphin Direct3D 12 backend"), MB_OK | MB_ICONERROR);
    adapter->Release();
    // The factory is still held at this point; release it too before unloading DXGI.
    SAFE_RELEASE(factory);
    UnloadD3DCompiler();
    UnloadD3D();
    UnloadDXGI();
    return hr;
  }

  // Ensure that the chosen AA mode is supported by the device.
  std::vector<DXGI_SAMPLE_DESC> aa_modes = EnumAAModes(device12);
  if (std::find_if(aa_modes.begin(), aa_modes.end(), [](const DXGI_SAMPLE_DESC& desc) {
        return desc.Count == g_Config.iMultisamples;
      }) == aa_modes.end())
  {
    g_Config.iMultisamples = 1;
    UpdateActiveConfig();
  }

  D3D12_COMMAND_QUEUE_DESC command_queue_desc = {
      D3D12_COMMAND_LIST_TYPE_DIRECT,  // D3D12_COMMAND_LIST_TYPE Type;
      0,                               // INT Priority;
      D3D12_COMMAND_QUEUE_FLAG_NONE,   // D3D12_COMMAND_QUEUE_FLAG Flags;
      0                                // UINT NodeMask;
  };

  CheckHR(device12->CreateCommandQueue(&command_queue_desc, IID_PPV_ARGS(&command_queue)));

  CheckHR(factory->CreateSwapChain(command_queue, &swap_chain_desc, &s_swap_chain));

  s_current_back_buf = 0;

  // Query the monitor refresh rate, to ensure proper Present throttling behavior.
  DEVMODE dev_mode;
  memset(&dev_mode, 0, sizeof(DEVMODE));
  dev_mode.dmSize = sizeof(DEVMODE);
  dev_mode.dmDriverExtra = 0;

  if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode) == 0)
  {
    // If EnumDisplaySettings fails, assume monitor refresh rate of 60 Hz.
    s_monitor_refresh_rate = 60;
  }
  else
  {
    s_monitor_refresh_rate = dev_mode.dmDisplayFrequency;
  }

  // The info queue interface is only set up when the device exposes it.
  ID3D12InfoQueue* info_queue = nullptr;
  if (SUCCEEDED(device12->QueryInterface(&info_queue)))
  {
    CheckHR(info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE));
    CheckHR(info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE));

    D3D12_INFO_QUEUE_FILTER filter = {};
    D3D12_MESSAGE_ID id_list[] = {
        D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_DEPTHSTENCILVIEW_NOT_SET,  // Benign.
        D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,  // Benign.
        D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH                        // Benign.
    };
    filter.DenyList.NumIDs = ARRAYSIZE(id_list);
    filter.DenyList.pIDList = id_list;
    info_queue->PushStorageFilter(&filter);

    info_queue->Release();

    // Used at Close time to report live objects.
    CheckHR(device12->QueryInterface(&s_debug_device12));
  }

  // prevent DXGI from responding to Alt+Enter, unfortunately DXGI_MWA_NO_ALT_ENTER
  // does not work so we disable all monitoring of window messages. However this
  // may make it more difficult for DXGI to handle display mode changes.
  hr = factory->MakeWindowAssociation(wnd, DXGI_MWA_NO_WINDOW_CHANGES);
  if (FAILED(hr))
    MessageBox(wnd, _T("Failed to associate the window"), _T("Dolphin Direct3D 12 backend"),
               MB_OK | MB_ICONERROR);

  CreateDescriptorHeaps();
  CreateRootSignatures();

  command_list_mgr = std::make_unique<D3DCommandListManager>(D3D12_COMMAND_LIST_TYPE_DIRECT,
                                                             device12, command_queue);

  command_list_mgr->GetCommandList(&current_command_list);
  command_list_mgr->SetInitialCommandListState();

  // Wrap each swap chain buffer; the wrapper is handed the buffer, and the local reference is
  // released right after.
  for (UINT i = 0; i < SWAP_CHAIN_BUFFER_COUNT; i++)
  {
    ID3D12Resource* buf12 = nullptr;
    hr = s_swap_chain->GetBuffer(i, IID_PPV_ARGS(&buf12));

    CHECK(SUCCEEDED(hr), "Retrieve back buffer texture");

    s_backbuf[i] =
        new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_UNKNOWN,
                         DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false,
                         D3D12_RESOURCE_STATE_PRESENT  // Swap Chain back buffers start out in
                                                       // D3D12_RESOURCE_STATE_PRESENT.
                         );

    SAFE_RELEASE(buf12);
    SetDebugObjectName12(s_backbuf[i]->GetTex12(), "backbuffer texture");
  }

  s_backbuf[s_current_back_buf]->TransitionToResourceState(current_command_list,
                                                           D3D12_RESOURCE_STATE_RENDER_TARGET);
  current_command_list->OMSetRenderTargets(1, &s_backbuf[s_current_back_buf]->GetRTV12(), FALSE,
                                           nullptr);

  QueryPerformanceFrequency(&s_qpc_frequency);

  // Render the device name.
  DXGI_ADAPTER_DESC adapter_desc;
  CheckHR(adapter->GetDesc(&adapter_desc));
  OSD::AddMessage(
      StringFromFormat("Using D3D Adapter: %s.", UTF16ToUTF8(adapter_desc.Description).c_str()));

  SAFE_RELEASE(factory);
  SAFE_RELEASE(adapter);

  g_Config.backend_info.bSupportsST3CTextures = SupportsS3TCTextures(device12);

  return S_OK;
}
void CreateDescriptorHeaps()
{
// Create D3D12 GPU and CPU descriptor heaps.
{
D3D12_DESCRIPTOR_HEAP_DESC gpu_descriptor_heap_desc = {};
gpu_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
gpu_descriptor_heap_desc.NumDescriptors = 500000;
gpu_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
gpu_descriptor_heap_mgr =
std::make_unique<D3DDescriptorHeapManager>(&gpu_descriptor_heap_desc, device12, 50000);
gpu_descriptor_heaps[0] = gpu_descriptor_heap_mgr->GetDescriptorHeap();
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_heap_cpu_base =
gpu_descriptor_heap_mgr->GetDescriptorHeap()->GetCPUDescriptorHandleForHeapStart();
resource_descriptor_size =
device12->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
sampler_descriptor_size =
device12->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
D3D12_GPU_DESCRIPTOR_HANDLE null_srv_gpu = {};
gpu_descriptor_heap_mgr->Allocate(&null_srv_cpu, &null_srv_gpu, &null_srv_cpu_shadow);
D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {};
null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
null_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
device12->CreateShaderResourceView(NULL, &null_srv_desc, null_srv_cpu);
for (UINT i = 0; i < 500000; i++)
{
// D3D12TODO: Make paving of descriptor heap optional.
D3D12_CPU_DESCRIPTOR_HANDLE destination_descriptor = {};
destination_descriptor.ptr = descriptor_heap_cpu_base.ptr + i * resource_descriptor_size;
device12->CreateShaderResourceView(NULL, &null_srv_desc, destination_descriptor);
}
}
{
D3D12_DESCRIPTOR_HEAP_DESC sampler_descriptor_heap_desc = {};
sampler_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
sampler_descriptor_heap_desc.NumDescriptors = 2000;
sampler_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
sampler_descriptor_heap_mgr =
std::make_unique<D3DDescriptorHeapManager>(&sampler_descriptor_heap_desc, device12);
gpu_descriptor_heaps[1] = sampler_descriptor_heap_mgr->GetDescriptorHeap();
}
{
D3D12_DESCRIPTOR_HEAP_DESC dsv_descriptor_heap_desc = {};
dsv_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
dsv_descriptor_heap_desc.NumDescriptors = 2000;
dsv_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV;
dsv_descriptor_heap_mgr =
std::make_unique<D3DDescriptorHeapManager>(&dsv_descriptor_heap_desc, device12);
}
{
// D3D12TODO: Temporary workaround.. really need to properly suballocate out of render target
// heap.
D3D12_DESCRIPTOR_HEAP_DESC rtv_descriptor_heap_desc = {};
rtv_descriptor_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
rtv_descriptor_heap_desc.NumDescriptors = 1000000;
rtv_descriptor_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
rtv_descriptor_heap_mgr =
std::make_unique<D3DDescriptorHeapManager>(&rtv_descriptor_heap_desc, device12);
}
}
// Builds and creates the default graphics root signature (stored in default_root_signature).
//
// Root parameter layout, indexed by GRAPHICS_ROOT_PARAMETER:
//   PS SRV table (8 SRVs from t0), PS sampler table (8 samplers from s0), GS root CBV (b0),
//   VS root CBV (b0), two PS root CBVs (b0 and b1) and a PS UAV table (1 UAV at u2).
void CreateRootSignatures()
{
  D3D12_DESCRIPTOR_RANGE desc_range_srv = {
      D3D12_DESCRIPTOR_RANGE_TYPE_SRV,      // D3D12_DESCRIPTOR_RANGE_TYPE RangeType;
      8,                                    // UINT NumDescriptors;
      0,                                    // UINT BaseShaderRegister;
      0,                                    // UINT RegisterSpace;
      D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND  // UINT OffsetInDescriptorsFromTableStart;
  };
  D3D12_DESCRIPTOR_RANGE desc_range_sampler = {
      D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,  // D3D12_DESCRIPTOR_RANGE_TYPE RangeType;
      8,                                    // UINT NumDescriptors;
      0,                                    // UINT BaseShaderRegister;
      0,                                    // UINT RegisterSpace;
      D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND  // UINT OffsetInDescriptorsFromTableStart;
  };
  D3D12_DESCRIPTOR_RANGE desc_range_uav = {
      D3D12_DESCRIPTOR_RANGE_TYPE_UAV,      // D3D12_DESCRIPTOR_RANGE_TYPE RangeType;
      1,                                    // UINT NumDescriptors;
      2,                                    // UINT BaseShaderRegister;
      0,                                    // UINT RegisterSpace;
      D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND  // UINT OffsetInDescriptorsFromTableStart;
  };

  // Zero-initialize so no union member is ever read with indeterminate contents.
  D3D12_ROOT_PARAMETER root_parameters[NUM_GRAPHICS_ROOT_PARAMETERS] = {};

  root_parameters[DESCRIPTOR_TABLE_PS_SRV].ParameterType =
      D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  root_parameters[DESCRIPTOR_TABLE_PS_SRV].DescriptorTable.NumDescriptorRanges = 1;
  root_parameters[DESCRIPTOR_TABLE_PS_SRV].DescriptorTable.pDescriptorRanges = &desc_range_srv;
  root_parameters[DESCRIPTOR_TABLE_PS_SRV].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].ParameterType =
      D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].DescriptorTable.NumDescriptorRanges = 1;
  root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].DescriptorTable.pDescriptorRanges =
      &desc_range_sampler;
  root_parameters[DESCRIPTOR_TABLE_PS_SAMPLER].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  root_parameters[DESCRIPTOR_TABLE_GS_CBV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  root_parameters[DESCRIPTOR_TABLE_GS_CBV].Descriptor.RegisterSpace = 0;
  root_parameters[DESCRIPTOR_TABLE_GS_CBV].Descriptor.ShaderRegister = 0;
  root_parameters[DESCRIPTOR_TABLE_GS_CBV].ShaderVisibility = D3D12_SHADER_VISIBILITY_GEOMETRY;

  root_parameters[DESCRIPTOR_TABLE_VS_CBV].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  root_parameters[DESCRIPTOR_TABLE_VS_CBV].Descriptor.RegisterSpace = 0;
  root_parameters[DESCRIPTOR_TABLE_VS_CBV].Descriptor.ShaderRegister = 0;
  root_parameters[DESCRIPTOR_TABLE_VS_CBV].ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;

  root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].Descriptor.RegisterSpace = 0;
  root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].Descriptor.ShaderRegister = 0;
  root_parameters[DESCRIPTOR_TABLE_PS_CBVONE].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
  root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].Descriptor.RegisterSpace = 0;
  root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].Descriptor.ShaderRegister = 1;
  root_parameters[DESCRIPTOR_TABLE_PS_CBVTWO].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  root_parameters[DESCRIPTOR_TABLE_PS_UAV].ParameterType =
      D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
  root_parameters[DESCRIPTOR_TABLE_PS_UAV].DescriptorTable.NumDescriptorRanges = 1;
  root_parameters[DESCRIPTOR_TABLE_PS_UAV].DescriptorTable.pDescriptorRanges = &desc_range_uav;
  root_parameters[DESCRIPTOR_TABLE_PS_UAV].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;

  D3D12_ROOT_SIGNATURE_DESC root_signature_desc = {};
  root_signature_desc.pParameters = root_parameters;
  root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
                              D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
                              D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS;
  root_signature_desc.NumParameters = ARRAYSIZE(root_parameters);

  // Start from nullptr so the releases below are well-defined even if serialization fails.
  ID3DBlob* text_root_signature_blob = nullptr;
  ID3DBlob* text_root_signature_error_blob = nullptr;

  CheckHR(d3d12_serialize_root_signature(&root_signature_desc, D3D_ROOT_SIGNATURE_VERSION_1,
                                         &text_root_signature_blob,
                                         &text_root_signature_error_blob));

  CheckHR(D3D::device12->CreateRootSignature(0, text_root_signature_blob->GetBufferPointer(),
                                             text_root_signature_blob->GetBufferSize(),
                                             IID_PPV_ARGS(&default_root_signature)));

  // The serialized blobs are owned by the caller and are no longer needed once the root
  // signature object exists; release them so they do not leak.
  SAFE_RELEASE(text_root_signature_blob);
  SAFE_RELEASE(text_root_signature_error_blob);
}
// Submits all queued work through the command list manager and asks it to wait for the GPU.
void WaitForOutstandingRenderingToComplete()
{
  constexpr bool wait_for_gpu_completion = true;
  command_list_mgr->ExecuteQueuedWork(wait_for_gpu_completion);
}
// Tears down the D3D12 state owned by this file: back buffers, swap chain, command list
// manager, command queue, root signature, descriptor heap managers and finally the device.
// Logs whether any device references are still outstanding, then unloads the DLLs.
// The statements are order-sensitive; keep the sequence intact when editing.
void Close()
{
// we can't release the swapchain while in fullscreen.
s_swap_chain->SetFullscreenState(false, nullptr);
// Release all back buffer references
for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++)
{
SAFE_RELEASE(s_backbuf[i]);
}
D3D::CleanupPersistentD3DTextureResources();
SAFE_RELEASE(s_swap_chain);
// Destroy the command list manager before releasing the queue it was created with.
command_list_mgr.reset();
// NOTE(review): these two pointers are released but not set to nullptr here.
command_queue->Release();
default_root_signature->Release();
gpu_descriptor_heap_mgr.reset();
sampler_descriptor_heap_mgr.reset();
rtv_descriptor_heap_mgr.reset();
dsv_descriptor_heap_mgr.reset();
// Release() returns the reference count remaining after this release.
ULONG remaining_references = device12->Release();
// With a debug device present, one remaining reference is expected (see below), so only more
// than one counts as a leak; without it, any remaining reference does.
if ((!s_debug_device12 && remaining_references) || (s_debug_device12 && remaining_references > 1))
{
ERROR_LOG(VIDEO, "Unreleased D3D12 references: %i.", remaining_references);
}
else
{
NOTICE_LOG(VIDEO, "Successfully released all D3D12 device references!");
}
#if defined(_DEBUG) || defined(DEBUGFAST)
if (s_debug_device12)
{
--remaining_references; // the debug interface increases the refcount of the device, subtract
// that.
if (remaining_references)
{
// print out alive objects, but only if we actually have pending references
// note this will also print out internal live objects to the debug console
s_debug_device12->ReportLiveDeviceObjects(D3D12_RLDO_DETAIL);
}
SAFE_RELEASE(s_debug_device12);
}
#endif
// Clear the globals so stale pointers are not used after shutdown.
device12 = nullptr;
current_command_list = nullptr;
// unload DLLs
UnloadD3DCompiler();
UnloadD3D();
UnloadDXGI();
}
// Returns the version string used for vertex shaders by this backend.
const std::string VertexShaderVersionString()
{
  constexpr char vertex_shader_version[] = "vs_5_0";
  return std::string(vertex_shader_version);
}
// Returns the version string used for geometry shaders by this backend.
const std::string GeometryShaderVersionString()
{
  constexpr char geometry_shader_version[] = "gs_5_0";
  return std::string(geometry_shader_version);
}
// Returns the version string used for pixel shaders by this backend.
const std::string PixelShaderVersionString()
{
  constexpr char pixel_shader_version[] = "ps_5_0";
  return std::string(pixel_shader_version);
}
// Returns a reference to the pointer slot of the back buffer currently selected by
// s_current_back_buf.
D3DTexture2D*& GetBackBuffer()
{
  D3DTexture2D*& current_back_buffer = s_backbuf[s_current_back_buf];
  return current_back_buffer;
}
// Returns the cached back buffer width (s_xres).
unsigned int GetBackBufferWidth()
{
  const unsigned int width = s_xres;
  return width;
}
// Returns the cached back buffer height (s_yres).
unsigned int GetBackBufferHeight()
{
  const unsigned int height = s_yres;
  return height;
}
// Returns the maximum width/height of a texture, as given by the D3D12 2D texture
// dimension limit constant.
unsigned int GetMaxTextureSize()
{
  const unsigned int max_texture_dimension = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
  return max_texture_dimension;
}
void Reset()
{
// release all back buffer references
for (UINT i = 0; i < ARRAYSIZE(s_backbuf); i++)
{
SAFE_RELEASE(s_backbuf[i]);
}
// Block until all commands have finished.
// This will also final-release all pending resources (including the backbuffer above)
command_list_mgr->ExecuteQueuedWork(true);
// resize swapchain buffers
RECT client;
GetClientRect(hWnd, &client);
s_xres = client.right - client.left;
s_yres = client.bottom - client.top;
CheckHR(s_swap_chain->ResizeBuffers(SWAP_CHAIN_BUFFER_COUNT, s_xres, s_yres,
DXGI_FORMAT_R8G8B8A8_UNORM, 0));
// recreate back buffer textures
HRESULT hr = S_OK;
for (UINT i = 0; i < SWAP_CHAIN_BUFFER_COUNT; i++)
{
ID3D12Resource* buf12 = nullptr;
hr = s_swap_chain->GetBuffer(i, IID_PPV_ARGS(&buf12));
CHECK(SUCCEEDED(hr), "Retrieve back buffer texture");
s_backbuf[i] = new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false,
D3D12_RESOURCE_STATE_PRESENT);
SAFE_RELEASE(buf12);
SetDebugObjectName12(s_backbuf[i]->GetTex12(), "backbuffer texture");
}
// The 'about-to-be-presented' back buffer index is always set back to '0' upon ResizeBuffers,
// just like
// creating a new swap chain.
s_current_back_buf = 0;
s_backbuf[s_current_back_buf]->TransitionToResourceState(current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
}
// Marks a frame as in progress. Returns false (after a panic alert) if a frame is already in
// progress; otherwise returns whether a device exists.
bool BeginFrame()
{
  if (!s_frame_in_progress)
  {
    s_frame_in_progress = true;
    return device12 != nullptr;
  }

  PanicAlert("BeginFrame called although a frame is already in progress");
  return false;
}
// Marks the current frame as finished. Raises a panic alert if no frame is in progress.
void EndFrame()
{
  if (s_frame_in_progress)
  {
    s_frame_in_progress = false;
    return;
  }

  PanicAlert("EndFrame called although no frame is in progress");
}
void Present()
{
// The Present function contains logic to ensure we never Present faster than Windows can
// send to the monitor. If we Present too fast, the Present call will start to block, and we'll be
// throttled - obviously not desired if vsync is disabled and the emulated CPU speed is > 100%.
// The throttling logic ensures that we don't Present more than twice in a given monitor vsync.
// This is accomplished through timing data - there is a programmatic way to determine if a
// Present call will block, however after investigation that is not feasible here (without
// invasive
// workarounds), due to the fact this method does not actually call Present - we just queue a
// Present
// command for the background thread to dispatch.
// The monitor refresh rate is determined in Create().
static LARGE_INTEGER s_last_present_qpc;
LARGE_INTEGER current_qpc;
QueryPerformanceCounter(&current_qpc);
const double time_elapsed_since_last_present =
static_cast<double>(current_qpc.QuadPart - s_last_present_qpc.QuadPart) /
s_qpc_frequency.QuadPart;
unsigned int present_flags = 0;
if (g_ActiveConfig.IsVSync() == false &&
time_elapsed_since_last_present < (1.0 / static_cast<double>(s_monitor_refresh_rate)) / 2.0)
{
present_flags = DXGI_PRESENT_TEST; // Causes Present to be a no-op.
}
else
{
s_last_present_qpc = current_qpc;
s_backbuf[s_current_back_buf]->TransitionToResourceState(current_command_list,
D3D12_RESOURCE_STATE_PRESENT);
s_current_back_buf = (s_current_back_buf + 1) % SWAP_CHAIN_BUFFER_COUNT;
}
command_list_mgr->ExecuteQueuedWorkAndPresent(s_swap_chain, g_ActiveConfig.IsVSync() ? 1 : 0,
present_flags);
command_list_mgr->m_cpu_access_last_frame = command_list_mgr->m_cpu_access_this_frame;
command_list_mgr->m_cpu_access_this_frame = false;
command_list_mgr->m_draws_since_last_execution = 0;
}
// No-op: the requested state is ignored and S_OK is always returned.
HRESULT SetFullscreenState(bool enable_fullscreen)
{
  static_cast<void>(enable_fullscreen);
  const HRESULT result = S_OK;
  return result;
}
// Always reports "not fullscreen".
// Fullscreen exclusive intentionally not supported in DX12 backend. No performance
// difference between it and windowed full-screen due to usage of a FLIP swap chain.
bool GetFullscreenState()
{
  constexpr bool exclusive_fullscreen_active = false;
  return exclusive_fullscreen_active;
}
} // namespace D3D
} // namespace DX12

View File

@ -1,176 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#define USE_D3D12_QUEUED_COMMAND_LISTS
// D3D12TODO: Support this from Graphics Settings, not require a recompile to enable.
//#define USE_D3D12_DEBUG_LAYER
#pragma once
#include <d3d12.h>
#include <d3dcompiler.h>
#include <dxgi1_4.h>
#include <memory>
#include <vector>
#include "../../Externals/d3dx12/d3dx12.h"
#include "Common/Common.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
namespace DX12
{
// Calls Release() on x if it is non-null, then sets x to nullptr.
// x is evaluated more than once, so it must be a side-effect-free lvalue.
// NOTE(review): expands to a bare { } block rather than do { } while (0), so
// "if (c) SAFE_RELEASE(p); else ..." will not compile as written.
#define SAFE_RELEASE(x)                                                                            \
{                                                                                                \
if (x)                                                                                         \
(x)->Release();                                                                              \
(x) = nullptr;                                                                                 \
}
// If cond is false, breaks into the debugger and then raises a PanicAlert that names the
// enclosing function, file and line, followed by Message formatted with the extra arguments.
// Execution continues after the alert; this macro does not return or abort.
// NOTE(review): expands to a bare if-statement, so take care when using it directly before an
// else branch.
#define CHECK(cond, Message, ...)                                                                  \
if (!(cond))                                                                                     \
{                                                                                                \
__debugbreak();                                                                                \
PanicAlert(__FUNCTION__ " failed in %s at line %d: " Message, __FILE__, __LINE__,              \
__VA_ARGS__);                                                                       \
}
// DEBUGCHECK is for high-frequency functions that we only want to check on debug builds.
// In _DEBUG/DEBUGFAST builds it raises a PanicAlert when cond is false (without breaking into
// the debugger); in all other builds it expands to nothing, so cond is not evaluated.
#if defined(_DEBUG) || defined(DEBUGFAST)
#define DEBUGCHECK(cond, Message, ...)                                                             \
if (!(cond))                                                                                     \
{                                                                                                \
PanicAlert(__FUNCTION__ " failed in %s at line %d: " Message, __FILE__, __LINE__,              \
__VA_ARGS__);                                                                       \
}
#else
#define DEBUGCHECK(cond, Message, ...)
#endif
// Runs the CHECK macro on an HRESULT: a failure code triggers the debug break and panic alert.
inline void CheckHR(HRESULT hr)
{
  const bool call_succeeded = SUCCEEDED(hr);
  CHECK(call_succeeded, "Failed HRESULT.");
}
class D3DCommandListManager;
class D3DDescriptorHeapManager;
class D3DTexture2D;
// Indices of the parameters in the default graphics root signature.
// NUM_GRAPHICS_ROOT_PARAMETERS must stay last; it is used to size the root parameter array.
enum GRAPHICS_ROOT_PARAMETER : u32
{
  DESCRIPTOR_TABLE_PS_SRV = 0,      // Pixel shader SRV descriptor table.
  DESCRIPTOR_TABLE_PS_SAMPLER = 1,  // Pixel shader sampler descriptor table.
  DESCRIPTOR_TABLE_GS_CBV = 2,      // Geometry shader root CBV.
  DESCRIPTOR_TABLE_VS_CBV = 3,      // Vertex shader root CBV.
  DESCRIPTOR_TABLE_PS_CBVONE = 4,   // First pixel shader root CBV.
  DESCRIPTOR_TABLE_PS_CBVTWO = 5,   // Second pixel shader root CBV.
  DESCRIPTOR_TABLE_PS_UAV = 6,      // Pixel shader UAV descriptor table.
  NUM_GRAPHICS_ROOT_PARAMETERS
};
namespace D3D
{
// Library management. The Unload* functions are called at the end of Close() to unload the
// DLLs; the Load* functions are presumably their counterparts (defined elsewhere - confirm).
HRESULT LoadDXGI();
HRESULT LoadD3D();
HRESULT LoadD3DCompiler();
void UnloadDXGI();
void UnloadD3D();
void UnloadD3DCompiler();
// NOTE(review): presumably enumerates the sample descriptions supported by the device;
// the definition is not visible here.
std::vector<DXGI_SAMPLE_DESC> EnumAAModes(ID3D12Device* device);
// Backend lifetime: creation for a window, heap/root-signature setup, and teardown.
HRESULT Create(HWND wnd);
void CreateDescriptorHeaps();
void CreateRootSignatures();
// Executes all queued work and waits for the GPU to finish it.
void WaitForOutstandingRenderingToComplete();
void Close();
// Global backend state shared with the rest of the D3D12 backend.
extern ID3D12Device* device12;
// Descriptor handle increment sizes queried from the device in CreateDescriptorHeaps().
extern unsigned int resource_descriptor_size;
extern unsigned int sampler_descriptor_size;
extern std::unique_ptr<D3DDescriptorHeapManager> gpu_descriptor_heap_mgr;
extern std::unique_ptr<D3DDescriptorHeapManager> sampler_descriptor_heap_mgr;
extern std::unique_ptr<D3DDescriptorHeapManager> dsv_descriptor_heap_mgr;
extern std::unique_ptr<D3DDescriptorHeapManager> rtv_descriptor_heap_mgr;
// [0] is the CBV/SRV/UAV heap, [1] is the sampler heap; bound on each command list.
extern std::array<ID3D12DescriptorHeap*, 2> gpu_descriptor_heaps;
// Handles of the null SRV allocated in CreateDescriptorHeaps().
extern D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu;
extern D3D12_CPU_DESCRIPTOR_HANDLE null_srv_cpu_shadow;
extern std::unique_ptr<D3DCommandListManager> command_list_mgr;
extern ID3D12GraphicsCommandList* current_command_list;
extern ID3D12RootSignature* default_root_signature;
extern HWND hWnd;
// Resizes the swap chain to the window's client area and recreates the back buffers.
void Reset();
// Frame bracketing; both raise a PanicAlert when called out of sequence.
bool BeginFrame();
void EndFrame();
// Queues a Present of the current back buffer (throttled when vsync is disabled).
void Present();
unsigned int GetBackBufferWidth();
unsigned int GetBackBufferHeight();
D3DTexture2D*& GetBackBuffer();
// Version strings for each shader stage ("ps_5_0", "gs_5_0", "vs_5_0").
const std::string PixelShaderVersionString();
const std::string GeometryShaderVersionString();
const std::string VertexShaderVersionString();
// Exclusive fullscreen is not supported: the setter is a no-op returning S_OK and the getter
// always returns false.
HRESULT SetFullscreenState(bool enable_fullscreen);
bool GetFullscreenState();
// Attaches a debug name to the given resource via its private data.
// The DirectX debug layer can then identify the resource by name, e.g. when listing
// resources that still have unreleased references.
// A null name stores zero bytes. Throws std::exception if the name cannot be stored.
static void SetDebugObjectName12(ID3D12Resource* resource, LPCSTR name)
{
  const UINT name_length = name ? static_cast<UINT>(strlen(name)) : 0;
  const HRESULT hr = resource->SetPrivateData(WKPDID_D3DDebugObjectName, name_length, name);
  if (SUCCEEDED(hr))
    return;

  throw std::exception("Failure setting name for D3D12 object");
}
// Reads back the debug name stored in the resource's private data.
// Returns an empty string for a null resource. The results of the GetPrivateData calls are not
// checked.
static std::string GetDebugObjectName12(ID3D12Resource* resource)
{
  std::string debug_name;
  if (!resource)
    return debug_name;

  // First call with a null buffer asks for the required size.
  UINT byte_count = 0;
  resource->GetPrivateData(WKPDID_D3DDebugObjectName, &byte_count, nullptr);

  // Second call fills the string's storage.
  debug_name.resize(byte_count);
  resource->GetPrivateData(WKPDID_D3DDebugObjectName, &byte_count,
                           const_cast<char*>(debug_name.data()));
  return debug_name;
}
} // namespace D3D
// Function-pointer types and the global pointers through which the backend calls DXGI, D3D12
// and D3D compiler entry points.
// NOTE(review): the pointers are presumably filled in by the Load* functions - confirm in the
// corresponding .cpp file.
using CREATEDXGIFACTORY = HRESULT(WINAPI*)(REFIID, void**);
extern CREATEDXGIFACTORY create_dxgi_factory;
using D3D12CREATEDEVICE = HRESULT(WINAPI*)(IUnknown*, D3D_FEATURE_LEVEL, REFIID, void**);
extern D3D12CREATEDEVICE d3d12_create_device;
// Signature of the root-signature serializer; no extern pointer of this type is declared in
// this header.
using D3D12SERIALIZEROOTSIGNATURE =
HRESULT(WINAPI*)(const D3D12_ROOT_SIGNATURE_DESC* pRootSignature,
D3D_ROOT_SIGNATURE_VERSION Version, ID3DBlob** ppBlob, ID3DBlob** ppErrorBlob);
// Signature for obtaining the debug interface; no extern pointer of this type is declared in
// this header.
using D3D12GETDEBUGINTERFACE = HRESULT(WINAPI*)(REFIID riid, void** ppvDebug);
using D3DREFLECT = HRESULT(WINAPI*)(LPCVOID, SIZE_T, REFIID, void**);
extern D3DREFLECT d3d_reflect;
using D3DCREATEBLOB = HRESULT(WINAPI*)(SIZE_T, ID3DBlob**);
extern D3DCREATEBLOB d3d_create_blob;
extern pD3DCompile d3d_compile;
} // namespace DX12

View File

@ -1,389 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include <queue>
#include <vector>
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DQueuedCommandList.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/ShaderConstantsManager.h"
#include "VideoBackends/D3D12/VertexManager.h"
static constexpr unsigned int COMMAND_ALLOCATORS_PER_LIST = 2;
namespace DX12
{
extern StateCache gx_state_cache;
// Creates the command allocators, the backing command list (plus the queued wrapper when
// USE_D3D12_QUEUED_COMMAND_LISTS is defined), the two fences and the CPU wait event.
D3DCommandListManager::D3DCommandListManager(D3D12_COMMAND_LIST_TYPE command_list_type,
                                             ID3D12Device* device,
                                             ID3D12CommandQueue* command_queue)
    : m_device(device), m_command_queue(command_queue)
{
  // Create two lists, with two command allocators each. This corresponds to up to two frames in
  // flight at once.
  m_current_command_allocator_list = 0;
  m_current_command_allocator = 0;

  for (UINT allocator_index = 0; allocator_index < COMMAND_ALLOCATORS_PER_LIST;
       ++allocator_index)
  {
    for (auto& allocator_list : m_command_allocator_lists)
    {
      ID3D12CommandAllocator* new_allocator = nullptr;
      CheckHR(m_device->CreateCommandAllocator(command_list_type, IID_PPV_ARGS(&new_allocator)));
      allocator_list.push_back(new_allocator);
    }
  }

  // Create the backing command list on the first allocator of the current list.
  ID3D12CommandAllocator* const initial_allocator =
      m_command_allocator_lists[m_current_command_allocator_list][0];
  CheckHR(m_device->CreateCommandList(0, command_list_type, initial_allocator, nullptr,
                                      IID_PPV_ARGS(&m_backing_command_list)));

#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
  m_queued_command_list = new ID3D12QueuedCommandList(m_backing_command_list, m_command_queue);
#endif

  // Fence used to measure GPU progress of app rendering requests (e.g. CPU readback of GPU
  // data).
  m_queue_fence_value = 0;
  CheckHR(m_device->CreateFence(m_queue_fence_value, D3D12_FENCE_FLAG_NONE,
                                IID_PPV_ARGS(&m_queue_fence)));

  // Fence used internally by D3DCommandListManager for frame-level resource tracking.
  m_queue_frame_fence_value = 0;
  CheckHR(m_device->CreateFence(m_queue_frame_fence_value, D3D12_FENCE_FLAG_NONE,
                                IID_PPV_ARGS(&m_queue_frame_fence)));

  // Event used for waiting on the CPU until a fence is signaled by the GPU.
  m_wait_on_cpu_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);

  // Pre-size the deferred destruction lists.
  for (auto& destruction_list : m_deferred_destruction_lists)
  {
    destruction_list.reserve(200);
  }
  m_current_deferred_destruction_list = 0;

  m_command_allocator_list_fences.fill(0);
  m_deferred_destruction_list_fences.fill(0);
}
void D3DCommandListManager::SetInitialCommandListState()
{
ID3D12GraphicsCommandList* command_list = nullptr;
GetCommandList(&command_list);
command_list->SetDescriptorHeaps(static_cast<unsigned int>(D3D::gpu_descriptor_heaps.size()),
D3D::gpu_descriptor_heaps.data());
command_list->SetGraphicsRootSignature(D3D::default_root_signature);
if (g_renderer)
{
// It is possible that we change command lists in the middle of the frame. In that case, restore
// the viewport/scissor to the current console GPU state.
g_renderer->RestoreAPIState();
}
m_command_list_dirty_state = UINT_MAX;
command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
if (g_vertex_manager)
reinterpret_cast<VertexManager*>(g_vertex_manager.get())->SetIndexBuffer();
}
// Writes the command list that callers should record into: the queued wrapper when
// USE_D3D12_QUEUED_COMMAND_LISTS is defined, otherwise the backing command list.
void D3DCommandListManager::GetCommandList(ID3D12GraphicsCommandList** command_list) const
{
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
  ID3D12GraphicsCommandList* const active_list = m_queued_command_list;
#else
  ID3D12GraphicsCommandList* const active_list = m_backing_command_list;
#endif
  *command_list = active_list;
}
void D3DCommandListManager::ExecuteQueuedWork(bool wait_for_gpu_completion)
{
m_queue_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
m_queued_command_list->Close();
m_queued_command_list->QueueExecute();
m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value);
m_queued_command_list->ProcessQueuedItems(wait_for_gpu_completion, wait_for_gpu_completion);
#else
CheckHR(m_backing_command_list->Close());
ID3D12CommandList* const execute_list[1] = {m_backing_command_list};
m_command_queue->ExecuteCommandLists(1, execute_list);
CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value));
#endif
// Notify observers of the fence value for the current work to finish.
for (auto it : m_queue_fence_callbacks)
it.second(it.first, m_queue_fence_value);
if (wait_for_gpu_completion)
WaitForGPUCompletion();
// Re-open the command list, using the current allocator.
ResetCommandList();
SetInitialCommandListState();
}
// Closes and submits the current command list, presents the swap chain, signals the queue
// fence with a new value, notifies registered fence callbacks, then advances to the next
// command allocator and re-opens the command list.
void D3DCommandListManager::ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain,
                                                        UINT sync_interval, UINT flags)
{
  ++m_queue_fence_value;

#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
  m_queued_command_list->Close();
  m_queued_command_list->QueueExecute();
  m_queued_command_list->QueuePresent(swap_chain, sync_interval, flags);
  m_queued_command_list->QueueFenceGpuSignal(m_queue_fence, m_queue_fence_value);
  m_queued_command_list->ProcessQueuedItems(true);
#else
  CheckHR(m_backing_command_list->Close());

  ID3D12CommandList* const lists_to_execute[1] = {m_backing_command_list};
  m_command_queue->ExecuteCommandLists(1, lists_to_execute);

  CheckHR(swap_chain->Present(sync_interval, flags));
  CheckHR(m_command_queue->Signal(m_queue_fence, m_queue_fence_value));
#endif

  // Notify observers of the fence value for the current work to finish.
  for (const auto& callback_entry : m_queue_fence_callbacks)
  {
    callback_entry.second(callback_entry.first, m_queue_fence_value);
  }

  // Move to the next command allocator, this may mean switching allocator lists.
  MoveToNextCommandAllocator();

  ResetCommandList();
  SetInitialCommandListState();
}
void D3DCommandListManager::DestroyAllPendingResources()
{
for (auto& destruction_list : m_deferred_destruction_lists)
{
for (auto& resource : destruction_list)
resource->Release();
destruction_list.clear();
}
}
void D3DCommandListManager::ResetAllCommandAllocators()
{
for (auto& allocator_list : m_command_allocator_lists)
{
for (auto& allocator : allocator_list)
allocator->Reset();
}
// Move back to the start, using the first allocator of first list.
m_current_command_allocator = 0;
m_current_command_allocator_list = 0;
m_current_deferred_destruction_list = 0;
}
// Signals the frame fence with a new value and blocks the CPU until the GPU reaches it, then
// releases all pending resources and resets all command allocators.
// This method assumes that no command lists are open.
void D3DCommandListManager::WaitForGPUCompletion()
{
  ++m_queue_frame_fence_value;

#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
  m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value);
  m_queued_command_list->ProcessQueuedItems(true);
#else
  CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value));
#endif

  // Wait for GPU to finish all outstanding work.
  WaitOnCPUForFence(m_queue_frame_fence, m_queue_frame_fence_value);

  // GPU is up to date with us. Therefore, it has finished with any pending resources.
  DestroyAllPendingResources();

  // Command allocators are also up-to-date, so reset these.
  ResetAllCommandAllocators();
}
void D3DCommandListManager::PerformGPURolloverChecks()
{
m_queue_frame_fence_value++;
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
m_queued_command_list->QueueFenceGpuSignal(m_queue_frame_fence, m_queue_frame_fence_value);
#else
CheckHR(m_command_queue->Signal(m_queue_frame_fence, m_queue_frame_fence_value));
#endif
// We now know that the previous 'set' of command lists has completed on GPU, and it is safe to
// release resources / start back at beginning of command allocator list.
// Begin Deferred Resource Destruction
UINT safe_to_delete_deferred_destruction_list =
(m_current_deferred_destruction_list - 1) % m_deferred_destruction_lists.size();
WaitOnCPUForFence(m_queue_frame_fence,
m_deferred_destruction_list_fences[safe_to_delete_deferred_destruction_list]);
for (UINT i = 0;
i < m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].size(); i++)
{
CHECK(m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list][i]->Release() == 0,
"Resource leak.");
}
m_deferred_destruction_lists[safe_to_delete_deferred_destruction_list].clear();
m_deferred_destruction_list_fences[m_current_deferred_destruction_list] =
m_queue_frame_fence_value;
m_current_deferred_destruction_list =
(m_current_deferred_destruction_list + 1) % m_deferred_destruction_lists.size();
// End Deferred Resource Destruction
// Begin Command Allocator Resets
UINT safe_to_reset_command_allocator_list =
(m_current_command_allocator_list - 1) % m_command_allocator_lists.size();
WaitOnCPUForFence(m_queue_frame_fence,
m_command_allocator_list_fences[safe_to_reset_command_allocator_list]);
for (UINT i = 0; i < m_command_allocator_lists[safe_to_reset_command_allocator_list].size(); i++)
{
CheckHR(m_command_allocator_lists[safe_to_reset_command_allocator_list][i]->Reset());
}
m_command_allocator_list_fences[m_current_command_allocator_list] = m_queue_frame_fence_value;
m_current_command_allocator_list =
(m_current_command_allocator_list + 1) % m_command_allocator_lists.size();
m_current_command_allocator = 0;
// End Command Allocator Resets
}
// Advances to the next allocator in the current allocator list. When the index wraps back to
// zero, performs the rollover checks, which move on to the next set of allocators.
void D3DCommandListManager::MoveToNextCommandAllocator()
{
  const auto allocators_in_current_list =
      m_command_allocator_lists[m_current_command_allocator_list].size();

  m_current_command_allocator = (m_current_command_allocator + 1) % allocators_in_current_list;

  const bool wrapped_around = (m_current_command_allocator == 0);
  if (wrapped_around)
  {
    PerformGPURolloverChecks();
  }
}
void D3DCommandListManager::ResetCommandList()
{
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
ID3D12QueuedCommandList* command_list = m_queued_command_list;
#else
ID3D12GraphicsCommandList* command_list = m_backing_command_list;
#endif
CheckHR(command_list->Reset(m_command_allocator_lists[m_current_command_allocator_list]
[m_current_command_allocator],
nullptr));
}
// Appends the resource to the current deferred-destruction list so it is released later.
// A null resource trips CHECK but is still appended.
void D3DCommandListManager::DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource)
{
  CHECK(resource, "Null resource being inserted!");

  auto& current_destruction_list =
      m_deferred_destruction_lists[m_current_deferred_destruction_list];
  current_destruction_list.push_back(resource);
}
// Releases the queued wrapper (if used), closes and releases the backing command list,
// releases pending resources, all command allocators and both fences, and closes the wait
// event.
D3DCommandListManager::~D3DCommandListManager()
{
#ifdef USE_D3D12_QUEUED_COMMAND_LISTS
  // Wait for background thread to exit.
  m_queued_command_list->Release();
#endif

  // The command list will still be open, close it before destroying.
  m_backing_command_list->Close();

  DestroyAllPendingResources();

  m_backing_command_list->Release();

  for (auto& list_of_allocators : m_command_allocator_lists)
  {
    for (ID3D12CommandAllocator* command_allocator : list_of_allocators)
    {
      command_allocator->Release();
    }
  }

  m_queue_fence->Release();
  m_queue_frame_fence->Release();

  CloseHandle(m_wait_on_cpu_fence_event);
}
// Blocks the calling thread until the fence has reached fence_value. Returns immediately if
// the fence's completed value is already at or past it.
void D3DCommandListManager::WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value)
{
  const bool already_reached = fence->GetCompletedValue() >= fence_value;
  if (!already_reached)
  {
    CheckHR(fence->SetEventOnCompletion(fence_value, m_wait_on_cpu_fence_event));
    WaitForSingleObject(m_wait_on_cpu_fence_event, INFINITE);
  }
}
// Sets (dirty == true) or clears (dirty == false) the given bits in the dirty-state mask.
void D3DCommandListManager::SetCommandListDirtyState(unsigned int command_list_state, bool dirty)
{
  m_command_list_dirty_state = dirty ? (m_command_list_dirty_state | command_list_state) :
                                       (m_command_list_dirty_state & ~command_list_state);
}
// Returns true if the given state bit is set in the dirty-state mask.
bool D3DCommandListManager::GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const
{
  const unsigned int masked_bits = m_command_list_dirty_state & command_list_state;
  return masked_bits != 0;
}
// Records the primitive topology tracked for the command list. Only the cached value is
// updated; nothing is issued to the command list here.
void D3DCommandListManager::SetCommandListPrimitiveTopology(
    D3D_PRIMITIVE_TOPOLOGY primitive_topology)
{
  this->m_command_list_current_topology = primitive_topology;
}
// Returns the primitive topology last recorded for the command list.
D3D_PRIMITIVE_TOPOLOGY D3DCommandListManager::GetCommandListPrimitiveTopology() const
{
  const D3D_PRIMITIVE_TOPOLOGY cached_topology = m_command_list_current_topology;
  return cached_topology;
}
// Records that a CPU access happened: sets both the "this frame" and "last frame" CPU access
// flags and resets the count of draws since the last execution.
void D3DCommandListManager::CPUAccessNotify()
{
  m_cpu_access_last_frame = true;
  m_cpu_access_this_frame = true;
  m_draws_since_last_execution = 0;
}
// Registers (or replaces) the callback invoked for owning_object each time a queue fence value
// is queued. Returns the queue fence.
ID3D12Fence*
D3DCommandListManager::RegisterQueueFenceCallback(void* owning_object,
                                                  PFN_QUEUE_FENCE_CALLBACK* callback_function)
{
  const auto existing_entry = m_queue_fence_callbacks.find(owning_object);
  if (existing_entry != m_queue_fence_callbacks.end())
  {
    existing_entry->second = callback_function;
  }
  else
  {
    m_queue_fence_callbacks.emplace(owning_object, callback_function);
  }

  return m_queue_fence;
}
// Removes the fence callback registered for owning_object, if there is one.
void D3DCommandListManager::RemoveQueueFenceCallback(void* owning_object)
{
  const auto registered_entry = m_queue_fence_callbacks.find(owning_object);
  if (registered_entry != m_queue_fence_callbacks.end())
  {
    m_queue_fence_callbacks.erase(registered_entry);
  }
}
} // namespace DX12

View File

@ -1,98 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <map>
#include <vector>
#include "D3DQueuedCommandList.h"
namespace DX12
{
// Bit flags identifying pieces of command list state tracked in the manager's dirty mask.
enum COMMAND_LIST_STATE
{
  COMMAND_LIST_STATE_GS_CBV = 1 << 0,
  COMMAND_LIST_STATE_PS_CBV = 1 << 1,
  COMMAND_LIST_STATE_VS_CBV = 1 << 2,
  COMMAND_LIST_STATE_PSO = 1 << 3,
  COMMAND_LIST_STATE_SAMPLERS = 1 << 4,
  COMMAND_LIST_STATE_VERTEX_BUFFER = 1 << 5
};
// This class manages D3D12 command list submission: it owns the command allocators, the
// backing (and optionally queued) command list, the fences used to track GPU progress, and the
// lists of resources whose release is deferred until the GPU has finished with them.
class D3DCommandListManager
{
public:
D3DCommandListManager(D3D12_COMMAND_LIST_TYPE command_list_type, ID3D12Device* device,
ID3D12CommandQueue* command_queue);
~D3DCommandListManager();
// Re-applies descriptor heaps, root signature, topology etc. on the active command list.
void SetInitialCommandListState();
// Outputs the command list callers should record into.
void GetCommandList(ID3D12GraphicsCommandList** command_list) const;
// Submits the recorded work; optionally blocks until the GPU has completed it.
void ExecuteQueuedWork(bool wait_for_gpu_completion = false);
// Submits the recorded work, presents the swap chain and moves to the next allocator.
void ExecuteQueuedWorkAndPresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags);
// Queues the resource for a later Release() instead of releasing it immediately.
void DestroyResourceAfterCurrentCommandListExecuted(ID3D12Resource* resource);
// Dirty-state mask accessors; bits are COMMAND_LIST_STATE values.
void SetCommandListDirtyState(unsigned int command_list_state, bool dirty);
bool GetCommandListDirtyState(COMMAND_LIST_STATE command_list_state) const;
// Cached primitive topology accessors (these do not issue commands themselves).
void SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY primitive_topology);
D3D_PRIMITIVE_TOPOLOGY GetCommandListPrimitiveTopology() const;
// Public per-frame bookkeeping, also written directly by D3D::Present().
unsigned int m_draws_since_last_execution = 0;
bool m_cpu_access_last_frame = false;
bool m_cpu_access_this_frame = false;
// Sets both CPU access flags and resets the draw counter.
void CPUAccessNotify();
// Allow other components to register for a callback each time a fence is queued.
using PFN_QUEUE_FENCE_CALLBACK = void(void* owning_object, UINT64 fence_value);
ID3D12Fence* RegisterQueueFenceCallback(void* owning_object,
PFN_QUEUE_FENCE_CALLBACK* callback_function);
void RemoveQueueFenceCallback(void* owning_object);
// Blocks the calling thread until the fence reaches fence_value.
void WaitOnCPUForFence(ID3D12Fence* fence, UINT64 fence_value);
private:
void DestroyAllPendingResources();
void ResetAllCommandAllocators();
void WaitForGPUCompletion();
void PerformGPURolloverChecks();
void MoveToNextCommandAllocator();
void ResetCommandList();
// All bits set means every tracked piece of state must be re-applied.
unsigned int m_command_list_dirty_state = UINT_MAX;
D3D_PRIMITIVE_TOPOLOGY m_command_list_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
// Event handed to ID3D12Fence::SetEventOnCompletion in WaitOnCPUForFence.
HANDLE m_wait_on_cpu_fence_event;
// Not owned: stored as passed to the constructor and not released by the destructor.
ID3D12Device* m_device;
ID3D12CommandQueue* m_command_queue;
// Fence (and its latest value) signaled on every ExecuteQueuedWork*/Present call.
UINT64 m_queue_fence_value;
ID3D12Fence* m_queue_fence;
// Fence (and its latest value) used internally for frame-level resource tracking.
UINT64 m_queue_frame_fence_value;
ID3D12Fence* m_queue_frame_fence;
// Callbacks keyed by the registering object.
std::map<void*, PFN_QUEUE_FENCE_CALLBACK*> m_queue_fence_callbacks;
// Index of the allocator in use within the current list, and index of the current list.
UINT m_current_command_allocator;
UINT m_current_command_allocator_list;
std::array<std::vector<ID3D12CommandAllocator*>, 2> m_command_allocator_lists;
// Frame fence value recorded for each allocator list when it was last rolled over.
std::array<UINT64, 2> m_command_allocator_list_fences;
ID3D12GraphicsCommandList* m_backing_command_list;
// Only assigned when USE_D3D12_QUEUED_COMMAND_LISTS is defined.
ID3D12QueuedCommandList* m_queued_command_list;
UINT m_current_deferred_destruction_list;
std::array<std::vector<ID3D12Resource*>, 2> m_deferred_destruction_lists;
// Frame fence value recorded for each destruction list when it was last rolled over.
std::array<UINT64, 2> m_deferred_destruction_list_fences;
};
} // namespace

View File

@ -1,187 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DState.h"
namespace DX12
{
// Compares two sampler state sets byte-for-byte.
bool operator==(const D3DDescriptorHeapManager::SamplerStateSet& lhs,
                const D3DDescriptorHeapManager::SamplerStateSet& rhs)
{
  // D3D12TODO: Do something more efficient than this.
  const int comparison = memcmp(&lhs, &rhs, sizeof(D3DDescriptorHeapManager::SamplerStateSet));
  return comparison == 0;
}
// Creates the descriptor heap described by 'desc' on 'device'.
//
// If the heap is shader-visible, a second heap with the same description but without the
// shader-visible flag is created as a CPU shadow. The last 'temporarySlots' descriptors of the
// heap form the temporary region; everything before them is the permanent region.
// NOTE(review): 'temporarySlots' is not validated against desc->NumDescriptors; a larger value
// wraps the unsigned subtraction below.
D3DDescriptorHeapManager::D3DDescriptorHeapManager(D3D12_DESCRIPTOR_HEAP_DESC* desc,
                                                   ID3D12Device* device,
                                                   unsigned int temporarySlots)
    : m_device(device)
{
  CheckHR(device->CreateDescriptorHeap(desc, IID_PPV_ARGS(&m_descriptor_heap)));
  m_descriptor_heap_size = desc->NumDescriptors;
  m_descriptor_increment_size = device->GetDescriptorHandleIncrementSize(desc->Type);

  // Flags is a bitfield: test the bit rather than comparing the whole value, so that the heap is
  // still recognised as GPU-visible if other flag bits are ever set alongside it.
  m_gpu_visible = (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE) != 0;

  if (m_gpu_visible)
  {
    D3D12_DESCRIPTOR_HEAP_DESC cpu_shadow_heap_desc = *desc;
    cpu_shadow_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
    CheckHR(device->CreateDescriptorHeap(&cpu_shadow_heap_desc,
                                         IID_PPV_ARGS(&m_descriptor_heap_cpu_shadow)));

    m_heap_base_gpu = m_descriptor_heap->GetGPUDescriptorHandleForHeapStart();
    m_heap_base_gpu_cpu_shadow =
        m_descriptor_heap_cpu_shadow->GetCPUDescriptorHandleForHeapStart();
  }

  m_heap_base_cpu = m_descriptor_heap->GetCPUDescriptorHandleForHeapStart();

  // The temporary region starts where the permanent region ends.
  m_first_temporary_slot_in_heap = m_descriptor_heap_size - temporarySlots;
  m_current_temporary_offset_in_heap = m_first_temporary_slot_in_heap;
}
// Hands out one descriptor slot from the heap.
//
// cpu_handle (required) receives the CPU handle of the slot. On a GPU-visible heap, gpu_handle
// and gpu_handle_cpu_shadow (both optional) receive the GPU handle of the slot and the CPU handle
// at the same offset in the shadow heap. 'temporary' selects the temporary region at the end of
// the heap instead of the permanent region.
//
// Returns false when the permanent cursor had to wrap back to slot 0 (so earlier permanent slots
// are about to be reused), true otherwise. Note that the wrap check on the permanent cursor runs
// regardless of 'temporary'.
bool D3DDescriptorHeapManager::Allocate(D3D12_CPU_DESCRIPTOR_HANDLE* cpu_handle,
                                        D3D12_GPU_DESCRIPTOR_HANDLE* gpu_handle,
                                        D3D12_CPU_DESCRIPTOR_HANDLE* gpu_handle_cpu_shadow,
                                        bool temporary)
{
  bool allocated_from_current_heap = true;
  if (m_current_permanent_offset_in_heap + 1 >= m_first_temporary_slot_in_heap)
  {
    // If out of room in the heap, start back at beginning.
    allocated_from_current_heap = false;
    m_current_permanent_offset_in_heap = 0;
  }

  CHECK(!gpu_handle || (gpu_handle && m_gpu_visible),
        "D3D12_GPU_DESCRIPTOR_HANDLE used on non-GPU-visible heap.");

  if (temporary && m_current_temporary_offset_in_heap + 1 >= m_descriptor_heap_size)
  {
    m_current_temporary_offset_in_heap = m_first_temporary_slot_in_heap;
  }

  const unsigned int heapOffsetToUse =
      temporary ? m_current_temporary_offset_in_heap : m_current_permanent_offset_in_heap;
  const unsigned int byte_offset = heapOffsetToUse * m_descriptor_increment_size;

  if (m_gpu_visible)
  {
    // gpu_handle defaults to nullptr in the declaration, so it must be checked before writing
    // through it; a caller that only wants the CPU handle of a GPU-visible heap is legal.
    if (gpu_handle)
      gpu_handle->ptr = m_heap_base_gpu.ptr + byte_offset;

    if (gpu_handle_cpu_shadow)
      gpu_handle_cpu_shadow->ptr = m_heap_base_gpu_cpu_shadow.ptr + byte_offset;
  }

  cpu_handle->ptr = m_heap_base_cpu.ptr + byte_offset;

  // NOTE(review): only the permanent cursor advances here; a temporary allocation keeps handing
  // out the same slot, unlike AllocateGroup, which advances the temporary cursor. Confirm whether
  // that is intended.
  if (!temporary)
  {
    m_current_permanent_offset_in_heap++;
  }

  return allocated_from_current_heap;
}
// Hands out 'num_handles' consecutive descriptor slots from the heap.
//
// base_cpu_handle (required) receives the CPU handle of the first slot. On a GPU-visible heap,
// base_gpu_handle and base_gpu_handle_cpu_shadow (both optional) receive the GPU handle of the
// first slot and the CPU handle at the same offset in the shadow heap. 'temporary' selects the
// temporary region at the end of the heap instead of the permanent region.
//
// Returns false when the permanent cursor had to wrap back to slot 0 (so earlier permanent slots
// are about to be reused), true otherwise. Note that the wrap check on the permanent cursor runs
// regardless of 'temporary'.
// NOTE(review): a request larger than the selected region is not rejected.
bool D3DDescriptorHeapManager::AllocateGroup(
    D3D12_CPU_DESCRIPTOR_HANDLE* base_cpu_handle, unsigned int num_handles,
    D3D12_GPU_DESCRIPTOR_HANDLE* base_gpu_handle,
    D3D12_CPU_DESCRIPTOR_HANDLE* base_gpu_handle_cpu_shadow, bool temporary)
{
  bool allocated_from_current_heap = true;
  if (m_current_permanent_offset_in_heap + num_handles >= m_first_temporary_slot_in_heap)
  {
    // If out of room in the heap, start back at beginning.
    allocated_from_current_heap = false;
    m_current_permanent_offset_in_heap = 0;
  }

  CHECK(!base_gpu_handle || (base_gpu_handle && m_gpu_visible),
        "D3D12_GPU_DESCRIPTOR_HANDLE used on non-GPU-visible heap.");

  if (temporary && m_current_temporary_offset_in_heap + num_handles >= m_descriptor_heap_size)
  {
    m_current_temporary_offset_in_heap = m_first_temporary_slot_in_heap;
  }

  const unsigned int heapOffsetToUse =
      temporary ? m_current_temporary_offset_in_heap : m_current_permanent_offset_in_heap;
  const unsigned int byte_offset = heapOffsetToUse * m_descriptor_increment_size;

  if (m_gpu_visible)
  {
    // base_gpu_handle defaults to nullptr in the declaration, so it must be checked before
    // writing through it.
    if (base_gpu_handle)
      base_gpu_handle->ptr = m_heap_base_gpu.ptr + byte_offset;

    if (base_gpu_handle_cpu_shadow)
      base_gpu_handle_cpu_shadow->ptr = m_heap_base_gpu_cpu_shadow.ptr + byte_offset;
  }

  base_cpu_handle->ptr = m_heap_base_cpu.ptr + byte_offset;

  // Advance the cursor of whichever region was used past the whole group.
  if (temporary)
  {
    m_current_temporary_offset_in_heap += num_handles;
  }
  else
  {
    m_current_permanent_offset_in_heap += num_handles;
  }

  return allocated_from_current_heap;
}
// Returns the GPU handle of a group of sampler descriptors matching 'sampler_state', creating
// and caching the group on first use.
//
// 'sampler_state' is reinterpreted as a SamplerStateSet for the cache lookup, so it must point at
// storage at least sizeof(SamplerStateSet) bytes long regardless of 'num_sampler_samples'.
// 'num_sampler_samples' is the number of sampler descriptors created for a new group.
D3D12_GPU_DESCRIPTOR_HANDLE
D3DDescriptorHeapManager::GetHandleForSamplerGroup(SamplerState* sampler_state,
                                                   unsigned int num_sampler_samples)
{
  // The key refers to the caller's storage, not to a map element, so it stays valid across the
  // clear() below.
  const SamplerStateSet& key = *reinterpret_cast<SamplerStateSet*>(sampler_state);

  auto it = m_sampler_map.find(key);
  if (it != m_sampler_map.end())
    return it->second;

  D3D12_CPU_DESCRIPTOR_HANDLE base_sampler_cpu_handle;
  D3D12_GPU_DESCRIPTOR_HANDLE base_sampler_gpu_handle;
  const bool allocatedFromExistingHeap =
      AllocateGroup(&base_sampler_cpu_handle, num_sampler_samples, &base_sampler_gpu_handle);

  if (!allocatedFromExistingHeap)
  {
    // The allocator wrapped around, so the slots behind previously cached handles are being
    // reused; drop every cached entry.
    m_sampler_map.clear();
  }

  for (unsigned int i = 0; i < num_sampler_samples; i++)
  {
    D3D12_CPU_DESCRIPTOR_HANDLE destinationDescriptor;
    destinationDescriptor.ptr = base_sampler_cpu_handle.ptr + i * D3D::sampler_descriptor_size;

    // Keep the description in a named local: taking the address of the returned temporary is
    // not valid standard C++ (it only compiles as a compiler extension).
    const D3D12_SAMPLER_DESC sampler_desc = StateCache::GetDesc12(sampler_state[i]);
    D3D::device12->CreateSampler(&sampler_desc, destinationDescriptor);
  }

  m_sampler_map[key] = base_sampler_gpu_handle;
  return base_sampler_gpu_handle;
}
// Returns the descriptor heap created by the constructor. The pointer is returned as-is; this
// accessor does not call AddRef on it.
ID3D12DescriptorHeap* D3DDescriptorHeapManager::GetDescriptorHeap() const
{
return m_descriptor_heap;
}
// Releases the descriptor heap and the CPU shadow heap. The shadow heap is only created for
// GPU-visible heaps (presumably SAFE_RELEASE tolerates the null pointer otherwise — confirm
// against the macro definition).
D3DDescriptorHeapManager::~D3DDescriptorHeapManager()
{
SAFE_RELEASE(m_descriptor_heap);
SAFE_RELEASE(m_descriptor_heap_cpu_shadow);
}
} // namespace DX12

View File

@ -1,77 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <d3d12.h>
#include <unordered_map>
#include "VideoBackends/D3D12/D3DState.h"
namespace DX12
{
// This class provides an abstraction for D3D12 descriptor heaps.
class D3DDescriptorHeapManager
{
public:
D3DDescriptorHeapManager(D3D12_DESCRIPTOR_HEAP_DESC* desc, ID3D12Device* device,
unsigned int temporarySlots = 0);
~D3DDescriptorHeapManager();
bool Allocate(D3D12_CPU_DESCRIPTOR_HANDLE* cpu_handle,
D3D12_GPU_DESCRIPTOR_HANDLE* gpu_handle = nullptr,
D3D12_CPU_DESCRIPTOR_HANDLE* gpu_handle_cpu_shadow = nullptr,
bool temporary = false);
bool AllocateGroup(D3D12_CPU_DESCRIPTOR_HANDLE* cpu_handles, unsigned int num_handles,
D3D12_GPU_DESCRIPTOR_HANDLE* gpu_handles = nullptr,
D3D12_CPU_DESCRIPTOR_HANDLE* gpu_handle_cpu_shadows = nullptr,
bool temporary = false);
D3D12_GPU_DESCRIPTOR_HANDLE GetHandleForSamplerGroup(SamplerState* sampler_state,
unsigned int num_sampler_samples);
ID3D12DescriptorHeap* GetDescriptorHeap() const;
struct SamplerStateSet
{
SamplerState desc0;
SamplerState desc1;
SamplerState desc2;
SamplerState desc3;
SamplerState desc4;
SamplerState desc5;
SamplerState desc6;
SamplerState desc7;
};
private:
ID3D12Device* m_device = nullptr;
ID3D12DescriptorHeap* m_descriptor_heap = nullptr;
ID3D12DescriptorHeap* m_descriptor_heap_cpu_shadow = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_cpu;
D3D12_GPU_DESCRIPTOR_HANDLE m_heap_base_gpu;
D3D12_CPU_DESCRIPTOR_HANDLE m_heap_base_gpu_cpu_shadow;
struct hash_sampler_desc
{
size_t operator()(const SamplerStateSet sampler_state_set) const
{
return sampler_state_set.desc0.hex;
}
};
std::unordered_map<SamplerStateSet, D3D12_GPU_DESCRIPTOR_HANDLE, hash_sampler_desc> m_sampler_map;
unsigned int m_current_temporary_offset_in_heap = 0;
unsigned int m_current_permanent_offset_in_heap = 0;
unsigned int m_descriptor_increment_size;
unsigned int m_descriptor_heap_size;
bool m_gpu_visible;
unsigned int m_first_temporary_slot_in_heap;
};
} // namespace

File diff suppressed because it is too large Load Diff

View File

@ -1,536 +0,0 @@
// Copyright hdcmeta
// Dual-Licensed under MIT and GPLv2+
// Refer to the license.txt/license_mit.txt files included.
#pragma once
#include <atomic>
#include <d3d12.h>
#include <dxgi.h>
#include <thread>
namespace DX12
{
// Size in bytes (24 MiB) of the array used as the backing store of a queued command list.
static constexpr unsigned int QUEUE_ARRAY_SIZE = 24u << 20;
// Tag stored in D3DQueueItem::Type. Every enumerator except AbortProcessing has an identically
// named member in the D3DQueueItem argument union. Enumerator values are implicit, starting at 0
// for AbortProcessing, so the declaration order below defines the numeric values.
enum D3DQueueItemType
{
AbortProcessing = 0,
SetPipelineState,
SetRenderTargets,
SetVertexBuffers,
SetIndexBuffer,
RSSetViewports,
RSSetScissorRects,
SetGraphicsRootDescriptorTable,
SetGraphicsRootConstantBufferView,
SetGraphicsRootSignature,
ClearRenderTargetView,
ClearDepthStencilView,
DrawInstanced,
DrawIndexedInstanced,
IASetPrimitiveTopology,
CopyBufferRegion,
CopyTextureRegion,
SetDescriptorHeaps,
ResourceBarrier,
ResolveSubresource,
BeginQuery,
EndQuery,
ResolveQueryData,
ExecuteCommandList,
CloseCommandList,
Present,
ResetCommandList,
ResetCommandAllocator,
FenceGpuSignal,
FenceCpuSignal,
Stop
};
// Argument payloads, one struct per D3DQueueItemType value; each is a member of the union in
// D3DQueueItem. Several structs hold fewer fields than the corresponding
// ID3D12GraphicsCommandList method has parameters (presumably the omitted ones are fixed by the
// code that replays the queue — confirm against the implementation file).

// Payload for SetPipelineState: the pipeline state object pointer.
struct SetPipelineStateArguments
{
ID3D12PipelineState* pPipelineStateObject;
};
// Payload for SetRenderTargets: one render-target descriptor and one depth-stencil descriptor.
struct SetRenderTargetsArguments
{
D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetDescriptor;
D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilDescriptor;
};
// Payload for SetVertexBuffers: a single vertex buffer view.
struct SetVertexBuffersArguments
{
// UINT startSlot; - Dolphin only uses the 0th slot.
D3D12_VERTEX_BUFFER_VIEW desc;
// UINT numBuffers; - Only supporting single vertex buffer set since that's all Dolphin uses.
};
// Payload for SetIndexBuffer: the index buffer view, stored by value.
struct SetIndexBufferArguments
{
D3D12_INDEX_BUFFER_VIEW desc;
};
// Payload for RSSetViewports: the fields of a single viewport.
struct RSSetViewportsArguments
{
FLOAT TopLeftX;
FLOAT TopLeftY;
FLOAT Width;
FLOAT Height;
FLOAT MinDepth;
FLOAT MaxDepth;
};
// Payload for RSSetScissorRects: the edges of a single scissor rectangle.
struct RSSetScissorRectsArguments
{
LONG left;
LONG top;
LONG right;
LONG bottom;
};
// Payload for SetGraphicsRootDescriptorTable.
struct SetGraphicsRootDescriptorTableArguments
{
UINT RootParameterIndex;
D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor;
};
// Payload for SetGraphicsRootConstantBufferView.
struct SetGraphicsRootConstantBufferViewArguments
{
UINT RootParameterIndex;
D3D12_GPU_VIRTUAL_ADDRESS BufferLocation;
};
// Payload for SetGraphicsRootSignature.
struct SetGraphicsRootSignatureArguments
{
ID3D12RootSignature* pRootSignature;
};
// Payload for ClearRenderTargetView: only the view handle is stored (no colour or rectangles).
struct ClearRenderTargetViewArguments
{
D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView;
};
// Payload for ClearDepthStencilView: only the view handle is stored (no flags, depth, stencil or
// rectangles).
struct ClearDepthStencilViewArguments
{
D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilView;
};
// Payload for DrawInstanced: no instance count or start instance is stored.
struct DrawInstancedArguments
{
UINT VertexCount;
UINT StartVertexLocation;
};
// Payload for DrawIndexedInstanced: no instance count or start instance is stored.
struct DrawIndexedInstancedArguments
{
UINT IndexCount;
UINT StartIndexLocation;
INT BaseVertexLocation;
};
// Payload for IASetPrimitiveTopology.
struct IASetPrimitiveTopologyArguments
{
D3D12_PRIMITIVE_TOPOLOGY PrimitiveTopology;
};
struct CopyBufferRegionArguments
{
ID3D12Resource* pDstBuffer;
UINT DstOffset;
ID3D12Resource* pSrcBuffer;
UINT SrcOffset;
UINT NumBytes;
};
// Payload for CopyTextureRegion: destination/source locations and the source box are stored by
// value.
struct CopyTextureRegionArguments
{
D3D12_TEXTURE_COPY_LOCATION dst;
UINT DstX;
UINT DstY;
UINT DstZ;
D3D12_TEXTURE_COPY_LOCATION src;
D3D12_BOX srcBox;
};
// Payload for SetDescriptorHeaps.
// NOTE(review): this stores the pointer to the heap array, not a copy of the array — confirm the
// array outlives the queued item.
struct SetDescriptorHeapsArguments
{
ID3D12DescriptorHeap* const* ppDescriptorHeap;
UINT NumDescriptorHeaps;
};
// Payload for ResourceBarrier: a single barrier, stored by value.
struct ResourceBarrierArguments
{
D3D12_RESOURCE_BARRIER barrier;
};
// Payload for ResolveSubresource.
struct ResolveSubresourceArguments
{
ID3D12Resource* pDstResource;
UINT DstSubresource;
ID3D12Resource* pSrcResource;
UINT SrcSubresource;
DXGI_FORMAT Format;
};
// Payload for BeginQuery.
struct BeginQueryArguments
{
ID3D12QueryHeap* pQueryHeap;
D3D12_QUERY_TYPE Type;
UINT Index;
};
// Payload for EndQuery (same fields as BeginQueryArguments).
struct EndQueryArguments
{
ID3D12QueryHeap* pQueryHeap;
D3D12_QUERY_TYPE Type;
UINT Index;
};
// Payload for ResolveQueryData.
struct ResolveQueryDataArguments
{
ID3D12QueryHeap* pQueryHeap;
D3D12_QUERY_TYPE Type;
UINT StartElement;
UINT ElementCount;
ID3D12Resource* pDestinationBuffer;
UINT64 AlignedDestinationBufferOffset;
};
// Payload for CloseCommandList: carries no data.
struct CloseCommandListArguments
{
};
// Payload for ExecuteCommandList: carries no data.
struct ExecuteCommandListArguments
{
};
// Payload for Present: the swap chain plus sync interval and flags.
struct PresentArguments
{
IDXGISwapChain* swapChain;
UINT syncInterval;
UINT flags;
};
// Payload for ResetCommandList: the allocator to reset the list with.
struct ResetCommandListArguments
{
ID3D12CommandAllocator* allocator;
};
// Payload for ResetCommandAllocator: the allocator to reset.
struct ResetCommandAllocatorArguments
{
ID3D12CommandAllocator* allocator;
};
// Payload for FenceGpuSignal: fence and the value to signal.
struct FenceGpuSignalArguments
{
ID3D12Fence* fence;
UINT64 fence_value;
};
// Payload for FenceCpuSignal: fence and the value to signal.
struct FenceCpuSignalArguments
{
ID3D12Fence* fence;
UINT64 fence_value;
};
// Payload for Stop. The three flags mirror the parameters of
// ID3D12QueuedCommandList::ProcessQueuedItems (presumably filled in from them — confirm against
// the implementation file).
struct StopArguments
{
bool eligible_to_move_to_front_of_queue;
bool signal_stop_event;
bool terminate_worker_thread;
};
// One queue entry: a type tag followed by an anonymous union of the per-command argument
// payloads. 'Type' identifies which union member is meaningful; every union member is named
// after the D3DQueueItemType enumerator it belongs to. Because the payloads share storage, the
// size of an item is governed by the largest payload struct.
struct D3DQueueItem
{
D3DQueueItemType Type;
union
{
SetPipelineStateArguments SetPipelineState;
SetRenderTargetsArguments SetRenderTargets;
SetVertexBuffersArguments SetVertexBuffers;
SetIndexBufferArguments SetIndexBuffer;
RSSetViewportsArguments RSSetViewports;
RSSetScissorRectsArguments RSSetScissorRects;
SetGraphicsRootDescriptorTableArguments SetGraphicsRootDescriptorTable;
SetGraphicsRootConstantBufferViewArguments SetGraphicsRootConstantBufferView;
SetGraphicsRootSignatureArguments SetGraphicsRootSignature;
ClearRenderTargetViewArguments ClearRenderTargetView;
ClearDepthStencilViewArguments ClearDepthStencilView;
DrawInstancedArguments DrawInstanced;
DrawIndexedInstancedArguments DrawIndexedInstanced;
IASetPrimitiveTopologyArguments IASetPrimitiveTopology;
CopyBufferRegionArguments CopyBufferRegion;
CopyTextureRegionArguments CopyTextureRegion;
SetDescriptorHeapsArguments SetDescriptorHeaps;
ResourceBarrierArguments ResourceBarrier;
ResolveSubresourceArguments ResolveSubresource;
BeginQueryArguments BeginQuery;
EndQueryArguments EndQuery;
ResolveQueryDataArguments ResolveQueryData;
CloseCommandListArguments CloseCommandList;
ExecuteCommandListArguments ExecuteCommandList;
PresentArguments Present;
ResetCommandListArguments ResetCommandList;
ResetCommandAllocatorArguments ResetCommandAllocator;
FenceGpuSignalArguments FenceGpuSignal;
FenceCpuSignalArguments FenceCpuSignal;
StopArguments Stop;
};
};
// A command list that derives from ID3D12GraphicsCommandList and wraps a backing command list
// and command queue. It holds a QUEUE_ARRAY_SIZE-byte array, a worker thread and two event
// handles (presumably calls are recorded into the array as D3DQueueItem entries and replayed on
// the worker thread — the implementation file is not visible here, confirm there).
//
// The destructor is private and the object carries its own reference count (m_ref, starting at
// 1); see AddRef/Release. Because m_queue_array is a 24 MiB member, instances should not be
// placed on the stack.
class ID3D12QueuedCommandList : public ID3D12GraphicsCommandList
{
public:
  ID3D12QueuedCommandList(ID3D12GraphicsCommandList* backing_command_list,
                          ID3D12CommandQueue* backing_command_queue);

  void ProcessQueuedItems(bool eligible_to_move_to_front_of_queue = false,
                          bool wait_for_stop = false, bool terminate_worker_thread = false);

  // Operations that are not part of ID3D12GraphicsCommandList but can be queued alongside the
  // recorded commands.
  void QueueExecute();
  void QueueFenceGpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value);
  void QueueFenceCpuSignal(ID3D12Fence* fence_to_signal, UINT64 fence_value);
  void QueuePresent(IDXGISwapChain* swap_chain, UINT sync_interval, UINT flags);

  // IUnknown methods
  ULONG STDMETHODCALLTYPE AddRef();
  ULONG STDMETHODCALLTYPE Release();
  HRESULT STDMETHODCALLTYPE QueryInterface(_In_ REFIID riid,
                                           _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject);

  // ID3D12Object methods
  HRESULT STDMETHODCALLTYPE GetPrivateData(_In_ REFGUID guid, _Inout_ UINT* pDataSize,
                                           _Out_writes_bytes_opt_(*pDataSize) void* pData);
  HRESULT STDMETHODCALLTYPE SetPrivateData(_In_ REFGUID guid, _In_ UINT DataSize,
                                           _In_reads_bytes_opt_(DataSize) const void* pData);
  HRESULT STDMETHODCALLTYPE SetPrivateDataInterface(_In_ REFGUID guid,
                                                    _In_opt_ const IUnknown* pData);
  HRESULT STDMETHODCALLTYPE SetName(_In_z_ LPCWSTR pName);

  // ID3D12CommandList methods
  D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE GetType();

  // ID3D12DeviceChild methods
  HRESULT STDMETHODCALLTYPE GetDevice(REFIID riid, void** ppvDevice);

  // ID3D12GraphicsCommandList methods
  HRESULT STDMETHODCALLTYPE Close(void);
  HRESULT STDMETHODCALLTYPE Reset(_In_ ID3D12CommandAllocator* pAllocator,
                                  _In_opt_ ID3D12PipelineState* pInitialState);
  void STDMETHODCALLTYPE ClearState(_In_ ID3D12PipelineState* pPipelineState);
  void STDMETHODCALLTYPE DrawInstanced(_In_ UINT VertexCountPerInstance, _In_ UINT InstanceCount,
                                       _In_ UINT StartVertexLocation,
                                       _In_ UINT StartInstanceLocation);
  void STDMETHODCALLTYPE DrawIndexedInstanced(_In_ UINT IndexCountPerInstance,
                                              _In_ UINT InstanceCount, _In_ UINT StartIndexLocation,
                                              _In_ INT BaseVertexLocation,
                                              _In_ UINT StartInstanceLocation);
  void STDMETHODCALLTYPE Dispatch(_In_ UINT ThreadGroupCountX, _In_ UINT ThreadGroupCountY,
                                  _In_ UINT ThreadGroupCountZ);
  void STDMETHODCALLTYPE DispatchIndirect(_In_ ID3D12Resource* pBufferForArgs,
                                          _In_ UINT AlignedByteOffsetForArgs);
  void STDMETHODCALLTYPE CopyBufferRegion(_In_ ID3D12Resource* pDstBuffer, UINT64 DstOffset,
                                          _In_ ID3D12Resource* pSrcBuffer, UINT64 SrcOffset,
                                          UINT64 NumBytes);
  void STDMETHODCALLTYPE CopyTextureRegion(_In_ const D3D12_TEXTURE_COPY_LOCATION* pDst, UINT DstX,
                                           UINT DstY, UINT DstZ,
                                           _In_ const D3D12_TEXTURE_COPY_LOCATION* pSrc,
                                           _In_opt_ const D3D12_BOX* pSrcBox);
  void STDMETHODCALLTYPE CopyResource(_In_ ID3D12Resource* pDstResource,
                                      _In_ ID3D12Resource* pSrcResource);
  void STDMETHODCALLTYPE
  CopyTiles(_In_ ID3D12Resource* pTiledResource,
            _In_ const D3D12_TILED_RESOURCE_COORDINATE* pTileRegionStartCoordinate,
            _In_ const D3D12_TILE_REGION_SIZE* pTileRegionSize, _In_ ID3D12Resource* pBuffer,
            UINT64 BufferStartOffsetInBytes, D3D12_TILE_COPY_FLAGS Flags);
  void STDMETHODCALLTYPE ResolveSubresource(_In_ ID3D12Resource* pDstResource,
                                            _In_ UINT DstSubresource,
                                            _In_ ID3D12Resource* pSrcResource,
                                            _In_ UINT SrcSubresource, _In_ DXGI_FORMAT Format);
  void STDMETHODCALLTYPE IASetPrimitiveTopology(_In_ D3D12_PRIMITIVE_TOPOLOGY PrimitiveTopology);
  void STDMETHODCALLTYPE RSSetViewports(
      _In_range_(0, D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count,
      _In_reads_(Count) const D3D12_VIEWPORT* pViewports);
  void STDMETHODCALLTYPE RSSetScissorRects(
      _In_range_(0, D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE) UINT Count,
      _In_reads_(Count) const D3D12_RECT* pRects);
  void STDMETHODCALLTYPE OMSetBlendFactor(_In_opt_ const FLOAT BlendFactor[4]);
  void STDMETHODCALLTYPE OMSetStencilRef(_In_ UINT StencilRef);
  void STDMETHODCALLTYPE SetPipelineState(_In_ ID3D12PipelineState* pPipelineState);
  void STDMETHODCALLTYPE ResourceBarrier(_In_ UINT NumBarriers,
                                         _In_reads_(NumBarriers)
                                             const D3D12_RESOURCE_BARRIER* pBarriers);
  void STDMETHODCALLTYPE ExecuteBundle(_In_ ID3D12GraphicsCommandList* command_list);
  void STDMETHODCALLTYPE BeginQuery(_In_ ID3D12QueryHeap* pQueryHeap, _In_ D3D12_QUERY_TYPE Type,
                                    _In_ UINT Index);
  void STDMETHODCALLTYPE EndQuery(_In_ ID3D12QueryHeap* pQueryHeap, _In_ D3D12_QUERY_TYPE Type,
                                  _In_ UINT Index);
  void STDMETHODCALLTYPE ResolveQueryData(_In_ ID3D12QueryHeap* pQueryHeap,
                                          _In_ D3D12_QUERY_TYPE Type, _In_ UINT StartElement,
                                          _In_ UINT ElementCount,
                                          _In_ ID3D12Resource* pDestinationBuffer,
                                          _In_ UINT64 AlignedDestinationBufferOffset);
  void STDMETHODCALLTYPE SetPredication(_In_opt_ ID3D12Resource* pBuffer,
                                        _In_ UINT64 AlignedBufferOffset,
                                        _In_ D3D12_PREDICATION_OP Operation);
  void STDMETHODCALLTYPE SetDescriptorHeaps(_In_ UINT NumDescriptorHeaps,
                                            _In_reads_(NumDescriptorHeaps)
                                                ID3D12DescriptorHeap* const* pDescriptorHeaps);
  void STDMETHODCALLTYPE SetComputeRootSignature(_In_ ID3D12RootSignature* pRootSignature);
  void STDMETHODCALLTYPE SetGraphicsRootSignature(_In_ ID3D12RootSignature* pRootSignature);
  void STDMETHODCALLTYPE SetComputeRootDescriptorTable(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor);
  void STDMETHODCALLTYPE SetGraphicsRootDescriptorTable(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_DESCRIPTOR_HANDLE BaseDescriptor);
  void STDMETHODCALLTYPE SetComputeRoot32BitConstant(_In_ UINT RootParameterIndex,
                                                     _In_ UINT SrcData,
                                                     _In_ UINT DestOffsetIn32BitValues);
  void STDMETHODCALLTYPE SetGraphicsRoot32BitConstant(_In_ UINT RootParameterIndex,
                                                      _In_ UINT SrcData,
                                                      _In_ UINT DestOffsetIn32BitValues);
  void STDMETHODCALLTYPE SetComputeRoot32BitConstants(_In_ UINT RootParameterIndex,
                                                      _In_ UINT Num32BitValuesToSet,
                                                      _In_reads_(Num32BitValuesToSet * sizeof(UINT))
                                                          const void* pSrcData,
                                                      _In_ UINT DestOffsetIn32BitValues);
  void STDMETHODCALLTYPE
  SetGraphicsRoot32BitConstants(_In_ UINT RootParameterIndex, _In_ UINT Num32BitValuesToSet,
                                _In_reads_(Num32BitValuesToSet * sizeof(UINT)) const void* pSrcData,
                                _In_ UINT DestOffsetIn32BitValues);
  void STDMETHODCALLTYPE SetGraphicsRootConstantBufferView(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_VIRTUAL_ADDRESS BufferLocation);
  void STDMETHODCALLTYPE SetComputeRootConstantBufferView(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_VIRTUAL_ADDRESS BufferLocation);
  void STDMETHODCALLTYPE SetComputeRootShaderResourceView(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle);
  void STDMETHODCALLTYPE SetGraphicsRootShaderResourceView(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle);
  void STDMETHODCALLTYPE SetComputeRootUnorderedAccessView(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle);
  void STDMETHODCALLTYPE SetGraphicsRootUnorderedAccessView(
      _In_ UINT RootParameterIndex, _In_ D3D12_GPU_VIRTUAL_ADDRESS DescriptorHandle);
  void STDMETHODCALLTYPE IASetIndexBuffer(_In_opt_ const D3D12_INDEX_BUFFER_VIEW* pDesc);
  void STDMETHODCALLTYPE IASetVertexBuffers(_In_ UINT StartSlot, _In_ UINT NumBuffers,
                                            _In_ const D3D12_VERTEX_BUFFER_VIEW* pDesc);
  void STDMETHODCALLTYPE SOSetTargets(_In_ UINT StartSlot, _In_ UINT NumViews,
                                      _In_ const D3D12_STREAM_OUTPUT_BUFFER_VIEW* pViews);
  void STDMETHODCALLTYPE
  OMSetRenderTargets(_In_ UINT NumRenderTargetDescriptors,
                     _In_ const D3D12_CPU_DESCRIPTOR_HANDLE* pRenderTargetDescriptors,
                     _In_ BOOL RTsSingleHandleToDescriptorRange,
                     _In_opt_ const D3D12_CPU_DESCRIPTOR_HANDLE* pDepthStencilDescriptor);
  void STDMETHODCALLTYPE ClearDepthStencilView(_In_ D3D12_CPU_DESCRIPTOR_HANDLE DepthStencilView,
                                               _In_ D3D12_CLEAR_FLAGS ClearFlags, _In_ FLOAT Depth,
                                               _In_ UINT8 Stencil, _In_ UINT NumRects,
                                               _In_reads_opt_(NumRects) const D3D12_RECT* pRect);
  void STDMETHODCALLTYPE ClearRenderTargetView(_In_ D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView,
                                               _In_ const FLOAT ColorRGBA[4], _In_ UINT NumRects,
                                               _In_reads_opt_(NumRects) const D3D12_RECT* pRects);
  void STDMETHODCALLTYPE ClearUnorderedAccessViewUint(
      _In_ D3D12_GPU_DESCRIPTOR_HANDLE ViewGPUHandleInCurrentHeap,
      _In_ D3D12_CPU_DESCRIPTOR_HANDLE ViewCPUHandle, _In_ ID3D12Resource* pResource,
      _In_ const UINT Values[4], _In_ UINT NumRects,
      _In_reads_opt_(NumRects) const D3D12_RECT* pRects);
  void STDMETHODCALLTYPE ClearUnorderedAccessViewFloat(
      _In_ D3D12_GPU_DESCRIPTOR_HANDLE ViewGPUHandleInCurrentHeap,
      _In_ D3D12_CPU_DESCRIPTOR_HANDLE ViewCPUHandle, _In_ ID3D12Resource* pResource,
      _In_ const FLOAT Values[4], _In_ UINT NumRects,
      _In_reads_opt_(NumRects) const D3D12_RECT* pRects);
  void STDMETHODCALLTYPE DiscardResource(_In_ ID3D12Resource* pResource,
                                         _In_opt_ const D3D12_DISCARD_REGION* pRegion);
  void STDMETHODCALLTYPE SetMarker(UINT Metadata, _In_reads_bytes_opt_(Size) const void* pData,
                                   UINT Size);
  void STDMETHODCALLTYPE BeginEvent(UINT Metadata, _In_reads_bytes_opt_(Size) const void* pData,
                                    UINT Size);
  void STDMETHODCALLTYPE EndEvent(void);
  void STDMETHODCALLTYPE ExecuteIndirect(_In_ ID3D12CommandSignature* pCommandSignature,
                                         _In_ UINT MaxCommandCount,
                                         _In_ ID3D12Resource* pArgumentBuffer,
                                         _In_ UINT64 ArgumentBufferOffset,
                                         _In_opt_ ID3D12Resource* pCountBuffer,
                                         _In_ UINT64 CountBufferOffset);

private:
  // Private: the object is not meant to be destroyed directly by users of the class.
  ~ID3D12QueuedCommandList();

  void ResetQueueOverflowTracking();
  void CheckForOverflow();
  static void BackgroundThreadFunction(ID3D12QueuedCommandList* parent_queued_command_list);

  // Backing storage for queued items and the write cursor into it.
  byte m_queue_array[QUEUE_ARRAY_SIZE];
  byte* m_queue_array_back = m_queue_array;
  byte* m_queue_array_back_at_start_of_frame = m_queue_array_back;

  std::thread m_background_thread;
  HANDLE m_begin_execution_event;
  HANDLE m_stop_execution_event;

  ID3D12GraphicsCommandList* m_command_list;
  ID3D12CommandQueue* m_command_queue;

  // Direct-list-initialised: std::atomic is not copyable, so "= 1" (copy-initialisation) is
  // ill-formed before C++17 and only accepted by some compilers.
  std::atomic<unsigned long> m_ref{1};
};
} // namespace

View File

@ -1,91 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <fstream>
#include <string>
#include "Common/FileUtil.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DShader.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
namespace D3D
{
// Compiles the HLSL source in 'code' (entry point "main") for the target profile named by
// 'shader_version_string', with optional preprocessor 'defines'.
//
// On success, *blob receives the compiled bytecode and true is returned. Any compiler output is
// logged as a warning.
// On failure, the source and the compiler output are written to a "bad_<profile>_<nnnn>.txt"
// file under the user path selected by D_DUMP_IDX, a panic alert is raised, *blob is set to
// nullptr and false is returned.
bool CompileShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines,
                   const std::string& shader_version_string)
{
  ID3D10Blob* shader_buffer = nullptr;
  ID3D10Blob* error_buffer = nullptr;

#if defined(_DEBUG) || defined(DEBUGFAST)
  UINT flags = D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_DEBUG;
#else
  UINT flags = D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_OPTIMIZATION_LEVEL3 |
               D3DCOMPILE_SKIP_VALIDATION;
#endif

  HRESULT hr = d3d_compile(code.c_str(), code.length(), nullptr, defines, nullptr, "main",
                           shader_version_string.data(), flags, 0, &shader_buffer, &error_buffer);

  // The compiler is not required to produce an error blob, even on failure; never dereference
  // error_buffer without checking it.
  const char* const error_text =
      error_buffer ? static_cast<const char*>(error_buffer->GetBufferPointer()) :
                     "(no compiler output available)";

  if (error_buffer)
  {
    WARN_LOG(VIDEO, "Warning generated when compiling %s shader:\n%s",
             shader_version_string.c_str(), error_text);
  }

  if (FAILED(hr))
  {
    // Dump the offending source next to the compiler output so the failure can be inspected.
    static int num_failures = 0;
    std::string filename =
        StringFromFormat("%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
                         shader_version_string.c_str(), num_failures++);
    std::ofstream file;
    OpenFStream(file, filename, std::ios_base::out);
    file << code;
    file << std::endl << "Errors:" << std::endl;
    file << error_text;
    file.close();

    PanicAlert("Failed to compile shader: %s\nDebug info (%s):\n%s", filename.c_str(),
               shader_version_string.c_str(), error_text);

    *blob = nullptr;
  }
  else
  {
    *blob = shader_buffer;
  }

  // Release the message blob on every path; previously it leaked when compilation succeeded
  // with warnings.
  if (error_buffer)
    error_buffer->Release();

  return SUCCEEDED(hr);
}
// code->bytecode
// Compiles 'code' with the profile string returned by D3D::VertexShaderVersionString() and no
// preprocessor defines. Returns CompileShader's result; *blob receives the bytecode on success.
bool CompileVertexShader(const std::string& code, ID3DBlob** blob)
{
return CompileShader(code, blob, nullptr, D3D::VertexShaderVersionString());
}
// code->bytecode
// Compiles 'code' with the profile string returned by D3D::GeometryShaderVersionString(),
// forwarding 'defines' to the compiler. Returns CompileShader's result; *blob receives the
// bytecode on success.
bool CompileGeometryShader(const std::string& code, ID3DBlob** blob,
const D3D_SHADER_MACRO* defines)
{
return CompileShader(code, blob, defines, D3D::GeometryShaderVersionString());
}
// code->bytecode
// Compiles 'code' with the profile string returned by D3D::PixelShaderVersionString(),
// forwarding 'defines' to the compiler. Returns CompileShader's result; *blob receives the
// bytecode on success.
bool CompilePixelShader(const std::string& code, ID3DBlob** blob, const D3D_SHADER_MACRO* defines)
{
return CompileShader(code, blob, defines, D3D::PixelShaderVersionString());
}
} // namespace
} // namespace DX12

View File

@ -1,25 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <string>
#include "VideoBackends/D3D12/D3DBase.h"
// NOTE(review): this forward declaration is not referenced by the declarations in this header
// (they take ID3DBlob**) — confirm whether it is still needed.
class D3DBlob;
namespace DX12
{
namespace D3D
{
// Shader compilation entry points, one per shader stage. Each takes HLSL source in 'code' and
// writes the resulting bytecode blob to *blob; the return value reports whether compilation
// succeeded. 'defines', where accepted, is an optional list of preprocessor macros.
// The returned bytecode buffers should be Release()d.
bool CompileVertexShader(const std::string& code, ID3DBlob** blob);
bool CompileGeometryShader(const std::string& code, ID3DBlob** blob,
const D3D_SHADER_MACRO* defines = nullptr);
bool CompilePixelShader(const std::string& code, ID3DBlob** blob,
const D3D_SHADER_MACRO* defines = nullptr);
}
} // namespace DX12

View File

@ -1,486 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "Core/ConfigManager.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/NativeVertexFormat.h"
#include "VideoBackends/D3D12/ShaderCache.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// Set by PipelineStateCacheInserter::Read when a cached entry cannot be turned into a pipeline
// state; once set, remaining entries are skipped and StateCache::Init rebuilds the cache file.
static bool s_cache_is_corrupted = false;
// On-disk cache of pipeline state blobs, keyed by SmallPsoDiskDesc.
static LinearDiskCache<SmallPsoDiskDesc, u8> s_pso_disk_cache;
// Disk-cache reader that rebuilds pipeline state objects from cached blobs and inserts them into
// gx_state_cache.m_small_pso_map.
class PipelineStateCacheInserter : public LinearDiskCacheReader<SmallPsoDiskDesc, u8>
{
public:
// Handles one cache entry: 'key' describes the pipeline state, 'value'/'value_size' is the cached
// PSO blob. Any failure marks the whole cache as corrupted (s_cache_is_corrupted) and returns
// without inserting; once that flag is set, every later entry is ignored.
void Read(const SmallPsoDiskDesc& key, const u8* value, u32 value_size)
{
if (s_cache_is_corrupted)
return;
// Fixed parts of the description, independent of the cache key.
D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
desc.pRootSignature = D3D::default_root_signature;
desc.RTVFormats[0] =
DXGI_FORMAT_R8G8B8A8_UNORM; // This state changes in PSTextureEncoder::Encode.
desc.DSVFormat = DXGI_FORMAT_D32_FLOAT; // This state changes in PSTextureEncoder::Encode.
desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
desc.NumRenderTargets = 1;
desc.SampleMask = UINT_MAX;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
// Look up the shader bytecode for the UIDs stored in the key.
desc.GS = ShaderCache::GetGeometryShaderFromUid(&key.gs_uid);
desc.PS = ShaderCache::GetPixelShaderFromUid(&key.ps_uid);
desc.VS = ShaderCache::GetVertexShaderFromUid(&key.vs_uid);
// Pixel and vertex shader bytecode are mandatory; the geometry shader is not checked.
if (!desc.PS.pShaderBytecode || !desc.VS.pShaderBytecode)
{
s_cache_is_corrupted = true;
return;
}
// Expand the packed state values from the key into full D3D12 state descriptions.
BlendState blend_state = {};
blend_state.hex = key.blend_state_hex;
desc.BlendState = StateCache::GetDesc12(blend_state);
ZMode depth_stencil_state = {};
depth_stencil_state.hex = key.depth_stencil_state_hex;
desc.DepthStencilState = StateCache::GetDesc12(depth_stencil_state);
RasterizerState rasterizer_state = {};
rasterizer_state.hex = key.rasterizer_state_hex;
desc.RasterizerState = StateCache::GetDesc12(rasterizer_state);
desc.PrimitiveTopologyType = key.topology;
// search for a cached native vertex format
const PortableVertexDeclaration& native_vtx_decl = key.vertex_declaration;
// 'native' is a reference to the map slot, so the assignment below also populates the map.
std::unique_ptr<NativeVertexFormat>& native =
(*VertexLoaderManager::GetNativeVertexFormatMap())[native_vtx_decl];
if (!native)
{
native = g_vertex_manager->CreateNativeVertexFormat(native_vtx_decl);
}
desc.InputLayout = reinterpret_cast<D3DVertexFormat*>(native.get())->GetActiveInputLayout12();
// Hand the cached blob to the device along with the description.
desc.CachedPSO.CachedBlobSizeInBytes = value_size;
desc.CachedPSO.pCachedBlob = value;
ID3D12PipelineState* pso = nullptr;
HRESULT hr = D3D::device12->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&pso));
if (FAILED(hr))
{
// Failure can occur if disk cache is corrupted, or a driver upgrade invalidates the existing
// blobs.
// In this case, we need to clear the disk cache.
s_cache_is_corrupted = true;
return;
}
// Build the in-memory lookup key and store the created pipeline state under it.
SmallPsoDesc small_desc = {};
small_desc.blend_state.hex = key.blend_state_hex;
small_desc.depth_stencil_state.hex = key.depth_stencil_state_hex;
small_desc.rasterizer_state.hex = key.rasterizer_state_hex;
small_desc.gs_bytecode = desc.GS;
small_desc.ps_bytecode = desc.PS;
small_desc.vs_bytecode = desc.VS;
small_desc.input_layout = reinterpret_cast<D3DVertexFormat*>(native.get());
gx_state_cache.m_small_pso_map[small_desc] = pso;
}
};
// Zero-initialises the cached pipeline-state description and fills in the defaults that are
// known at construction time. The root signature and sample description are filled in later by
// Init().
StateCache::StateCache()
{
  auto& pso_desc = m_current_pso_desc;
  pso_desc = {};

  // Render-target and depth formats: this state changes in PSTextureEncoder::Encode.
  pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
  pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;

  pso_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF;
  pso_desc.NumRenderTargets = 1;
  pso_desc.SampleMask = UINT_MAX;
}
// Completes the state that was not available when the StateCache was constructed, then loads
// the per-game pipeline-state disk cache. If any cached entry fails to load, every pipeline state
// created so far is released, the cache file is deleted and an empty one is opened in its place.
void StateCache::Init()
{
  // Root signature isn't available at time of StateCache construction, so fill it in now.
  gx_state_cache.m_current_pso_desc.pRootSignature = D3D::default_root_signature;

  // Multi-sample configuration isn't available at time of StateCache construction, so fill it in
  // now.
  gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
  gx_state_cache.m_current_pso_desc.SampleDesc.Quality = 0;

  if (!File::Exists(File::GetUserPath(D_SHADERCACHE_IDX)))
    File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));

  std::string cache_filename =
      StringFromFormat("%sdx12-%s-pso.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
                       SConfig::GetInstance().GetGameID().c_str());

  PipelineStateCacheInserter inserter;
  s_pso_disk_cache.OpenAndRead(cache_filename, inserter);

  if (s_cache_is_corrupted)
  {
    // If a PSO fails to create, that means either:
    // - The file itself is corrupt.
    // - A driver/HW change has occurred, causing the existing cache blobs to be invalid.
    //
    // In either case, we want to re-create the disk cache. This should not be a frequent
    // occurrence.
    s_pso_disk_cache.Close();

    // Iterate by reference: this avoids copying every key/value pair and lets SAFE_RELEASE act
    // on the stored pointer rather than on a copy of it.
    for (auto& it : gx_state_cache.m_small_pso_map)
    {
      SAFE_RELEASE(it.second);
    }
    gx_state_cache.m_small_pso_map.clear();

    File::Delete(cache_filename);

    // The file no longer exists, so this opens a fresh, empty cache. The corruption flag is
    // only cleared afterwards, which keeps the inserter inert during this call.
    s_pso_disk_cache.OpenAndRead(cache_filename, inserter);
    s_cache_is_corrupted = false;
  }
}
// Expands the packed SamplerState bitfield into a D3D12_SAMPLER_DESC.
// Anisotropic filtering is used whenever it is enabled in the active configuration and the
// state is not point filtered; otherwise the filter is derived from the min/mag/mip bits.
D3D12_SAMPLER_DESC StateCache::GetDesc12(SamplerState state)
{
  // Indexed by the low two bits of min_filter. The reserved encoding (3) maps to TEXF_NONE.
  static const unsigned int mip_filter_table[4] = {
      TexMode0::TEXF_NONE, TexMode0::TEXF_POINT, TexMode0::TEXF_LINEAR,
      TexMode0::TEXF_NONE,  // reserved
  };
  // Indexed by wrap_s / wrap_t. The reserved encoding (3) maps to WRAP.
  static const D3D12_TEXTURE_ADDRESS_MODE address_mode_table[4] = {
      D3D12_TEXTURE_ADDRESS_MODE_CLAMP, D3D12_TEXTURE_ADDRESS_MODE_WRAP,
      D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
      D3D12_TEXTURE_ADDRESS_MODE_WRAP  // reserved
  };

  D3D12_SAMPLER_DESC desc;

  const bool use_anisotropic =
      g_ActiveConfig.iMaxAnisotropy > 0 && !SamplerCommon::IsBpTexMode0PointFiltering(state);
  if (use_anisotropic)
  {
    desc.Filter = D3D12_FILTER_ANISOTROPIC;
    desc.MaxAnisotropy = 1 << g_ActiveConfig.iMaxAnisotropy;
  }
  else
  {
    desc.MaxAnisotropy = 1;

    // TEXF_NONE and TEXF_POINT both select the "MIP_POINT" flavour of each filter; only
    // TEXF_LINEAR selects the "MIP_LINEAR" one.
    const unsigned int mip_mode = mip_filter_table[state.min_filter & 3];
    const bool mip_linear = (mip_mode == TexMode0::TEXF_LINEAR);
    const bool min_linear = (state.min_filter & 4) != 0;
    const bool mag_linear = static_cast<u32>(state.mag_filter) != 0;

    if (min_linear && mag_linear)
    {
      desc.Filter =
          mip_linear ? D3D12_FILTER_MIN_MAG_MIP_LINEAR : D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
    }
    else if (min_linear)
    {
      desc.Filter = mip_linear ? D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR :
                                 D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
    }
    else if (mag_linear)
    {
      desc.Filter = mip_linear ? D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR :
                                 D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
    }
    else
    {
      desc.Filter =
          mip_linear ? D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR : D3D12_FILTER_MIN_MAG_MIP_POINT;
    }
  }

  desc.AddressU = address_mode_table[state.wrap_s];
  desc.AddressV = address_mode_table[state.wrap_t];
  desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;

  desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;

  for (float& channel : desc.BorderColor)
    channel = 1.0f;

  // LOD values are stored in 1/16 steps and the bias in 1/32 steps.
  desc.MaxLOD = SamplerCommon::AreBpTexMode0MipmapsEnabled(state) ? state.max_lod / 16.f : 0.f;
  desc.MinLOD = std::min(state.min_lod / 16.f, desc.MaxLOD);
  desc.MipLODBias = static_cast<s32>(state.lod_bias) / 32.0f;

  return desc;
}
// Maps a colour blend factor to the matching alpha blend factor. Factors that do not refer to a
// colour channel are returned unchanged.
D3D12_BLEND GetBlendingAlpha(D3D12_BLEND blend)
{
  if (blend == D3D12_BLEND_SRC_COLOR)
    return D3D12_BLEND_SRC_ALPHA;

  if (blend == D3D12_BLEND_INV_SRC_COLOR)
    return D3D12_BLEND_INV_SRC_ALPHA;

  if (blend == D3D12_BLEND_DEST_COLOR)
    return D3D12_BLEND_DEST_ALPHA;

  if (blend == D3D12_BLEND_INV_DEST_COLOR)
    return D3D12_BLEND_INV_DEST_ALPHA;

  return blend;
}
// Expands the packed BlendState bitfield into a D3D12_BLEND_DESC. Only render target 0 is
// filled in; the remaining render target slots stay zero-initialized.
D3D12_BLEND_DESC StateCache::GetDesc12(BlendState state)
{
  // With blending disabled, normalise the factors to a plain "source replaces destination" setup.
  if (!state.blend_enable)
  {
    state.src_blend = D3D12_BLEND_ONE;
    state.dst_blend = D3D12_BLEND_ZERO;
    state.blend_op = D3D12_BLEND_OP_ADD;
    state.use_dst_alpha = false;
  }

  D3D12_BLEND_DESC desc = {};
  desc.AlphaToCoverageEnable = FALSE;
  desc.IndependentBlendEnable = FALSE;

  D3D12_RENDER_TARGET_BLEND_DESC& target = desc.RenderTarget[0];
  target.BlendEnable = state.blend_enable;
  target.LogicOpEnable = FALSE;
  target.SrcBlend = state.src_blend;
  target.DestBlend = state.dst_blend;
  target.BlendOp = state.blend_op;
  target.LogicOp = D3D12_LOGIC_OP_NOOP;
  target.RenderTargetWriteMask = state.write_mask;

  if (state.use_dst_alpha)
  {
    // Destination alpha: the alpha channel is written straight from the source.
    target.SrcBlendAlpha = D3D12_BLEND_ONE;
    target.DestBlendAlpha = D3D12_BLEND_ZERO;
    target.BlendOpAlpha = D3D12_BLEND_OP_ADD;
  }
  else
  {
    // Alpha uses the colour factors, with colour-based factors swapped for their alpha versions.
    target.SrcBlendAlpha = GetBlendingAlpha(target.SrcBlend);
    target.DestBlendAlpha = GetBlendingAlpha(target.DestBlend);
    target.BlendOpAlpha = state.blend_op;
  }

  return desc;
}
// Expands the packed RasterizerState into a D3D12_RASTERIZER_DESC. Only the cull mode comes from
// the state; every other field is a fixed value.
D3D12_RASTERIZER_DESC StateCache::GetDesc12(RasterizerState state)
{
  D3D12_RASTERIZER_DESC desc = {};
  desc.FillMode = D3D12_FILL_MODE_SOLID;
  desc.CullMode = state.cull_mode;
  desc.FrontCounterClockwise = false;
  desc.DepthBias = 0;
  desc.DepthBiasClamp = 0.f;
  desc.SlopeScaledDepthBias = 0;
  desc.DepthClipEnable = false;
  desc.MultisampleEnable = true;
  desc.AntialiasedLineEnable = false;
  desc.ForcedSampleCount = 0;
  desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
  return desc;
}
// Expands a ZMode register value into a D3D12_DEPTH_STENCIL_DESC. Stencil is always disabled.
//
// Not marked `inline`: this is the only definition of a public static member declared in the
// header, so it needs external linkage to be callable from other translation units.
D3D12_DEPTH_STENCIL_DESC StateCache::GetDesc12(ZMode state)
{
  // Indexed by ZMode::func.
  static const D3D12_COMPARISON_FUNC compare_funcs[8] = {
      D3D12_COMPARISON_FUNC_NEVER,      D3D12_COMPARISON_FUNC_GREATER,
      D3D12_COMPARISON_FUNC_EQUAL,      D3D12_COMPARISON_FUNC_GREATER_EQUAL,
      D3D12_COMPARISON_FUNC_LESS,       D3D12_COMPARISON_FUNC_NOT_EQUAL,
      D3D12_COMPARISON_FUNC_LESS_EQUAL, D3D12_COMPARISON_FUNC_ALWAYS};

  const D3D12_DEPTH_STENCILOP_DESC default_stencil_op = {
      D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP,
      D3D12_COMPARISON_FUNC_ALWAYS};

  // Value-initialize so that no field is ever handed to the runtime with indeterminate content.
  D3D12_DEPTH_STENCIL_DESC depthdc = {};

  depthdc.StencilEnable = FALSE;
  depthdc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK;
  depthdc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK;
  depthdc.FrontFace = default_stencil_op;
  depthdc.BackFace = default_stencil_op;

  if (state.testenable)
  {
    depthdc.DepthEnable = TRUE;
    depthdc.DepthWriteMask =
        state.updateenable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
    depthdc.DepthFunc = compare_funcs[state.func];
  }
  else
  {
    // if the test is disabled write is disabled too
    depthdc.DepthEnable = FALSE;
    depthdc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
    // The original left DepthFunc unset on this path. Give it a valid enumerator so the PSO
    // description is fully defined.
    depthdc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  }

  return depthdc;
}
// Looks up (or creates and caches) the pipeline state object for a complete PSO description.
// On success *pso receives the cached object and S_OK is returned; if creation fails the HRESULT
// from CreateGraphicsPipelineState is returned and neither *pso nor the cache is modified.
HRESULT StateCache::GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc,
                                                    ID3D12PipelineState** pso)
{
  const auto cached = m_pso_map.find(*pso_desc);
  if (cached != m_pso_map.end())
  {
    *pso = cached->second;
    return S_OK;
  }

  // Cache miss: build the PSO now and remember it for next time.
  ID3D12PipelineState* created_pso = nullptr;
  const HRESULT hr =
      D3D::device12->CreateGraphicsPipelineState(pso_desc, IID_PPV_ARGS(&created_pso));
  if (FAILED(hr))
    return hr;

  m_pso_map.emplace(*pso_desc, created_pso);
  *pso = created_pso;
  return S_OK;
}
// Looks up (or creates and caches) the pipeline state object for a SmallPsoDesc.
// A newly created PSO is stored in m_small_pso_map and, when its cached blob can be retrieved,
// appended to the on-disk PSO cache together with the shader UIDs needed to rebuild it.
// Returns S_OK on success, or the HRESULT from CreateGraphicsPipelineState on failure.
HRESULT StateCache::GetPipelineStateObjectFromCache(
    SmallPsoDesc* pso_desc, ID3D12PipelineState** pso, D3D12_PRIMITIVE_TOPOLOGY_TYPE topology,
    const GeometryShaderUid* gs_uid, const PixelShaderUid* ps_uid, const VertexShaderUid* vs_uid)
{
  const auto cached = m_small_pso_map.find(*pso_desc);
  if (cached != m_small_pso_map.end())
  {
    *pso = cached->second;
    return S_OK;
  }

  // Cache miss: expand the small description into the full one.
  // RootSignature, SampleMask, SampleDesc, NumRenderTargets, RTVFormats, DSVFormat
  // never change so they are set in constructor and forgotten.
  m_current_pso_desc.VS = pso_desc->vs_bytecode;
  m_current_pso_desc.GS = pso_desc->gs_bytecode;
  m_current_pso_desc.PS = pso_desc->ps_bytecode;
  m_current_pso_desc.InputLayout = pso_desc->input_layout->GetActiveInputLayout12();
  m_current_pso_desc.PrimitiveTopologyType = topology;
  m_current_pso_desc.RasterizerState = GetDesc12(pso_desc->rasterizer_state);
  m_current_pso_desc.DepthStencilState = GetDesc12(pso_desc->depth_stencil_state);
  m_current_pso_desc.BlendState = GetDesc12(pso_desc->blend_state);

  ID3D12PipelineState* created_pso = nullptr;
  HRESULT hr =
      D3D::device12->CreateGraphicsPipelineState(&m_current_pso_desc, IID_PPV_ARGS(&created_pso));
  if (FAILED(hr))
    return hr;

  m_small_pso_map.emplace(*pso_desc, created_pso);
  *pso = created_pso;

  // This contains all of the information needed to reconstruct a PSO at startup.
  SmallPsoDiskDesc disk_record = {};
  disk_record.blend_state_hex = pso_desc->blend_state.hex;
  disk_record.rasterizer_state_hex = pso_desc->rasterizer_state.hex;
  disk_record.depth_stencil_state_hex = pso_desc->depth_stencil_state.hex;
  disk_record.ps_uid = *ps_uid;
  disk_record.vs_uid = *vs_uid;
  disk_record.gs_uid = *gs_uid;
  disk_record.topology = topology;
  disk_record.vertex_declaration = pso_desc->input_layout->GetVertexDeclaration();

  // This shouldn't fail.. but if it does, don't cache to disk.
  ID3DBlob* cached_blob = nullptr;
  hr = created_pso->GetCachedBlob(&cached_blob);
  if (FAILED(hr))
    return S_OK;

  s_pso_disk_cache.Append(disk_record,
                          reinterpret_cast<const u8*>(cached_blob->GetBufferPointer()),
                          static_cast<u32>(cached_blob->GetBufferSize()));
  cached_blob->Release();
  return S_OK;
}
// Drops every PSO built from a SmallPsoDesc and records the new sample count, so that PSOs
// created from now on use the current multi-sample setting.
void StateCache::OnMSAASettingsChanged()
{
  for (auto& entry : m_small_pso_map)
    SAFE_RELEASE(entry.second);

  m_small_pso_map.clear();

  // Update sample count for new PSOs being created
  gx_state_cache.m_current_pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}
// Releases every cached pipeline state object, empties both lookup tables, then flushes and
// closes the on-disk PSO cache.
void StateCache::Clear()
{
  const auto release_all = [](auto& pso_map) {
    for (auto& entry : pso_map)
      SAFE_RELEASE(entry.second);
    pso_map.clear();
  };

  release_all(m_pso_map);
  release_all(m_small_pso_map);

  s_pso_disk_cache.Sync();
  s_pso_disk_cache.Close();
}
} // namespace DX12

View File

@ -1,195 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <stack>
#include <unordered_map>
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/NativeVertexFormat.h"
#include "VideoBackends/D3D12/ShaderCache.h"
#include "VideoCommon/BPMemory.h"
namespace DX12
{
class PipelineStateCacheInserter;
// Packed rasterizer state. `hex` aliases the whole 32-bit word; it is what the PSO cache
// compares and what gets written to the on-disk cache.
// NOTE(review): BitField<a, b, T> is presumably <first bit, bit count, value type> - confirm
// against Common/BitField.h.
union RasterizerState
{
BitField<0, 2, D3D12_CULL_MODE> cull_mode;
u32 hex;
};
// Packed blend state, expanded into a D3D12_BLEND_DESC by StateCache::GetDesc12(BlendState).
// `hex` aliases the whole 32-bit word and is used for comparison and for the on-disk cache.
// NOTE(review): BitField<a, b, T> is presumably <first bit, bit count, value type> - confirm
// against Common/BitField.h.
union BlendState
{
BitField<0, 1, u32> blend_enable;
BitField<1, 3, D3D12_BLEND_OP> blend_op;
BitField<4, 4, u8> write_mask;
BitField<8, 5, D3D12_BLEND> src_blend;
BitField<13, 5, D3D12_BLEND> dst_blend;
// When set, GetDesc12 forces the alpha blend to ONE / ZERO / ADD.
BitField<18, 1, u32> use_dst_alpha;
u32 hex;
};
// Packed sampler state, expanded into a D3D12_SAMPLER_DESC by
// StateCache::GetDesc12(SamplerState). `hex` aliases the whole 32-bit word.
// NOTE(review): BitField<a, b, T> is presumably <first bit, bit count, value type> - confirm
// against Common/BitField.h.
union SamplerState
{
// GetDesc12 reads the low two bits as the mip filter mode and bit 2 (mask 4) as "linear min".
BitField<0, 3, u32> min_filter;
// Non-zero selects a linear magnification filter in GetDesc12.
BitField<3, 1, u32> mag_filter;
// GetDesc12 divides min_lod / max_lod by 16 and lod_bias by 32 to obtain the D3D12 values.
BitField<4, 8, u32> min_lod;
BitField<12, 8, u32> max_lod;
BitField<20, 8, s32> lod_bias;
// Indices into GetDesc12's address-mode table (clamp, wrap, mirror, reserved).
BitField<28, 2, u32> wrap_s;
BitField<30, 2, u32> wrap_t;
u32 hex;
};
// Compact description of a pipeline state; this is the key type of StateCache's small PSO map.
// StateCache::GetPipelineStateObjectFromCache expands it into a full
// D3D12_GRAPHICS_PIPELINE_STATE_DESC when a new PSO has to be created.
struct SmallPsoDesc
{
D3D12_SHADER_BYTECODE gs_bytecode;
D3D12_SHADER_BYTECODE ps_bytecode;
D3D12_SHADER_BYTECODE vs_bytecode;
// Supplies both the active D3D12 input layout and the portable vertex declaration.
D3DVertexFormat* input_layout;
BlendState blend_state;
RasterizerState rasterizer_state;
ZMode depth_stencil_state;
};
// Key stored in the on-disk PSO cache: everything needed to rebuild a SmallPsoDesc at startup.
// The BitField members in BlendState, RasterizerState, and ZMode cause the..
// static_assert(std::is_trivially_copyable<K>::value, "K must be a trivially copyable type");
// .. check in LinearDiskCache to fail. So, just storing the packed u32 values.
struct SmallPsoDiskDesc
{
u32 blend_state_hex;
u32 rasterizer_state_hex;
u32 depth_stencil_state_hex;
PixelShaderUid ps_uid;
VertexShaderUid vs_uid;
GeometryShaderUid gs_uid;
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology;
PortableVertexDeclaration vertex_declaration; // Used to construct the input layout.
};
class StateCache
{
public:
StateCache();
static void Init();
// Get D3D12 descs for the internal state bitfields.
static D3D12_SAMPLER_DESC GetDesc12(SamplerState state);
static D3D12_BLEND_DESC GetDesc12(BlendState state);
static D3D12_RASTERIZER_DESC GetDesc12(RasterizerState state);
static D3D12_DEPTH_STENCIL_DESC GetDesc12(ZMode state);
HRESULT GetPipelineStateObjectFromCache(D3D12_GRAPHICS_PIPELINE_STATE_DESC* pso_desc,
ID3D12PipelineState** pso);
HRESULT GetPipelineStateObjectFromCache(SmallPsoDesc* pso_desc, ID3D12PipelineState** pso,
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology,
const GeometryShaderUid* gs_uid,
const PixelShaderUid* ps_uid,
const VertexShaderUid* vs_uid);
// Called when the MSAA count/quality changes. Invalidates all small PSOs.
void OnMSAASettingsChanged();
// Release all cached states and clear hash tables.
void Clear();
private:
friend DX12::PipelineStateCacheInserter;
D3D12_GRAPHICS_PIPELINE_STATE_DESC m_current_pso_desc;
struct hash_pso_desc
{
size_t operator()(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& pso_desc) const
{
return ((uintptr_t)pso_desc.PS.pShaderBytecode * 1000000) ^
((uintptr_t)pso_desc.VS.pShaderBytecode * 1000) ^
((uintptr_t)pso_desc.InputLayout.pInputElementDescs);
}
};
struct equality_pipeline_state_desc
{
bool operator()(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& lhs,
const D3D12_GRAPHICS_PIPELINE_STATE_DESC& rhs) const
{
return std::tie(
lhs.PS.pShaderBytecode, lhs.VS.pShaderBytecode, lhs.GS.pShaderBytecode,
lhs.RasterizerState.CullMode, lhs.DepthStencilState.DepthEnable,
lhs.DepthStencilState.DepthFunc, lhs.DepthStencilState.DepthWriteMask,
lhs.BlendState.RenderTarget[0].BlendEnable, lhs.BlendState.RenderTarget[0].BlendOp,
lhs.BlendState.RenderTarget[0].DestBlend, lhs.BlendState.RenderTarget[0].SrcBlend,
lhs.BlendState.RenderTarget[0].RenderTargetWriteMask, lhs.RTVFormats[0],
lhs.SampleDesc.Count) ==
std::tie(
rhs.PS.pShaderBytecode, rhs.VS.pShaderBytecode, rhs.GS.pShaderBytecode,
rhs.RasterizerState.CullMode, rhs.DepthStencilState.DepthEnable,
rhs.DepthStencilState.DepthFunc, rhs.DepthStencilState.DepthWriteMask,
rhs.BlendState.RenderTarget[0].BlendEnable, rhs.BlendState.RenderTarget[0].BlendOp,
rhs.BlendState.RenderTarget[0].DestBlend, rhs.BlendState.RenderTarget[0].SrcBlend,
rhs.BlendState.RenderTarget[0].RenderTargetWriteMask, rhs.RTVFormats[0],
rhs.SampleDesc.Count);
}
};
std::unordered_map<D3D12_GRAPHICS_PIPELINE_STATE_DESC, ID3D12PipelineState*, hash_pso_desc,
equality_pipeline_state_desc>
m_pso_map;
struct hash_small_pso_desc
{
size_t operator()(const SmallPsoDesc& pso_desc) const
{
return ((uintptr_t)pso_desc.vs_bytecode.pShaderBytecode << 10) ^
((uintptr_t)pso_desc.ps_bytecode.pShaderBytecode) + pso_desc.blend_state.hex +
pso_desc.depth_stencil_state.hex;
}
};
struct equality_small_pipeline_state_desc
{
bool operator()(const SmallPsoDesc& lhs, const SmallPsoDesc& rhs) const
{
return std::tie(lhs.ps_bytecode.pShaderBytecode, lhs.vs_bytecode.pShaderBytecode,
lhs.gs_bytecode.pShaderBytecode, lhs.input_layout, lhs.blend_state.hex,
lhs.depth_stencil_state.hex, lhs.rasterizer_state.hex) ==
std::tie(rhs.ps_bytecode.pShaderBytecode, rhs.vs_bytecode.pShaderBytecode,
rhs.gs_bytecode.pShaderBytecode, rhs.input_layout, rhs.blend_state.hex,
rhs.depth_stencil_state.hex, rhs.rasterizer_state.hex);
}
};
struct hash_shader_bytecode
{
size_t operator()(const D3D12_SHADER_BYTECODE& shader) const
{
return (uintptr_t)shader.pShaderBytecode;
}
};
struct equality_shader_bytecode
{
bool operator()(const D3D12_SHADER_BYTECODE& lhs, const D3D12_SHADER_BYTECODE& rhs) const
{
return lhs.pShaderBytecode == rhs.pShaderBytecode;
}
};
std::unordered_map<SmallPsoDesc, ID3D12PipelineState*, hash_small_pso_desc,
equality_small_pipeline_state_desc>
m_small_pso_map;
};
} // namespace DX12

View File

@ -1,394 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/D3DUtil.h"
namespace DX12
{
// Creates a stream buffer of initial_size bytes that AttemptBufferResizeOrElseStall may later
// grow up to max_size. If buffer_reallocation_notification is non-null, AllocateBuffer sets
// *buffer_reallocation_notification to true every time the backing buffer is (re)created,
// including the initial allocation performed here.
D3DStreamBuffer::D3DStreamBuffer(size_t initial_size, size_t max_size,
bool* buffer_reallocation_notification)
: m_buffer_size(initial_size), m_buffer_max_size(max_size),
m_buffer_reallocation_notification(buffer_reallocation_notification)
{
CHECK(initial_size <= max_size,
"Error: Initial size for D3DStreamBuffer is greater than max_size.");
AllocateBuffer(initial_size);
// Register for callback from D3DCommandListManager each time a fence is queued to be signaled.
// The fence returned here is the one UpdateGPUProgress polls and the stall path waits on.
m_buffer_tracking_fence =
D3D::command_list_mgr->RegisterQueueFenceCallback(this, &D3DStreamBuffer::QueueFenceCallback);
}
// Unregisters the fence callback, unmaps the backing buffer and hands it to the command list
// manager, which destroys it after the current command list has executed.
D3DStreamBuffer::~D3DStreamBuffer()
{
// Remove the callback first so the manager stops calling back with this object.
D3D::command_list_mgr->RemoveQueueFenceCallback(this);
// The whole buffer is reported as the written range.
D3D12_RANGE write_range = {0, m_buffer_size};
m_buffer->Unmap(0, &write_range);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_buffer);
}
// Reserves allocation_size bytes in the stream buffer, first advancing the write offset to the
// requested alignment (an alignment of 0 skips this step).
//
// Returns true only in the worst case, where the current command list had to be flushed to make
// sure the GPU is done with the buffer; the caller then has to restore whatever GPU state it
// had set up. That is slow, so max_size should be chosen large enough for it never to happen.
bool D3DStreamBuffer::AllocateSpaceInBuffer(size_t allocation_size, size_t alignment,
                                            bool allow_execute)
{
  CHECK(allocation_size <= m_buffer_max_size, "Error: Requested allocation size in D3DStreamBuffer "
                                              "is greater than max allowed size of backing "
                                              "buffer.");

  const bool needs_alignment = alignment != 0 && m_buffer_offset > 0;
  if (needs_alignment)
  {
    // Note: an offset that is already a multiple of `alignment` still advances by one full
    // alignment unit, because the remainder is zero.
    const size_t misalignment = m_buffer_offset % alignment;
    const size_t aligned_offset = m_buffer_offset + alignment - misalignment;

    // If aligning would move the CPU offset onto or past the GPU offset, nudge the GPU offset so
    // the two are not mistaken for "entire buffer is available".
    const bool cpu_behind_gpu = m_buffer_offset < m_buffer_gpu_completion_offset;
    if (cpu_behind_gpu && aligned_offset >= m_buffer_gpu_completion_offset)
      ++m_buffer_gpu_completion_offset;

    m_buffer_offset = aligned_offset;

    if (m_buffer_offset > m_buffer_size)
    {
      // Ran off the end: wrap to the start of the buffer.
      m_buffer_offset = 0;

      // Correct for case where CPU was about to run into GPU.
      if (m_buffer_gpu_completion_offset == 0)
        m_buffer_gpu_completion_offset = 1;
    }
  }

  // Fast path: space that the GPU is known not to be using.
  if (AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(allocation_size))
    return false;

  // Slow path: no room at the front or the back because the GPU may still be reading parts of
  // the buffer. Resize if possible, else stall.
  return AttemptBufferResizeOrElseStall(allocation_size, allow_execute);
}
// In VertexManager, we don't know the 'real' size of the allocation at the time
// we call AllocateSpaceInBuffer. We have to conservatively allocate 16MB (!).
// After the vertex data is written, we can choose to specify the 'real' allocation
// size to avoid wasting space.
//
// Moves the write offset to the start of the most recent allocation plus
// override_allocation_size. The value is not validated here.
void D3DStreamBuffer::OverrideSizeOfPreviousAllocation(size_t override_allocation_size)
{
m_buffer_offset = m_buffer_current_allocation_offset + override_allocation_size;
}
void D3DStreamBuffer::AllocateBuffer(size_t size)
{
// First, put existing buffer (if it exists) in deferred destruction list.
if (m_buffer)
{
D3D12_RANGE write_range = {0, m_buffer_size};
m_buffer->Unmap(0, &write_range);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_buffer);
m_buffer = nullptr;
}
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(size), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
IID_PPV_ARGS(&m_buffer)));
D3D12_RANGE read_range = {};
CheckHR(m_buffer->Map(0, &read_range, &m_buffer_cpu_address));
m_buffer_gpu_address = m_buffer->GetGPUVirtualAddress();
m_buffer_size = size;
// Start at the beginning of the new buffer.
m_buffer_gpu_completion_offset = 0;
m_buffer_current_allocation_offset = 0;
m_buffer_offset = 0;
// Notify observers.
if (m_buffer_reallocation_notification != nullptr)
*m_buffer_reallocation_notification = true;
// If we had any fences queued, they are no longer relevant.
ClearFences();
}
// Function returns true if current command list executed as a result of current command list
// referencing all of buffer's contents, AND we are already at max_size. No alternative but to
// flush. See comments above AllocateSpaceInBuffer for more details.
bool D3DStreamBuffer::AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute)
{
// This function will attempt to increase the size of the buffer, in response
// to running out of room. If the buffer is already at its maximum size specified
// at creation time, then stall waiting for the GPU to finish with the currently
// requested memory.
// Four possibilities, in order of desirability.
// 1) Best - Update GPU tracking progress - maybe the GPU has made enough
// progress such that there is now room.
// 2) Enlarge GPU buffer, up to our max allowed size.
// 3) Stall until GPU finishes existing queued work/advances offset
// in buffer enough to free room.
// 4) Worst - flush current GPU commands and wait, which will free all room
// in buffer.
// 1) First, let's check if GPU has already continued farther along buffer. If it has freed up
// enough of the buffer, we won't have to stall/allocate new memory.
UpdateGPUProgress();
// Now that GPU progress is updated, do we have room in the queue?
if (AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(allocation_size))
{
return false;
}
// 2) Next, prefer increasing buffer size instead of stalling.
size_t new_size = std::min(static_cast<size_t>(m_buffer_size * 1.5f), m_buffer_max_size);
new_size = std::max(new_size, allocation_size);
// Can we grow buffer further?
if (new_size > m_buffer_size)
{
AllocateBuffer(new_size);
m_buffer_offset = allocation_size;
return false;
}
// 3) Bad case - we need to stall.
// This might be ok if we have > 2 frames queued up or something, but
// we don't want to be stalling as we generate the front-of-queue frame.
const bool found_fence_to_wait_on = AttemptToFindExistingFenceToStallOn(allocation_size);
if (found_fence_to_wait_on)
{
return false;
}
// If allow_execute is false, the caller cannot handle command list execution (and the associated
// reset), so re-allocate the same-sized buffer.
if (!allow_execute)
{
AllocateBuffer(new_size);
m_buffer_offset = allocation_size;
return false;
}
// 4) If we get to this point, that means there is no outstanding queued GPU work, and we're still
// out of room.
// This is bad - and performance will suffer due to the CPU/GPU serialization, but the show must
// go on.
// This is guaranteed to succeed, since we've already CHECK'd that the allocation_size <=
// max_buffer_size, and flushing now and waiting will
// free all space in buffer.
D3D::command_list_mgr->ExecuteQueuedWork(true);
m_buffer_offset = allocation_size;
m_buffer_current_allocation_offset = 0;
m_buffer_gpu_completion_offset = 0;
ClearFences();
return true;
}
// Return true if space is found.
bool D3DStreamBuffer::AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(size_t allocation_size)
{
// First, check if there is room at end of buffer. Fast path.
if (m_buffer_offset >= m_buffer_gpu_completion_offset)
{
if (m_buffer_offset + allocation_size <= m_buffer_size)
{
m_buffer_current_allocation_offset = m_buffer_offset;
m_buffer_offset += allocation_size;
return true;
}
if (0 + allocation_size < m_buffer_gpu_completion_offset)
{
m_buffer_current_allocation_offset = 0;
m_buffer_offset = allocation_size;
return true;
}
}
// Next, check if there is room at front of buffer. Fast path.
if (m_buffer_offset < m_buffer_gpu_completion_offset &&
m_buffer_offset + allocation_size < m_buffer_gpu_completion_offset)
{
m_buffer_current_allocation_offset = m_buffer_offset;
m_buffer_offset += allocation_size;
return true;
}
return false;
}
// Returns true if fence was found and waited on.
bool D3DStreamBuffer::AttemptToFindExistingFenceToStallOn(size_t allocation_size)
{
// Let's find the first fence that will free up enough space in our buffer.
UINT64 fence_value_required = 0;
while (m_queued_fences.size() > 0)
{
FenceTrackingInformation tracking_information = m_queued_fences.front();
m_queued_fences.pop();
if (m_buffer_offset >= m_buffer_gpu_completion_offset)
{
// At this point, we need to wrap around, so req'd gpu offset is allocation_size.
if (tracking_information.buffer_offset >= allocation_size)
{
fence_value_required = tracking_information.fence_value;
m_buffer_current_allocation_offset = 0;
m_buffer_offset = allocation_size;
break;
}
}
else
{
if (m_buffer_offset + allocation_size <= m_buffer_size)
{
if (tracking_information.buffer_offset >= m_buffer_offset + allocation_size)
{
fence_value_required = tracking_information.fence_value;
m_buffer_current_allocation_offset = m_buffer_offset;
m_buffer_offset = m_buffer_offset + allocation_size;
break;
}
}
else
{
if (tracking_information.buffer_offset >= allocation_size)
{
fence_value_required = tracking_information.fence_value;
m_buffer_current_allocation_offset = 0;
m_buffer_offset = allocation_size;
break;
}
}
}
}
// Check if we found a fence we can wait on, for GPU to make sufficient progress.
// If so, wait on it.
if (fence_value_required > 0)
{
D3D::command_list_mgr->WaitOnCPUForFence(m_buffer_tracking_fence, fence_value_required);
return true;
}
return false;
}
void D3DStreamBuffer::UpdateGPUProgress()
{
const UINT64 fence_value = m_buffer_tracking_fence->GetCompletedValue();
while (m_queued_fences.size() > 0)
{
FenceTrackingInformation tracking_information = m_queued_fences.front();
m_queued_fences.pop();
// Has fence gone past this point?
if (fence_value >= tracking_information.fence_value)
{
m_buffer_gpu_completion_offset = tracking_information.buffer_offset;
}
else
{
// Fences are stored in ascending order, so once we hit a fence we haven't yet crossed on GPU,
// abort search.
break;
}
}
}
// Static callback registered with the command list manager. owning_object is the
// D3DStreamBuffer passed at registration; a tracking entry is queued for fence_value unless the
// write offset has not moved since the last queued fence.
void D3DStreamBuffer::QueueFenceCallback(void* owning_object, UINT64 fence_value)
{
  auto* const stream_buffer = static_cast<D3DStreamBuffer*>(owning_object);
  if (!stream_buffer->HasBufferOffsetChangedSinceLastFence())
    return;

  stream_buffer->QueueFence(fence_value);
}
// Discards every queued fence tracking entry.
void D3DStreamBuffer::ClearFences()
{
  // Swap with an empty queue instead of popping the entries one by one.
  decltype(m_queued_fences)().swap(m_queued_fences);
}
// Returns true when a new fence tracking entry is worth queuing: either nothing is queued yet,
// or the write offset differs from the one recorded with the most recently queued fence.
bool D3DStreamBuffer::HasBufferOffsetChangedSinceLastFence() const
{
  return m_queued_fences.empty() || m_queued_fences.back().buffer_offset != m_buffer_offset;
}
// Appends a tracking entry that pairs fence_value with the current write offset.
void D3DStreamBuffer::QueueFence(UINT64 fence_value)
{
  m_queued_fences.push(FenceTrackingInformation{fence_value, m_buffer_offset});
}
// Returns the current backing resource. The pointer changes whenever AllocateBuffer replaces
// the buffer.
ID3D12Resource* D3DStreamBuffer::GetBuffer() const
{
return m_buffer;
}
// Returns the GPU virtual address of the most recent allocation (buffer base address plus the
// allocation's offset).
D3D12_GPU_VIRTUAL_ADDRESS D3DStreamBuffer::GetGPUAddressOfCurrentAllocation() const
{
return m_buffer_gpu_address + m_buffer_current_allocation_offset;
}
// Returns the CPU address of the most recent allocation inside the mapped buffer.
void* D3DStreamBuffer::GetCPUAddressOfCurrentAllocation() const
{
// Cast to u8* so that the offset is applied in bytes.
return static_cast<u8*>(m_buffer_cpu_address) + m_buffer_current_allocation_offset;
}
// Returns the offset of the most recent allocation from the start of the buffer.
size_t D3DStreamBuffer::GetOffsetOfCurrentAllocation() const
{
return m_buffer_current_allocation_offset;
}
// Returns the size of the current backing buffer, as last set by AllocateBuffer.
size_t D3DStreamBuffer::GetSize() const
{
return m_buffer_size;
}
// Returns the CPU address at which the start of the backing buffer is mapped.
void* D3DStreamBuffer::GetBaseCPUAddress() const
{
return m_buffer_cpu_address;
}
// Returns the GPU virtual address of the start of the backing buffer.
D3D12_GPU_VIRTUAL_ADDRESS D3DStreamBuffer::GetBaseGPUAddress() const
{
return m_buffer_gpu_address;
}
}

View File

@ -1,71 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <queue>
struct ID3D12Resource;
namespace DX12
{
// Ring-style upload buffer: the CPU writes into a mapped D3D12 resource while queued fences
// record how far the GPU has consumed it.
//
// The object owns the mapped resource and registers itself (by address) for fence callbacks, so
// it must never be copied; the copy operations are deleted.
class D3DStreamBuffer
{
public:
  // buffer_reallocation_notification may be null; when it is not, the pointed-to flag is set to
  // true each time the backing buffer is (re)created.
  D3DStreamBuffer(size_t initial_size, size_t max_size, bool* buffer_reallocation_notification);
  ~D3DStreamBuffer();

  D3DStreamBuffer(const D3DStreamBuffer&) = delete;
  D3DStreamBuffer& operator=(const D3DStreamBuffer&) = delete;

  // Reserves space for the next upload. Returns true only if the current command list had to
  // be executed to make room.
  bool AllocateSpaceInBuffer(size_t allocation_size, size_t alignment, bool allow_execute = true);
  // Shrinks (or otherwise changes) the size recorded for the most recent allocation.
  void OverrideSizeOfPreviousAllocation(size_t override_allocation_size);

  void* GetBaseCPUAddress() const;
  D3D12_GPU_VIRTUAL_ADDRESS GetBaseGPUAddress() const;
  ID3D12Resource* GetBuffer() const;
  void* GetCPUAddressOfCurrentAllocation() const;
  D3D12_GPU_VIRTUAL_ADDRESS GetGPUAddressOfCurrentAllocation() const;
  size_t GetOffsetOfCurrentAllocation() const;
  size_t GetSize() const;

  // Callback registered with the command list manager; owning_object is the stream buffer.
  static void QueueFenceCallback(void* owning_object, UINT64 fence_value);

private:
  void AllocateBuffer(size_t size);
  bool AttemptBufferResizeOrElseStall(size_t allocation_size, bool allow_execute);
  bool AttemptToAllocateOutOfExistingUnusedSpaceInBuffer(size_t allocation_size);

  bool AttemptToFindExistingFenceToStallOn(size_t allocation_size);

  void UpdateGPUProgress();
  void ClearFences();
  bool HasBufferOffsetChangedSinceLastFence() const;
  void QueueFence(UINT64 fence_value);

  // Write offset that was current when the fence with fence_value was queued.
  struct FenceTrackingInformation
  {
    UINT64 fence_value;
    size_t buffer_offset;
  };

  std::queue<FenceTrackingInformation> m_queued_fences;

  ID3D12Fence* m_buffer_tracking_fence = nullptr;

  ID3D12Resource* m_buffer = nullptr;

  void* m_buffer_cpu_address = nullptr;
  D3D12_GPU_VIRTUAL_ADDRESS m_buffer_gpu_address = {};

  // Start of the most recent allocation.
  size_t m_buffer_current_allocation_offset = 0;
  // Next write position.
  size_t m_buffer_offset = 0;
  size_t m_buffer_size = 0;

  const size_t m_buffer_max_size = 0;

  // Offset up to which the GPU is known to have finished with the buffer.
  size_t m_buffer_gpu_completion_offset = 0;

  bool* m_buffer_reallocation_notification = nullptr;
};
}

View File

@ -1,309 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <memory>
#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
namespace DX12
{
namespace D3D
{
// Initial (4 MiB) and maximum (64 MiB) size of the shared texture upload stream buffer.
// Uploads larger than the maximum use a temporary buffer in ReplaceRGBATexture2D.
constexpr size_t INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE = 4 * 1024 * 1024;
constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
// Created lazily by ReplaceRGBATexture2D; destroyed by CleanupPersistentD3DTextureResources.
static std::unique_ptr<D3DStreamBuffer> s_texture_upload_stream_buffer;
// Destroys the shared texture upload stream buffer. ReplaceRGBATexture2D creates a new one the
// next time it needs it.
void CleanupPersistentD3DTextureResources()
{
s_texture_upload_stream_buffer.reset();
}
// Uploads one mip level of pixel data into texture12.
//
//   texture12              - destination texture.
//   buffer, src_pitch      - source pixels and the byte distance between source rows.
//   width                  - not used by this function.
//   height                 - number of source rows; used to size the upload allocation.
//   level                  - subresource index to replace.
//   current_resource_state - state the texture is in on entry; it is transitioned to COPY_DEST
//                            for the copy and back to this state afterwards.
//
// The data is staged through the shared upload stream buffer, or through a temporary upload
// buffer when it is larger than MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE. In the latter case the
// function executes the queued GPU work before releasing the temporary buffer.
void ReplaceRGBATexture2D(ID3D12Resource* texture12, const u8* buffer, unsigned int width,
                          unsigned int height, unsigned int src_pitch, unsigned int level,
                          D3D12_RESOURCE_STATES current_resource_state)
{
  const unsigned int upload_size =
      Common::AlignUp(src_pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) * height;
  const bool use_temporary_buffer = upload_size > MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE;

  ID3D12Resource* upload_buffer = nullptr;
  size_t upload_buffer_offset = 0;
  u8* dest_data = nullptr;

  if (use_temporary_buffer)
  {
    // If the texture is too large to fit in the upload buffer, create a temporary buffer instead.
    // This will only be the case for large (e.g. 8192x8192) textures from custom texture packs.
    // Named locals are used because taking the address of a temporary is not standard C++.
    const CD3DX12_HEAP_PROPERTIES heap_properties(D3D12_HEAP_TYPE_UPLOAD);
    const CD3DX12_RESOURCE_DESC buffer_desc = CD3DX12_RESOURCE_DESC::Buffer(upload_size);
    CheckHR(D3D::device12->CreateCommittedResource(
        &heap_properties, D3D12_HEAP_FLAG_NONE, &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ,
        nullptr, IID_PPV_ARGS(&upload_buffer)));

    const D3D12_RANGE read_range = {};
    void* mapped_data = nullptr;
    CheckHR(upload_buffer->Map(0, &read_range, &mapped_data));
    dest_data = static_cast<u8*>(mapped_data);
  }
  else
  {
    if (!s_texture_upload_stream_buffer)
    {
      s_texture_upload_stream_buffer = std::make_unique<D3DStreamBuffer>(
          INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE, MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE, nullptr);
    }

    s_texture_upload_stream_buffer->AllocateSpaceInBuffer(upload_size,
                                                          D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);

    upload_buffer = s_texture_upload_stream_buffer->GetBuffer();
    upload_buffer_offset = s_texture_upload_stream_buffer->GetOffsetOfCurrentAllocation();
    dest_data =
        static_cast<u8*>(s_texture_upload_stream_buffer->GetCPUAddressOfCurrentAllocation());
  }

  ResourceBarrier(current_command_list, texture12, current_resource_state,
                  D3D12_RESOURCE_STATE_COPY_DEST, level);

  // Ask the device how this subresource has to be laid out inside the upload buffer.
  D3D12_PLACED_SUBRESOURCE_FOOTPRINT upload_footprint = {};
  u32 upload_rows = 0;
  u64 upload_row_size_in_bytes = 0;
  u64 upload_total_bytes = 0;
  const D3D12_RESOURCE_DESC texture_desc = texture12->GetDesc();
  D3D::device12->GetCopyableFootprints(&texture_desc, level, 1, upload_buffer_offset,
                                       &upload_footprint, &upload_rows, &upload_row_size_in_bytes,
                                       &upload_total_bytes);

  // Copy row by row: source and destination row pitches can differ.
  for (u32 y = 0; y < upload_rows; ++y)
  {
    memcpy(dest_data + upload_footprint.Footprint.RowPitch * y, buffer + src_pitch * y,
           upload_row_size_in_bytes);
  }

  const CD3DX12_TEXTURE_COPY_LOCATION copy_destination(texture12, level);
  const CD3DX12_TEXTURE_COPY_LOCATION copy_source(upload_buffer, upload_footprint);
  D3D::current_command_list->CopyTextureRegion(&copy_destination, 0, 0, 0, &copy_source, nullptr);

  ResourceBarrier(D3D::current_command_list, texture12, D3D12_RESOURCE_STATE_COPY_DEST,
                  current_resource_state, level);

  // Release temporary buffer after commands complete.
  // We block here because otherwise if there was a large number of texture uploads, we may run out
  // of memory.
  if (use_temporary_buffer)
  {
    const D3D12_RANGE write_range = {0, upload_size};
    upload_buffer->Unmap(0, &write_range);

    D3D::command_list_mgr->ExecuteQueuedWork(true);

    upload_buffer->Release();
  }
}
} // namespace
// Creates a committed 2D texture (array) in the default heap, wraps it in a new D3DTexture2D
// and, when `data` is given, uploads it into mip level 0.
//
//   bind           - TEXTURE_BIND_FLAG_* bits; render-target and depth-stencil bits add the
//                    matching resource flags and select the optimized clear value.
//   fmt            - texture format.
//   levels, slices - mip level count and array size.
//   data           - optional initial contents for level 0 (pData and RowPitch are used).
//
// Returns the new wrapper; the local reference to the underlying resource is released before
// returning.
D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, u32 bind,
                                   DXGI_FORMAT fmt, unsigned int levels, unsigned int slices,
                                   D3D12_SUBRESOURCE_DATA* data)
{
  ID3D12Resource* texture12 = nullptr;

  D3D12_RESOURCE_DESC texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(fmt, width, height, slices, levels);

  D3D12_CLEAR_VALUE optimized_clear_value = {};
  optimized_clear_value.Format = fmt;

  if (bind & TEXTURE_BIND_FLAG_RENDER_TARGET)
  {
    texdesc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
    optimized_clear_value.Color[0] = 0.0f;
    optimized_clear_value.Color[1] = 0.0f;
    optimized_clear_value.Color[2] = 0.0f;
    optimized_clear_value.Color[3] = 1.0f;
  }

  if (bind & TEXTURE_BIND_FLAG_DEPTH_STENCIL)
  {
    texdesc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
    optimized_clear_value.DepthStencil.Depth = 0.0f;
    optimized_clear_value.DepthStencil.Stencil = 0;
  }

  // An optimized clear value is only meaningful for render targets and depth-stencil textures;
  // pass null for anything else.
  const bool has_clear_value =
      (bind & (TEXTURE_BIND_FLAG_RENDER_TARGET | TEXTURE_BIND_FLAG_DEPTH_STENCIL)) != 0;

  // Named local instead of taking the address of a temporary (not valid standard C++).
  const CD3DX12_HEAP_PROPERTIES heap_properties(D3D12_HEAP_TYPE_DEFAULT);
  CheckHR(D3D::device12->CreateCommittedResource(
      &heap_properties, D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
      has_clear_value ? &optimized_clear_value : nullptr, IID_PPV_ARGS(&texture12)));

  D3D::SetDebugObjectName12(texture12, "Texture created via D3DTexture2D::Create");

  D3DTexture2D* ret =
      new D3DTexture2D(texture12, bind, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN,
                       DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);

  if (data)
  {
    DX12::D3D::ReplaceRGBATexture2D(texture12, reinterpret_cast<const u8*>(data->pData), width,
                                    height, static_cast<unsigned int>(data->RowPitch), 0,
                                    D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  SAFE_RELEASE(texture12);
  return ret;
}
// Registers one additional owner of this texture wrapper.
void D3DTexture2D::AddRef()
{
  // Atomic pre-increment; the updated count is not needed here.
  ++m_ref;
}
// Drops one reference and destroys the object when the last reference goes away.
// Returns the number of references that remain after this call (0 if the object was deleted).
UINT D3DTexture2D::Release()
{
  // fetch_sub returns the value held before the subtraction.
  const unsigned long previous_count = m_ref.fetch_sub(1);
  if (previous_count == 1)
  {
    delete this;
    return 0;
  }

  // Derive the result from the value we already hold. Reading m_ref again would touch the
  // object after our reference is gone, i.e. possibly after another thread has deleted it.
  return static_cast<UINT>(previous_count - 1);
}
// Returns the resource state this wrapper currently tracks for the texture; it is set by the
// constructor and updated by TransitionToResourceState().
D3D12_RESOURCE_STATES D3DTexture2D::GetResourceUsageState() const
{
return m_resource_state;
}
// Returns the multisampled flag that was passed to the constructor.
bool D3DTexture2D::GetMultisampled() const
{
return m_multisampled;
}
// Returns the wrapped resource. No reference is added for the caller.
ID3D12Resource* D3DTexture2D::GetTex12() const
{
return m_tex12;
}
// Returns the CPU descriptor handle of the shader resource view. The handle is only populated
// when the texture was created with TEXTURE_BIND_FLAG_SHADER_RESOURCE.
D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetSRV12CPU() const
{
return m_srv12_cpu;
}
// Returns the GPU descriptor handle allocated together with the CPU handle of the shader
// resource view.
D3D12_GPU_DESCRIPTOR_HANDLE D3DTexture2D::GetSRV12GPU() const
{
return m_srv12_gpu;
}
// Returns the third handle produced by the SRV allocation; the constructor writes a second copy
// of the shader resource view into it.
D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetSRV12GPUCPUShadow() const
{
return m_srv12_gpu_cpu_shadow;
}
// Returns the depth-stencil view handle. The handle is only populated when the texture was
// created with TEXTURE_BIND_FLAG_DEPTH_STENCIL.
D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetDSV12() const
{
return m_dsv12;
}
// Returns the render target view handle. The handle is only populated when the texture was
// created with TEXTURE_BIND_FLAG_RENDER_TARGET.
D3D12_CPU_DESCRIPTOR_HANDLE D3DTexture2D::GetRTV12() const
{
return m_rtv12;
}
// Wraps an existing resource and creates the views requested through `bind`.
//
// texptr:         resource to wrap; the wrapper adds its own reference to it.
// bind:           combination of TEXTURE_BIND_FLAG values selecting which views to create.
// srv_format, dsv_format, rtv_format: formats used for the respective views.
// multisampled:   selects the multisampled-array view dimensions instead of the plain array ones.
// resource_state: state the resource is in when it is handed to the wrapper.
D3DTexture2D::D3DTexture2D(ID3D12Resource* texptr, u32 bind, DXGI_FORMAT srv_format,
                           DXGI_FORMAT dsv_format, DXGI_FORMAT rtv_format, bool multisampled,
                           D3D12_RESOURCE_STATES resource_state)
    : m_tex12(texptr), m_resource_state(resource_state), m_multisampled(multisampled)
{
  // Same bit pattern as assigning -1 to the unsigned view fields.
  // NOTE(review): presumably interpreted by D3D12 as "everything available" - confirm.
  const UINT all_remaining = static_cast<UINT>(-1);

  if (bind & TEXTURE_BIND_FLAG_SHADER_RESOURCE)
  {
    D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
    srv_desc.Format = srv_format;
    if (multisampled)
    {
      srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
      srv_desc.Texture2DMSArray.ArraySize = all_remaining;
    }
    else
    {
      srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
      srv_desc.Texture2DArray.MipLevels = all_remaining;
      srv_desc.Texture2DArray.MostDetailedMip = 0;
      srv_desc.Texture2DArray.ResourceMinLODClamp = 0;
      srv_desc.Texture2DArray.ArraySize = all_remaining;
    }
    srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;

    CHECK(
        D3D::gpu_descriptor_heap_mgr->Allocate(&m_srv12_cpu, &m_srv12_gpu, &m_srv12_gpu_cpu_shadow),
        "Error: Ran out of permenant slots in GPU descriptor heap, but don't support rolling over "
        "heap.");

    // The same view is written to both CPU-side handles returned by the allocation.
    D3D::device12->CreateShaderResourceView(m_tex12, &srv_desc, m_srv12_cpu);
    D3D::device12->CreateShaderResourceView(m_tex12, &srv_desc, m_srv12_gpu_cpu_shadow);
  }

  if (bind & TEXTURE_BIND_FLAG_DEPTH_STENCIL)
  {
    D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
    dsv_desc.Format = dsv_format;
    dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
    if (multisampled)
    {
      dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY;
      dsv_desc.Texture2DMSArray.ArraySize = all_remaining;
    }
    else
    {
      dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
      dsv_desc.Texture2DArray.ArraySize = all_remaining;
    }

    D3D::dsv_descriptor_heap_mgr->Allocate(&m_dsv12);
    D3D::device12->CreateDepthStencilView(m_tex12, &dsv_desc, m_dsv12);
  }

  if (bind & TEXTURE_BIND_FLAG_RENDER_TARGET)
  {
    D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {};
    rtv_desc.Format = rtv_format;
    if (multisampled)
    {
      rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;
      rtv_desc.Texture2DMSArray.ArraySize = all_remaining;
    }
    else
    {
      rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
      rtv_desc.Texture2DArray.ArraySize = all_remaining;
    }

    D3D::rtv_descriptor_heap_mgr->Allocate(&m_rtv12);
    D3D::device12->CreateRenderTargetView(m_tex12, &rtv_desc, m_rtv12);
  }

  // Keep the resource alive for as long as this wrapper exists.
  m_tex12->AddRef();
}
// Records a transition of every subresource from the currently tracked state to `state_after`
// on `command_list`, then remembers `state_after` as the new tracked state.
void D3DTexture2D::TransitionToResourceState(ID3D12GraphicsCommandList* command_list,
                                             D3D12_RESOURCE_STATES state_after)
{
  const D3D12_RESOURCE_STATES state_before = m_resource_state;

  DX12::D3D::ResourceBarrier(command_list, m_tex12, state_before, state_after,
                             D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);

  m_resource_state = state_after;
}
// Hands the wrapped resource to the command list manager instead of releasing it directly.
// NOTE(review): going by the callee's name, destruction is deferred until the current command
// list has executed - confirm against D3DCommandListManager.
D3DTexture2D::~D3DTexture2D()
{
DX12::D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_tex12);
}
} // namespace DX12

View File

@ -1,84 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/D3DBase.h"
namespace DX12
{
// Bit flags selecting which views a D3DTexture2D creates for its resource. Values are distinct
// bits so they can be OR-ed together and passed as the `bind` argument.
enum TEXTURE_BIND_FLAG : u32
{
// Create a shader resource view.
TEXTURE_BIND_FLAG_SHADER_RESOURCE = (1 << 0),
// Create a render target view.
TEXTURE_BIND_FLAG_RENDER_TARGET = (1 << 1),
// Create a depth-stencil view.
TEXTURE_BIND_FLAG_DEPTH_STENCIL = (1 << 2)
};
// Free helper functions for texture uploads.
namespace D3D
{
// Uploads `buffer` (rows `src_pitch` bytes apart) into mip `level` of `pTexture`.
// `current_resource_state` is the state the texture is in when the call is made and defaults to
// D3D12_RESOURCE_STATE_COMMON.
// NOTE(review): the pixel layout is presumably 32-bit RGBA, going by the name - confirm against
// the definition.
void ReplaceRGBATexture2D(
ID3D12Resource* pTexture, const u8* buffer, unsigned int width, unsigned int height,
unsigned int src_pitch, unsigned int level,
D3D12_RESOURCE_STATES current_resource_state = D3D12_RESOURCE_STATE_COMMON);
// NOTE(review): presumably frees the long-lived upload resources used by ReplaceRGBATexture2D -
// confirm against the definition.
void CleanupPersistentD3DTextureResources();
}
// Reference-counted wrapper around an ID3D12Resource holding a 2D texture, together with the
// descriptor handles of the views created for it and the resource state tracked for it.
class D3DTexture2D
{
public:
// there are two ways to create a D3DTexture2D object:
// either create an ID3D12Resource object, pass it to the constructor and specify what views
// to create
// or let the texture automatically be created by D3DTexture2D::Create
D3DTexture2D(ID3D12Resource* texptr, u32 bind, DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN, bool multisampled = false,
D3D12_RESOURCE_STATES resource_state = D3D12_RESOURCE_STATE_COMMON);
// Creates the underlying resource and returns a new wrapper for it. `data`, when non-null,
// supplies the initial contents of mip level 0.
static D3DTexture2D* Create(unsigned int width, unsigned int height, u32 bind, DXGI_FORMAT fmt,
unsigned int levels = 1, unsigned int slices = 1,
D3D12_SUBRESOURCE_DATA* data = nullptr);
// Records a barrier from the tracked state to `state_after` on `command_list` and updates the
// tracked state.
void TransitionToResourceState(ID3D12GraphicsCommandList* command_list,
D3D12_RESOURCE_STATES state_after);
// reference counting, use AddRef() when creating a new reference and Release() it when you don't
// need it anymore
void AddRef();
// Returns the remaining reference count; the object deletes itself when it reaches zero.
UINT Release();
// Accessors for the wrapped resource and its descriptor handles. A handle is only populated
// when the matching TEXTURE_BIND_FLAG was passed at construction.
ID3D12Resource* GetTex12() const;
D3D12_CPU_DESCRIPTOR_HANDLE GetSRV12CPU() const;
D3D12_GPU_DESCRIPTOR_HANDLE GetSRV12GPU() const;
D3D12_CPU_DESCRIPTOR_HANDLE GetSRV12GPUCPUShadow() const;
D3D12_CPU_DESCRIPTOR_HANDLE GetDSV12() const;
D3D12_CPU_DESCRIPTOR_HANDLE GetRTV12() const;
// Returns the resource state currently tracked by this wrapper.
D3D12_RESOURCE_STATES GetResourceUsageState() const;
bool GetMultisampled() const;
private:
// Private so that the object can only be destroyed through Release().
~D3DTexture2D();
ID3D12Resource* m_tex12 = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE m_srv12_cpu = {};
D3D12_GPU_DESCRIPTOR_HANDLE m_srv12_gpu = {};
D3D12_CPU_DESCRIPTOR_HANDLE m_srv12_gpu_cpu_shadow = {};
D3D12_CPU_DESCRIPTOR_HANDLE m_dsv12 = {};
D3D12_CPU_DESCRIPTOR_HANDLE m_rtv12 = {};
D3D12_RESOURCE_STATES m_resource_state = D3D12_RESOURCE_STATE_COMMON;
bool m_multisampled = false;
// A new object starts with one reference, owned by whoever constructed it.
std::atomic<unsigned long> m_ref = 1;
};
} // namespace DX12

View File

@ -1,902 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/D3DUtil.h"
#include <cctype>
#include <list>
#include <memory>
#include <string>
#include "Common/Align.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DShader.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "VideoCommon/VideoBackendBase.h"
namespace DX12
{
namespace D3D
{
// Records a transition barrier for `resource` on `command_list`.
//
// state_before/state_after: states to transition between; nothing is recorded when they match.
// subresource:              subresource index forwarded unchanged into the barrier.
void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource,
                     D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after,
                     UINT subresource)
{
  const bool state_changes = state_before != state_after;
  if (!state_changes)
    return;

  CHECK(resource, "NULL resource passed to ResourceBarrier.");

  D3D12_RESOURCE_BARRIER transition = {};
  transition.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  transition.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  transition.Transition.pResource = resource;
  transition.Transition.Subresource = subresource;
  transition.Transition.StateBefore = state_before;
  transition.Transition.StateAfter = state_after;

  command_list->ResourceBarrier(1, &transition);
}
// Vertex storage shared by the draw* helpers, backed by a D3DStreamBuffer. Callers append
// vertex data and receive the index of the first appended vertex.
class UtilVertexBuffer
{
public:
  // The stream buffer is created with `size` as its first argument and four times that as its
  // second.
  explicit UtilVertexBuffer(size_t size)
      : m_stream_buffer(std::make_unique<D3DStreamBuffer>(size, size * 4, nullptr))
  {
  }

  ~UtilVertexBuffer() = default;

  size_t GetSize() const { return m_stream_buffer->GetSize(); }

  // Copies `size` bytes from `data` into freshly reserved space.
  // returns vertex offset to the new data
  size_t AppendData(const void* data, size_t size, size_t vertex_size)
  {
    Reserve(size, vertex_size);
    memcpy(static_cast<u8*>(m_stream_buffer->GetCPUAddressOfCurrentAllocation()), data, size);
    return CurrentVertexIndex(vertex_size);
  }

  // Reserves `size` bytes and hands the write pointer back through `write_ptr` so the caller
  // can fill the vertices in place. Returns the vertex offset of the reserved space.
  size_t BeginAppendData(void** write_ptr, size_t size, size_t vertex_size)
  {
    Reserve(size, vertex_size);
    *write_ptr = m_stream_buffer->GetCPUAddressOfCurrentAllocation();
    return CurrentVertexIndex(vertex_size);
  }

  void EndAppendData()
  {
    // No-op on DX12.
  }

  ID3D12Resource* GetBuffer12() { return m_stream_buffer->GetBuffer(); }

private:
  // Reserves `size` bytes in the stream buffer, passing `vertex_size` as the alignment argument.
  void Reserve(size_t size, size_t vertex_size)
  {
    m_stream_buffer->AllocateSpaceInBuffer(size, vertex_size, false);
  }

  // Converts the byte offset of the current allocation into a vertex index.
  size_t CurrentVertexIndex(size_t vertex_size) const
  {
    return m_stream_buffer->GetOffsetOfCurrentAllocation() / vertex_size;
  }

  std::unique_ptr<D3DStreamBuffer> m_stream_buffer;
};
// Font renderer instance; set up by InitUtils() and torn down by ShutdownUtils().
CD3DFont font;
// Vertex storage for DrawShadedTexQuad, DrawClearQuad and DrawEFBPokeQuads respectively.
static std::unique_ptr<UtilVertexBuffer> util_vbuf_stq;
static std::unique_ptr<UtilVertexBuffer> util_vbuf_clearq;
static std::unique_ptr<UtilVertexBuffer> util_vbuf_efbpokequads;
// Vertex capacity used to size the font's vertex buffer (8000 quads of 6 vertices each).
static const unsigned int s_max_num_vertices = 8000 * 6;
// Vertex layout used for text rendering. Field offsets (0, 12, 28) have to match the input
// element descriptions set up in CD3DFont::Init().
struct FONT2DVERTEX
{
// Position.
float x, y, z;
// Colour as four normalised floats in R, G, B, A order.
float col[4];
// Texture coordinates into the font atlas.
float tu, tv;
};
// Builds a text vertex at (x, y, 0) with texture coordinates (tu, tv). `color` is a packed
// 0xAARRGGBB value that gets expanded into normalised R, G, B, A floats.
FONT2DVERTEX InitFont2DVertex(float x, float y, u32 color, float tu, float tv)
{
  // Extracts the 8-bit channel starting at bit `shift` and scales it to [0, 1].
  const auto channel = [color](unsigned int shift) {
    return (static_cast<float>((color >> shift) & 0xFF)) / 255.f;
  };

  FONT2DVERTEX vertex;
  vertex.x = x;
  vertex.y = y;
  vertex.z = 0;
  vertex.tu = tu;
  vertex.tv = tv;
  vertex.col[0] = channel(16);  // red
  vertex.col[1] = channel(8);   // green
  vertex.col[2] = channel(0);   // blue
  vertex.col[3] = channel(24);  // alpha
  return vertex;
}
// Nothing to do at construction time; device objects are created later by Init().
CD3DFont::CD3DFont() = default;
// HLSL source of the pixel shader used for text: samples the font atlas and multiplies the
// result by the interpolated vertex colour. Compiled at runtime by CD3DFont::Init().
constexpr const char fontpixshader[] = {
"Texture2D tex2D;\n"
"SamplerState linearSampler\n"
"{\n"
" Filter = MIN_MAG_MIP_LINEAR;\n"
" AddressU = D3D11_TEXTURE_ADDRESS_BORDER;\n"
" AddressV = D3D11_TEXTURE_ADDRESS_BORDER;\n"
" BorderColor = float4(0.f, 0.f, 0.f, 0.f);\n"
"};\n"
"struct PS_INPUT\n"
"{\n"
" float4 pos : SV_POSITION;\n"
" float4 col : COLOR;\n"
" float2 tex : TEXCOORD;\n"
"};\n"
"float4 main( PS_INPUT input ) : SV_Target\n"
"{\n"
" return tex2D.Sample( linearSampler, input.tex ) * input.col;\n"
"};\n"};
// HLSL source of the vertex shader used for text: passes position, colour and texture
// coordinate through unchanged. Compiled at runtime by CD3DFont::Init().
constexpr const char fontvertshader[] = {"struct VS_INPUT\n"
"{\n"
" float4 pos : POSITION;\n"
" float4 col : COLOR;\n"
" float2 tex : TEXCOORD;\n"
"};\n"
"struct PS_INPUT\n"
"{\n"
" float4 pos : SV_POSITION;\n"
" float4 col : COLOR;\n"
" float2 tex : TEXCOORD;\n"
"};\n"
"PS_INPUT main( VS_INPUT input )\n"
"{\n"
" PS_INPUT output;\n"
" output.pos = input.pos;\n"
" output.col = input.col;\n"
" output.tex = input.tex;\n"
" return output;\n"
"};\n"};
int CD3DFont::Init()
{
// Create vertex buffer for the letters
// Prepare to create a bitmap
unsigned int* bitmap_bits;
BITMAPINFO bmi;
ZeroMemory(&bmi.bmiHeader, sizeof(BITMAPINFOHEADER));
bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
bmi.bmiHeader.biWidth = static_cast<int>(m_tex_width);
bmi.bmiHeader.biHeight = -static_cast<int>(m_tex_height);
bmi.bmiHeader.biPlanes = 1;
bmi.bmiHeader.biCompression = BI_RGB;
bmi.bmiHeader.biBitCount = 32;
// Create a DC and a bitmap for the font
HDC hDC = CreateCompatibleDC(nullptr);
HBITMAP hbmBitmap = CreateDIBSection(hDC, &bmi, DIB_RGB_COLORS,
reinterpret_cast<void**>(&bitmap_bits), nullptr, 0);
SetMapMode(hDC, MM_TEXT);
// create a GDI font
HFONT hFont =
CreateFont(24, 0, 0, 0, FW_NORMAL, FALSE, FALSE, FALSE, DEFAULT_CHARSET, OUT_DEFAULT_PRECIS,
CLIP_DEFAULT_PRECIS, PROOF_QUALITY, VARIABLE_PITCH, _T("Tahoma"));
if (nullptr == hFont)
return E_FAIL;
HGDIOBJ hOldbmBitmap = SelectObject(hDC, hbmBitmap);
HGDIOBJ hOldFont = SelectObject(hDC, hFont);
// Set text properties
SetTextColor(hDC, 0xFFFFFF);
SetBkColor(hDC, 0);
SetTextAlign(hDC, TA_TOP);
TEXTMETRICW tm;
GetTextMetricsW(hDC, &tm);
m_line_height = tm.tmHeight;
// Loop through all printable characters and output them to the bitmap
// Meanwhile, keep track of the corresponding tex coords for each character.
int x = 0, y = 0;
char str[2] = "\0";
for (int c = 0; c < 127 - 32; c++)
{
str[0] = c + 32;
SIZE size;
GetTextExtentPoint32A(hDC, str, 1, &size);
if (static_cast<int>(x + size.cx + 1) > m_tex_width)
{
x = 0;
y += m_line_height;
}
ExtTextOutA(hDC, x + 1, y + 0, ETO_OPAQUE | ETO_CLIPPED, nullptr, str, 1, nullptr);
m_tex_coords[c][0] = (static_cast<float>(x + 0)) / m_tex_width;
m_tex_coords[c][1] = (static_cast<float>(y + 0)) / m_tex_height;
m_tex_coords[c][2] = (static_cast<float>(x + 0 + size.cx)) / m_tex_width;
m_tex_coords[c][3] = (static_cast<float>(y + 0 + size.cy)) / m_tex_height;
x += size.cx + 3; // 3 to work around annoying ij conflict (part of the j ends up with the i)
}
// Create a new texture for the font
// possible optimization: store the converted data in a buffer and fill the texture on creation.
// That way, we can use a static texture
std::unique_ptr<byte[]> tex_initial_data(new byte[m_tex_width * m_tex_height * 4]);
for (y = 0; y < m_tex_height; y++)
{
u32* pDst32 =
reinterpret_cast<u32*>(static_cast<u8*>(tex_initial_data.get()) + y * m_tex_width * 4);
for (x = 0; x < m_tex_width; x++)
{
const u8 bAlpha = (bitmap_bits[m_tex_width * y + x] & 0xff);
*pDst32++ = (((bAlpha << 4) | bAlpha) << 24) | 0xFFFFFF;
}
}
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_tex_width, m_tex_height, 1, 1),
D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&m_texture12)));
D3D::SetDebugObjectName12(m_texture12, "texture of a CD3DFont object");
ID3D12Resource* temporaryFontTextureUploadBuffer;
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(Common::AlignUp(static_cast<unsigned int>(m_tex_width) * 4,
D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) *
m_tex_height),
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&temporaryFontTextureUploadBuffer)));
D3D12_SUBRESOURCE_DATA subresource_data_dest = {
tex_initial_data.get(), // const void *pData;
m_tex_width * 4, // LONG_PTR RowPitch;
0 // LONG_PTR SlicePitch;
};
D3D::ResourceBarrier(D3D::current_command_list, m_texture12, D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
CHECK(0 != UpdateSubresources(D3D::current_command_list, m_texture12,
temporaryFontTextureUploadBuffer, 0, 0, 1, &subresource_data_dest),
"UpdateSubresources call failed.");
command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(
temporaryFontTextureUploadBuffer);
tex_initial_data.release();
D3D::gpu_descriptor_heap_mgr->Allocate(&m_texture12_cpu, &m_texture12_gpu);
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srv_desc.Texture2D.MipLevels = -1;
D3D::device12->CreateShaderResourceView(m_texture12, &srv_desc, m_texture12_cpu);
D3D::ResourceBarrier(D3D::current_command_list, m_texture12, D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
SelectObject(hDC, hOldbmBitmap);
DeleteObject(hbmBitmap);
SelectObject(hDC, hOldFont);
DeleteObject(hFont);
// setup device objects for drawing
ID3DBlob* psbytecode = nullptr;
D3D::CompilePixelShader(fontpixshader, &psbytecode);
if (psbytecode == nullptr)
PanicAlert("Failed to compile pixel shader, %s %d\n", __FILE__, __LINE__);
m_pshader12.pShaderBytecode = psbytecode->GetBufferPointer();
m_pshader12.BytecodeLength = psbytecode->GetBufferSize();
ID3DBlob* vsbytecode = nullptr;
D3D::CompileVertexShader(fontvertshader, &vsbytecode);
if (vsbytecode == nullptr)
PanicAlert("Failed to compile vertex shader, %s %d\n", __FILE__, __LINE__);
m_vshader12.pShaderBytecode = vsbytecode->GetBufferPointer();
m_vshader12.BytecodeLength = vsbytecode->GetBufferSize();
const D3D12_INPUT_ELEMENT_DESC desc[] = {
{"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
0},
{"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12,
D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 28, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
0},
};
m_input_layout12.NumElements = ARRAYSIZE(desc);
m_input_layout12.pInputElementDescs = desc;
D3D12_BLEND_DESC blenddesc = {};
blenddesc.AlphaToCoverageEnable = FALSE;
blenddesc.IndependentBlendEnable = FALSE;
blenddesc.RenderTarget[0].BlendEnable = TRUE;
blenddesc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
blenddesc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
blenddesc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
blenddesc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
blenddesc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
blenddesc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
blenddesc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
blenddesc.RenderTarget[0].LogicOp = D3D12_LOGIC_OP_NOOP;
blenddesc.RenderTarget[0].LogicOpEnable = FALSE;
m_blendstate12 = blenddesc;
D3D12_RASTERIZER_DESC rastdesc = {
D3D12_FILL_MODE_SOLID, D3D12_CULL_MODE_NONE, false, 0, 0.f, 0.f, false, false, false, false};
m_raststate12 = rastdesc;
const unsigned int text_vb_size = s_max_num_vertices * sizeof(FONT2DVERTEX);
m_vertex_buffer = std::make_unique<D3DStreamBuffer>(text_vb_size * 2, text_vb_size * 16, nullptr);
D3D12_GRAPHICS_PIPELINE_STATE_DESC text_pso_desc = {
default_root_signature, // ID3D12RootSignature *pRootSignature;
{vsbytecode->GetBufferPointer(), vsbytecode->GetBufferSize()}, // D3D12_SHADER_BYTECODE VS;
{psbytecode->GetBufferPointer(), psbytecode->GetBufferSize()}, // D3D12_SHADER_BYTECODE PS;
{}, // D3D12_SHADER_BYTECODE DS;
{}, // D3D12_SHADER_BYTECODE HS;
{}, // D3D12_SHADER_BYTECODE GS;
{}, // D3D12_STREAM_OUTPUT_DESC StreamOutput
blenddesc, // D3D12_BLEND_DESC BlendState;
UINT_MAX, // UINT SampleMask;
rastdesc, // D3D12_RASTERIZER_DESC RasterizerState
CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT), // D3D12_DEPTH_STENCIL_DESC DepthStencilState
m_input_layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_STRIP_CUT_VALUE
// IndexBufferProperties
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE
// PrimitiveTopologyType
1, // UINT NumRenderTargets
{DXGI_FORMAT_R8G8B8A8_UNORM}, // DXGI_FORMAT RTVFormats[8]
DXGI_FORMAT_UNKNOWN, // DXGI_FORMAT DSVFormat
{1 /* UINT Count */, 0 /* UINT Quality */} // DXGI_SAMPLE_DESC SampleDesc
};
CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&text_pso_desc, &m_pso));
SAFE_RELEASE(psbytecode);
SAFE_RELEASE(vsbytecode);
return S_OK;
}
// Releases the objects created by Init(). Always returns S_OK.
int CD3DFont::Shutdown()
{
  // Dropping the vertex buffer also turns DrawTextScaled() into a no-op.
  m_vertex_buffer.reset();

  if (m_texture12)
  {
    D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_texture12);
    // Forget the pointer so a second Shutdown() cannot queue the same texture twice.
    m_texture12 = nullptr;
  }

  return S_OK;
}
// Records the draw commands for `text` on the current command list.
//
// x, y:    position of the text's top-left corner in back buffer pixels.
// size:    requested line height in pixels; glyphs are scaled by size / m_line_height.
// spacing: extra horizontal advance per character, as a factor of `size`.
// dwColor: packed 0xAARRGGBB text colour.
// text:    characters to draw. '\n' starts a new line; characters outside the ASCII range
//          32..126 are skipped because Init() rasterises no glyph for them.
//
// Returns 0 (the same value as S_OK) in every case; nothing is drawn before Init() has created
// the vertex buffer.
int CD3DFont::DrawTextScaled(float x, float y, float size, float spacing, u32 dwColor,
                             const std::string& text)
{
  if (!m_vertex_buffer)
    return 0;

  // Pixel -> clip space scale factors (clip space spans 2 units across the back buffer).
  const float scale_x = 1 / static_cast<float>(D3D::GetBackBufferWidth()) * 2.f;
  const float scale_y = 1 / static_cast<float>(D3D::GetBackBufferHeight()) * 2.f;
  const float sizeratio = size / static_cast<float>(m_line_height);

  // translate starting positions
  float sx = x * scale_x - 1.f;
  float sy = 1.f - y * scale_y;

  // set general pipeline state
  D3D::current_command_list->SetPipelineState(m_pso);
  D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);

  D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
  D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

  D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV,
                                                            m_texture12_gpu);

  // upper bound is nchars * 6, assuming no spaces
  m_vertex_buffer->AllocateSpaceInBuffer(static_cast<u32>(text.length()) * 6 * sizeof(FONT2DVERTEX),
                                         sizeof(FONT2DVERTEX), false);

  FONT2DVERTEX* vertices12 =
      reinterpret_cast<FONT2DVERTEX*>(m_vertex_buffer->GetCPUAddressOfCurrentAllocation());

  // Character codes for which Init() stores texture coordinates.
  const unsigned int first_glyph = 32;
  const unsigned int last_glyph = 126;

  int num_triangles = 0;
  const float start_x = sx;
  for (const char ch : text)
  {
    if (ch == '\n')
    {
      sx = start_x;
      sy -= scale_y * size;
    }

    // Explicit range check on the unsigned value: std::isprint() is undefined for negative
    // char values and locale dependent, which could let through codes without a glyph and
    // index m_tex_coords out of range.
    const unsigned int code = static_cast<unsigned char>(ch);
    if (code < first_glyph || code > last_glyph)
      continue;

    const unsigned int glyph = code - first_glyph;
    const float tx1 = m_tex_coords[glyph][0];
    const float ty1 = m_tex_coords[glyph][1];
    const float tx2 = m_tex_coords[glyph][2];
    const float ty2 = m_tex_coords[glyph][3];

    const float w = static_cast<float>(tx2 - tx1) * m_tex_width * scale_x * sizeratio;
    // Negative on purpose: clip space y grows upwards while the glyph extends downwards.
    const float h = static_cast<float>(ty1 - ty2) * m_tex_height * scale_y * sizeratio;

    // Two triangles per glyph.
    FONT2DVERTEX v[6];
    v[0] = InitFont2DVertex(sx, sy + h, dwColor, tx1, ty2);
    v[1] = InitFont2DVertex(sx, sy, dwColor, tx1, ty1);
    v[2] = InitFont2DVertex(sx + w, sy + h, dwColor, tx2, ty2);
    v[3] = InitFont2DVertex(sx + w, sy, dwColor, tx2, ty1);
    v[4] = v[2];
    v[5] = v[1];

    memcpy(vertices12, v, 6 * sizeof(FONT2DVERTEX));
    vertices12 += 6;
    num_triangles += 2;

    sx += w + spacing * scale_x * size;
  }

  // Render the vertex buffer
  if (num_triangles > 0)
  {
    // Shrink the reservation to what was actually written.
    const u32 written_size = static_cast<u32>(num_triangles * 3 * sizeof(FONT2DVERTEX));
    m_vertex_buffer->OverrideSizeOfPreviousAllocation(written_size);

    D3D12_VERTEX_BUFFER_VIEW vb_view = {m_vertex_buffer->GetGPUAddressOfCurrentAllocation(),
                                        written_size, sizeof(FONT2DVERTEX)};

    D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view);
    D3D::current_command_list->DrawInstanced(3 * num_triangles, 1, 0, 0);
  }

  return S_OK;
}
// Descriptor handles of the two copy samplers created by InitUtils(). The GPU handles are bound
// by SetLinearCopySampler() and SetPointCopySampler().
D3D12_CPU_DESCRIPTOR_HANDLE linear_copy_sampler12CPU;
D3D12_GPU_DESCRIPTOR_HANDLE linear_copy_sampler12GPU;
D3D12_CPU_DESCRIPTOR_HANDLE point_copy_sampler12CPU;
D3D12_GPU_DESCRIPTOR_HANDLE point_copy_sampler12GPU;
// Vertex layout used by DrawShadedTexQuad: position (x, y, z), texture coordinate (u, v),
// array slice (w) and inverse gamma (g).
struct STQVertex
{
float x, y, z, u, v, w, g;
};
// Vertex layout used by DrawClearQuad: position plus a packed 32-bit colour.
struct ClearVertex
{
float x, y, z;
u32 col;
};
// Vertex layout used for the EFB poke quads; same members as ClearVertex.
struct ColVertex
{
float x, y, z;
u32 col;
};
// Values of the last quad uploaded by DrawShadedTexQuad; used to skip identical uploads.
struct
{
float u1, v1, u2, v2, S, G;
} tex_quad_data;
// Values of the last quad uploaded by DrawClearQuad; used to skip identical uploads.
struct
{
u32 col;
float z;
} clear_quad_data;
// ring buffer offsets
// Vertex index (not byte offset) of the most recently uploaded quad in each buffer.
static size_t stq_offset;
static size_t clearq_offset;
void InitUtils()
{
util_vbuf_stq = std::make_unique<UtilVertexBuffer>(0x10000);
util_vbuf_clearq = std::make_unique<UtilVertexBuffer>(0x10000);
util_vbuf_efbpokequads = std::make_unique<UtilVertexBuffer>(0x100000);
D3D12_SAMPLER_DESC point_sampler_desc = {D3D12_FILTER_MIN_MAG_MIP_POINT,
D3D12_TEXTURE_ADDRESS_MODE_BORDER,
D3D12_TEXTURE_ADDRESS_MODE_BORDER,
D3D12_TEXTURE_ADDRESS_MODE_BORDER,
0.f,
1,
D3D12_COMPARISON_FUNC_ALWAYS,
{0.f, 0.f, 0.f, 0.f},
0.f,
0.f};
D3D::sampler_descriptor_heap_mgr->Allocate(&point_copy_sampler12CPU, &point_copy_sampler12GPU);
D3D::device12->CreateSampler(&point_sampler_desc, point_copy_sampler12CPU);
D3D12_SAMPLER_DESC linear_sampler_desc = {D3D12_FILTER_MIN_MAG_MIP_LINEAR,
D3D12_TEXTURE_ADDRESS_MODE_BORDER,
D3D12_TEXTURE_ADDRESS_MODE_BORDER,
D3D12_TEXTURE_ADDRESS_MODE_BORDER,
0.f,
1,
D3D12_COMPARISON_FUNC_ALWAYS,
{0.f, 0.f, 0.f, 0.f},
0.f,
0.f};
D3D::sampler_descriptor_heap_mgr->Allocate(&linear_copy_sampler12CPU, &linear_copy_sampler12GPU);
D3D::device12->CreateSampler(&linear_sampler_desc, linear_copy_sampler12CPU);
// cached data used to avoid unnecessarily reloading the vertex buffers
memset(&tex_quad_data, 0, sizeof(tex_quad_data));
memset(&clear_quad_data, 0, sizeof(clear_quad_data));
font.Init();
}
// Counterpart of InitUtils(): shuts the font renderer down and destroys the shared vertex
// buffers. The sampler descriptors allocated by InitUtils() are not touched here.
void ShutdownUtils()
{
font.Shutdown();
util_vbuf_stq.reset();
util_vbuf_clearq.reset();
util_vbuf_efbpokequads.reset();
}
void SetPointCopySampler()
{
D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SAMPLER,
point_copy_sampler12GPU);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true);
}
void SetLinearCopySampler()
{
D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SAMPLER,
linear_copy_sampler12GPU);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_SAMPLERS, true);
}
void SetViewportAndScissor(int top_left_x, int top_left_y, int width, int height, float min_depth,
float max_depth)
{
D3D12_VIEWPORT viewport = {static_cast<float>(top_left_x),
static_cast<float>(top_left_y),
static_cast<float>(width),
static_cast<float>(height),
min_depth,
max_depth};
D3D12_RECT scissor = {static_cast<LONG>(top_left_x), static_cast<LONG>(top_left_y),
static_cast<LONG>(top_left_x + width),
static_cast<LONG>(top_left_y + height)};
D3D::current_command_list->RSSetViewports(1, &viewport);
D3D::current_command_list->RSSetScissorRects(1, &scissor);
};
// Draws a quad covering the whole viewport that samples `texture` with the supplied shaders.
//
// texture:             source texture; transitioned and bound unless inherit_srv_binding is set.
// rSource:             source rectangle in texels.
// source_width/height: dimensions used to normalise rSource into texture coordinates.
// pshader12, vshader12, gshader12, layout12: shaders and input layout of the pipeline state.
// gamma:               the vertices carry 1 / gamma.
// slice:               array slice, passed to the shaders as a float vertex attribute.
// rt_format:           format of the bound render target.
// inherit_srv_binding: when true the currently bound SRV table is kept and `texture` is not
//                      touched.
// rt_multisampled:     when true the pipeline uses the configured multisample count.
void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* rSource, int source_width,
int source_height, D3D12_SHADER_BYTECODE pshader12,
D3D12_SHADER_BYTECODE vshader12, D3D12_INPUT_LAYOUT_DESC layout12,
D3D12_SHADER_BYTECODE gshader12, float gamma, u32 slice,
DXGI_FORMAT rt_format, bool inherit_srv_binding, bool rt_multisampled)
{
// Normalise the source rectangle into [0, 1] texture coordinates.
float sw = 1.0f / static_cast<float>(source_width);
float sh = 1.0f / static_cast<float>(source_height);
float u1 = static_cast<float>(rSource->left) * sw;
float u2 = static_cast<float>(rSource->right) * sw;
float v1 = static_cast<float>(rSource->top) * sh;
float v2 = static_cast<float>(rSource->bottom) * sh;
float S = static_cast<float>(slice);
float G = 1.0f / gamma;
// Four corners of clip space, in triangle strip order.
STQVertex coords[4] = {
{-1.0f, 1.0f, 0.0f, u1, v1, S, G},
{1.0f, 1.0f, 0.0f, u2, v1, S, G},
{-1.0f, -1.0f, 0.0f, u1, v2, S, G},
{1.0f, -1.0f, 0.0f, u2, v2, S, G},
};
// only upload the data to VRAM if it changed
// NOTE(review): reusing stq_offset relies on the earlier upload still being intact in the
// vertex buffer - confirm against D3DStreamBuffer.
if (tex_quad_data.u1 != u1 || tex_quad_data.v1 != v1 || tex_quad_data.u2 != u2 ||
tex_quad_data.v2 != v2 || tex_quad_data.S != S || tex_quad_data.G != G)
{
stq_offset = util_vbuf_stq->AppendData(coords, sizeof(coords), sizeof(STQVertex));
tex_quad_data.u1 = u1;
tex_quad_data.v1 = v1;
tex_quad_data.u2 = u2;
tex_quad_data.v2 = v2;
tex_quad_data.S = S;
tex_quad_data.G = G;
}
D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
D3D12_VERTEX_BUFFER_VIEW vb_view = {
util_vbuf_stq->GetBuffer12()
->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation;
static_cast<UINT>(util_vbuf_stq->GetSize()), // UINT SizeInBytes; This is the size of the
// entire buffer, not just the size of the
// vertex data for one draw call, since the
// offsetting is done in the draw call itself.
sizeof(STQVertex) // UINT StrideInBytes;
};
D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true);
if (!inherit_srv_binding)
{
texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV,
texture->GetSRV12GPU());
}
// Positional initialisation: the entries must stay in the member order of
// D3D12_GRAPHICS_PIPELINE_STATE_DESC.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = {
default_root_signature, // ID3D12RootSignature *pRootSignature;
vshader12, // D3D12_SHADER_BYTECODE VS;
pshader12, // D3D12_SHADER_BYTECODE PS;
{}, // D3D12_SHADER_BYTECODE DS;
{}, // D3D12_SHADER_BYTECODE HS;
gshader12, // D3D12_SHADER_BYTECODE GS;
{}, // D3D12_STREAM_OUTPUT_DESC StreamOutput
Renderer::GetResetBlendDesc(), // D3D12_BLEND_DESC BlendState;
UINT_MAX, // UINT SampleMask;
Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState
Renderer::GetResetDepthStencilDesc(), // D3D12_DEPTH_STENCIL_DESC DepthStencilState
layout12, // D3D12_INPUT_LAYOUT_DESC InputLayout
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES
// IndexBufferProperties
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE
// PrimitiveTopologyType
1, // UINT NumRenderTargets
{rt_format}, // DXGI_FORMAT RTVFormats[8]
DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat
{1 /* UINT Count */, 0 /* UINT Quality */} // DXGI_SAMPLE_DESC SampleDesc
};
if (rt_multisampled)
{
pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}
ID3D12PipelineState* pso = nullptr;
CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso));
D3D::current_command_list->SetPipelineState(pso);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
// The view spans the whole buffer, so the quad is selected through the start vertex.
D3D::current_command_list->DrawInstanced(4, 1, static_cast<UINT>(stq_offset), 0);
}
// Draws a quad covering the whole viewport at depth `z` with a constant colour, using the
// static clear shaders.
//
// Color:              packed 32-bit colour written into every vertex.
// z:                  depth of the quad.
// blend_desc:         blend state of the pipeline; must not be null.
// depth_stencil_desc: depth-stencil state of the pipeline; must not be null.
// rt_multisampled:    when true the pipeline uses the configured multisample count.
void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc,
D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled)
{
// Four corners of clip space, in triangle strip order.
ClearVertex coords[4] = {
{-1.0f, 1.0f, z, Color},
{1.0f, 1.0f, z, Color},
{-1.0f, -1.0f, z, Color},
{1.0f, -1.0f, z, Color},
};
// Only upload the vertices when they differ from the last uploaded quad.
// NOTE(review): InitUtils() zeroes the cache, so a first clear with Color == 0 and z == 0
// takes the "unchanged" path without anything having been uploaded - confirm whether that
// can happen.
if (clear_quad_data.col != Color || clear_quad_data.z != z)
{
clearq_offset = util_vbuf_clearq->AppendData(coords, sizeof(coords), sizeof(ClearVertex));
clear_quad_data.col = Color;
clear_quad_data.z = z;
}
D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
D3D12_VERTEX_BUFFER_VIEW vb_view = {
util_vbuf_clearq->GetBuffer12()
->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation;
static_cast<UINT>(util_vbuf_clearq->GetSize()), // UINT SizeInBytes; This is the size of the
// entire buffer, not just the size of the
// vertex data for one draw call, since the
// offsetting is done in the draw call
// itself.
sizeof(ClearVertex) // UINT StrideInBytes;
};
D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true);
// Positional initialisation: the entries must stay in the member order of
// D3D12_GRAPHICS_PIPELINE_STATE_DESC. The geometry shader is only used when stereo mode is on.
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = {
default_root_signature, // ID3D12RootSignature *pRootSignature;
StaticShaderCache::GetClearVertexShader(), // D3D12_SHADER_BYTECODE VS;
StaticShaderCache::GetClearPixelShader(), // D3D12_SHADER_BYTECODE PS;
{}, // D3D12_SHADER_BYTECODE DS;
{}, // D3D12_SHADER_BYTECODE HS;
g_ActiveConfig.iStereoMode > 0 ? StaticShaderCache::GetClearGeometryShader() :
D3D12_SHADER_BYTECODE(), // D3D12_SHADER_BYTECODE GS;
{}, // D3D12_STREAM_OUTPUT_DESC StreamOutput
*blend_desc, // D3D12_BLEND_DESC BlendState;
UINT_MAX, // UINT SampleMask;
Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState
*depth_stencil_desc, // D3D12_DEPTH_STENCIL_DESC DepthStencilState
StaticShaderCache::GetClearVertexShaderInputLayout(), // D3D12_INPUT_LAYOUT_DESC InputLayout
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES
// IndexBufferProperties
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE
// PrimitiveTopologyType
1, // UINT NumRenderTargets
{DXGI_FORMAT_R8G8B8A8_UNORM}, // DXGI_FORMAT RTVFormats[8]
DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat
{1 /* UINT Count */, 0 /* UINT Quality */} // DXGI_SAMPLE_DESC SampleDesc
};
if (rt_multisampled)
{
pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}
ID3D12PipelineState* pso = nullptr;
CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso));
D3D::current_command_list->SetPipelineState(pso);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
// The view spans the whole buffer, so the quad is selected through the start vertex.
D3D::current_command_list->DrawInstanced(4, 1, static_cast<UINT>(clearq_offset), 0);
}
// Fills `*vert` with the given position and packed colour. `vert` must not be null.
static void InitColVertex(ColVertex* vert, float x, float y, float z, u32 col)
{
  ColVertex& target = *vert;
  target.x = x;
  target.y = y;
  target.z = z;
  target.col = col;
}
// Draws one pixel-sized quad (emitted as two triangles, six vertices) for each EFB poke in
// |points|, and mirrors every poke into the CPU-side EFB access copy.
//
// type               - PokeZ writes the depth derived from the low 24 bits of the poke data;
//                      any other value takes the colour path.
// points/num_points  - array of pokes to apply and its length.
// blend_desc, depth_stencil_desc - copied into the pipeline state used for the draws.
// render_target, depth_buffer    - descriptor handles re-bound before every draw batch.
// rt_multisampled    - when true, the PSO sample count is taken from g_ActiveConfig.iMultisamples.
void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points,
D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc,
D3D12_CPU_DESCRIPTOR_HANDLE* render_target,
D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, bool rt_multisampled)
{
// The viewport and RT/DB are passed in so we can reconstruct the state if we need to execute in
// the middle of building the vertex buffer.
// NOTE(review): TRIANGLESTRIP is recorded here, but every batch below switches to TRIANGLELIST
// before drawing - confirm this initial call is still needed.
D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = {
default_root_signature, // ID3D12RootSignature *pRootSignature;
StaticShaderCache::GetClearVertexShader(), // D3D12_SHADER_BYTECODE VS;
StaticShaderCache::GetClearPixelShader(), // D3D12_SHADER_BYTECODE PS;
{}, // D3D12_SHADER_BYTECODE DS;
{}, // D3D12_SHADER_BYTECODE HS;
g_ActiveConfig.iStereoMode > 0 ? StaticShaderCache::GetClearGeometryShader() :
D3D12_SHADER_BYTECODE(), // D3D12_SHADER_BYTECODE GS;
{}, // D3D12_STREAM_OUTPUT_DESC StreamOutput
*blend_desc, // D3D12_BLEND_DESC BlendState;
UINT_MAX, // UINT SampleMask;
Renderer::GetResetRasterizerDesc(), // D3D12_RASTERIZER_DESC RasterizerState
*depth_stencil_desc, // D3D12_DEPTH_STENCIL_DESC DepthStencilState
StaticShaderCache::GetClearVertexShaderInputLayout(), // D3D12_INPUT_LAYOUT_DESC InputLayout
D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, // D3D12_INDEX_BUFFER_PROPERTIES
// IndexBufferProperties
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, // D3D12_PRIMITIVE_TOPOLOGY_TYPE
// PrimitiveTopologyType
1, // UINT NumRenderTargets
{DXGI_FORMAT_R8G8B8A8_UNORM}, // DXGI_FORMAT RTVFormats[8]
DXGI_FORMAT_D32_FLOAT, // DXGI_FORMAT DSVFormat
{1 /* UINT Count */, 0 /* UINT Quality */} // DXGI_SAMPLE_DESC SampleDesc
};
// The description defaults to one sample; override it for multisampled render targets.
if (rt_multisampled)
{
pso_desc.SampleDesc.Count = g_ActiveConfig.iMultisamples;
}
ID3D12PipelineState* pso = nullptr;
CheckHR(DX12::gx_state_cache.GetPipelineStateObjectFromCache(&pso_desc, &pso));
// If drawing a large number of points at once, this will have to be split into multiple passes.
// Each point needs six vertices; the batch size is however many quads fit in the whole buffer.
const size_t COL_QUAD_SIZE = sizeof(ColVertex) * 6;
size_t points_per_draw = util_vbuf_efbpokequads->GetSize() / COL_QUAD_SIZE;
size_t current_point_index = 0;
while (current_point_index < num_points)
{
// Map and reserve enough buffer space for this draw
size_t points_to_draw = std::min(num_points - current_point_index, points_per_draw);
size_t required_bytes = COL_QUAD_SIZE * points_to_draw;
void* buffer_ptr = nullptr;
size_t base_vertex_index =
util_vbuf_efbpokequads->BeginAppendData(&buffer_ptr, required_bytes, sizeof(ColVertex));
// NOTE(review): the literal 16 is presumably sizeof(ColVertex) (byte offset of the first
// reserved vertex) - TODO confirm against the ColVertex definition.
CHECK(base_vertex_index * 16 + required_bytes <= util_vbuf_efbpokequads->GetSize(), "Uh oh");
// Corresponding dirty flags set outside loop.
D3D::current_command_list->OMSetRenderTargets(1, render_target, FALSE, depth_buffer);
D3D::current_command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
D3D::command_list_mgr->SetCommandListPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
D3D12_VERTEX_BUFFER_VIEW vb_view = {
util_vbuf_efbpokequads->GetBuffer12()
->GetGPUVirtualAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation;
static_cast<UINT>(util_vbuf_efbpokequads->GetSize()), // UINT SizeInBytes; This is the size
// of the entire buffer, not just the
// size of the vertex data for one
// draw call, since the offsetting is
// done in the draw call itself.
sizeof(ColVertex) // UINT StrideInBytes;
};
D3D::current_command_list->IASetVertexBuffers(0, 1, &vb_view);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true);
D3D::current_command_list->SetPipelineState(pso);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
// generate quads for each efb point
ColVertex* base_vertex_ptr = reinterpret_cast<ColVertex*>(buffer_ptr);
for (size_t i = 0; i < points_to_draw; i++)
{
// generate quad from the single point (clip-space coordinates)
// (x1, y1) and (x2, y2) are opposite corners of the one-pixel cell; Y is negated so that
// increasing EFB y maps to decreasing clip-space y.
const EfbPokeData* point = &points[current_point_index];
float x1 = float(point->x) * 2.0f / EFB_WIDTH - 1.0f;
float y1 = -float(point->y) * 2.0f / EFB_HEIGHT + 1.0f;
float x2 = float(point->x + 1) * 2.0f / EFB_WIDTH - 1.0f;
float y2 = -float(point->y + 1) * 2.0f / EFB_HEIGHT + 1.0f;
float z = 0.0f;
u32 col = 0;
if (type == EFBAccessType::PokeZ)
{
// Low 24 bits of the poke data scaled by 2^24, then inverted (1 - value).
z = 1.0f - static_cast<float>(point->data & 0xFFFFFF) / 16777216.0f;
}
else
{
// Keep bytes 1 and 3, swap byte 0 with byte 2.
col = ((point->data & 0xFF00FF00) | ((point->data >> 16) & 0xFF) |
((point->data << 16) & 0xFF0000));
}
current_point_index++;
// quad -> triangles
ColVertex* vertex = &base_vertex_ptr[i * 6];
InitColVertex(&vertex[0], x1, y1, z, col);
InitColVertex(&vertex[1], x2, y1, z, col);
InitColVertex(&vertex[2], x1, y2, z, col);
InitColVertex(&vertex[3], x1, y2, z, col);
InitColVertex(&vertex[4], x2, y1, z, col);
InitColVertex(&vertex[5], x2, y2, z, col);
// Keep the CPU-side access copy in step with what is being drawn.
if (type == EFBAccessType::PokeColor)
FramebufferManager::UpdateEFBColorAccessCopy(point->x, point->y, col);
else if (type == EFBAccessType::PokeZ)
FramebufferManager::UpdateEFBDepthAccessCopy(point->x, point->y, z);
}
// Issue the draw
D3D::current_command_list->DrawInstanced(6 * static_cast<UINT>(points_to_draw), 1,
static_cast<UINT>(base_vertex_index), 0);
}
}
} // namespace D3D
} // namespace DX12

View File

@ -1,93 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <string>
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/D3DState.h"
enum class EFBAccessType;
struct EfbPokeData;
namespace DX12
{
class D3DStreamBuffer;
extern StateCache gx_state_cache;
namespace D3D
{
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the parameters this presumably records a transition of |resource|
// (or one |subresource| of it) from |state_before| to |state_after| on |command_list| -
// confirm against the implementation.
void ResourceBarrier(ID3D12GraphicsCommandList* command_list, ID3D12Resource* resource,
D3D12_RESOURCE_STATES state_before, D3D12_RESOURCE_STATES state_after,
UINT subresource);
// NOTE(review): neither CD3DFont::Init() nor CD3DFont::DrawTextScaled(), as declared in this
// header, takes a flags parameter, so no consumer of the constants below is visible here.
// Font creation flags
static const unsigned int D3DFONT_BOLD = 0x0001;
static const unsigned int D3DFONT_ITALIC = 0x0002;
// Font rendering flags
static const unsigned int D3DFONT_CENTERED = 0x0001;
// Text renderer for the D3D12 backend. Holds a texture, its descriptor handles, a streaming
// vertex buffer and the pipeline-state pieces it draws with.
class CD3DFont
{
public:
CD3DFont();
// Initializing and destroying device-dependent objects
int Init();
int Shutdown();
// 2D text drawing function
int DrawTextScaled(float x, float y, float size, float spacing, u32 dwColor,
const std::string& text);
private:
// Texture resource and its CPU/GPU descriptor handles.
ID3D12Resource* m_texture12 = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE m_texture12_cpu = {};
D3D12_GPU_DESCRIPTOR_HANDLE m_texture12_gpu = {};
std::unique_ptr<D3DStreamBuffer> m_vertex_buffer;
// Pipeline state components.
D3D12_INPUT_LAYOUT_DESC m_input_layout12 = {};
D3D12_SHADER_BYTECODE m_pshader12 = {};
D3D12_SHADER_BYTECODE m_vshader12 = {};
D3D12_BLEND_DESC m_blendstate12 = {};
D3D12_RASTERIZER_DESC m_raststate12 = {};
ID3D12PipelineState* m_pso = nullptr;
unsigned int m_line_height = 0;
// 96 entries of four floats each.
// NOTE(review): presumably one texture-coordinate rectangle per character code 32..127 - confirm.
float m_tex_coords[128 - 32][4] = {};
const int m_tex_width = 512;
const int m_tex_height = 512;
};
// Shared font renderer instance; defined outside this header.
extern CD3DFont font;
// Declarations of the D3D utility entry points; their definitions are not part of this header.
void InitUtils();
void ShutdownUtils();
void SetPointCopySampler();
void SetLinearCopySampler();
// Depth range defaults to the full [D3D12_MIN_DEPTH, D3D12_MAX_DEPTH] interval.
void SetViewportAndScissor(int top_left_x, int top_left_y, int width, int height,
float min_depth = D3D12_MIN_DEPTH, float max_depth = D3D12_MAX_DEPTH);
// Draws |texture| (sub-rectangle |source| of a source_width x source_height surface) with the
// given shaders. Defaults: empty shader bytecode/layout, gamma 1.0, slice 0, an R8G8B8A8_UNORM
// render target, no inherited SRV binding, non-multisampled target.
void DrawShadedTexQuad(D3DTexture2D* texture, const D3D12_RECT* source, int source_width,
int source_height, D3D12_SHADER_BYTECODE pshader12 = {},
D3D12_SHADER_BYTECODE vshader12 = {}, D3D12_INPUT_LAYOUT_DESC layout12 = {},
D3D12_SHADER_BYTECODE gshader12 = {}, float gamma = 1.0f, u32 slice = 0,
DXGI_FORMAT rt_format = DXGI_FORMAT_R8G8B8A8_UNORM,
bool inherit_srv_binding = false, bool rt_multisampled = false);
void DrawClearQuad(u32 Color, float z, D3D12_BLEND_DESC* blend_desc,
D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc, bool rt_multisampled);
// Draws one quad per EFB poke point; see the definition for parameter details.
void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points,
D3D12_BLEND_DESC* blend_desc, D3D12_DEPTH_STENCIL_DESC* depth_stencil_desc,
D3D12_CPU_DESCRIPTOR_HANDLE* render_target,
D3D12_CPU_DESCRIPTOR_HANDLE* depth_buffer, bool rt_multisampled);
}
}

View File

@ -1,544 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Core/HW/Memmap.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "VideoBackends/D3D12/XFBEncoder.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// Out-of-class definitions of FramebufferManager's static state. They have static storage
// duration and no initializer, so they start out zero-initialized.
FramebufferManager::Efb FramebufferManager::m_efb;
unsigned int FramebufferManager::m_target_width;
unsigned int FramebufferManager::m_target_height;
// Exposes the EFB colour texture pointer by reference, so callers see later swaps.
D3DTexture2D*& FramebufferManager::GetEFBColorTexture()
{
  D3DTexture2D*& color_slot = m_efb.color_tex;
  return color_slot;
}
// Exposes the EFB depth texture pointer by reference.
D3DTexture2D*& FramebufferManager::GetEFBDepthTexture()
{
  D3DTexture2D*& depth_slot = m_efb.depth_tex;
  return depth_slot;
}
// Exposes the temporary EFB colour texture pointer by reference.
D3DTexture2D*& FramebufferManager::GetEFBColorTempTexture()
{
  D3DTexture2D*& temp_slot = m_efb.color_temp_tex;
  return temp_slot;
}
void FramebufferManager::SwapReinterpretTexture()
{
D3DTexture2D* swaptex = GetEFBColorTempTexture();
m_efb.color_temp_tex = GetEFBColorTexture();
m_efb.color_tex = swaptex;
}
// Returns a single-sampled view of the EFB colour data. With multisampling enabled, every
// array slice is resolved into the dedicated resolve texture first; otherwise the EFB colour
// texture itself is returned.
D3DTexture2D*& FramebufferManager::GetResolvedEFBColorTexture()
{
  // Nothing to resolve when multisampling is off.
  if (g_ActiveConfig.iMultisamples <= 1)
    return m_efb.color_tex;

  m_efb.resolved_color_tex->TransitionToResourceState(D3D::current_command_list,
                                                      D3D12_RESOURCE_STATE_RESOLVE_DEST);
  m_efb.color_tex->TransitionToResourceState(D3D::current_command_list,
                                             D3D12_RESOURCE_STATE_RESOLVE_SOURCE);

  // Source and destination share the same subresource index for a given slice.
  for (int slice = 0; slice < m_efb.slices; ++slice)
  {
    const auto subresource = D3D12CalcSubresource(0, slice, 0, 1, m_efb.slices);
    D3D::current_command_list->ResolveSubresource(m_efb.resolved_color_tex->GetTex12(),
                                                  subresource, m_efb.color_tex->GetTex12(),
                                                  subresource, DXGI_FORMAT_R8G8B8A8_UNORM);
  }

  // Put the EFB colour texture back into its render-target state.
  m_efb.color_tex->TransitionToResourceState(D3D::current_command_list,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);
  return m_efb.resolved_color_tex;
}
// Returns a single-sampled view of the EFB depth data: the resolve texture (freshly resolved)
// when multisampling is enabled, the EFB depth texture itself otherwise.
D3DTexture2D*& FramebufferManager::GetResolvedEFBDepthTexture()
{
  if (g_ActiveConfig.iMultisamples <= 1)
    return m_efb.depth_tex;

  ResolveDepthTexture();
  return m_efb.resolved_depth_tex;
}
// Creates the GPU resources backing the EFB at the given internal resolution:
// the primary colour and depth targets, the temporary colour target used by
// ReinterpretPixelData, the resolve targets (only when multisampling is enabled) and the
// CPU access copies.
//
// target_width/target_height - requested size in pixels; values below 1 are clamped to 1.
FramebufferManager::FramebufferManager(int target_width, int target_height)
{
  m_target_width = static_cast<unsigned int>(std::max(target_width, 1));
  m_target_height = static_cast<unsigned int>(std::max(target_height, 1));

  DXGI_SAMPLE_DESC sample_desc;
  sample_desc.Count = g_ActiveConfig.iMultisamples;
  sample_desc.Quality = 0;

  // Initialised so a failed creation can never leave an indeterminate pointer behind.
  ID3D12Resource* buf12 = nullptr;
  D3D12_RESOURCE_DESC texdesc12;
  D3D12_CLEAR_VALUE optimized_clear_valueRTV = {DXGI_FORMAT_R8G8B8A8_UNORM,
                                                {0.0f, 0.0f, 0.0f, 1.0f}};
  D3D12_CLEAR_VALUE optimized_clear_valueDSV = CD3DX12_CLEAR_VALUE(DXGI_FORMAT_D32_FLOAT, 0.0f, 0);
  HRESULT hr;

  // Two array slices when stereo rendering is active, one otherwise.
  m_EFBLayers = m_efb.slices = (g_ActiveConfig.iStereoMode > 0) ? 2 : 1;

  // EFB color texture - primary render target
  texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(
      DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1,
      sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);
  // The result used to be stored in hr and never inspected; check it like the other EFB
  // textures so a failed allocation is reported instead of wrapping a bad pointer.
  CheckHR(D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12)));
  m_efb.color_tex =
      new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET,
                       DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM,
                       (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON);
  SAFE_RELEASE(buf12);
  D3D::SetDebugObjectName12(m_efb.color_tex->GetTex12(), "EFB color texture");

  // Temporary EFB color texture - used in ReinterpretPixelData
  texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(
      DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, m_efb.slices, 1,
      sample_desc.Count, sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);
  CheckHR(D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueRTV, IID_PPV_ARGS(&buf12)));
  m_efb.color_temp_tex =
      new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET,
                       DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM,
                       (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON);
  SAFE_RELEASE(buf12);
  D3D::SetDebugObjectName12(m_efb.color_temp_tex->GetTex12(), "EFB color temp texture");

  // EFB depth buffer - primary depth buffer
  texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(
      DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, 1, sample_desc.Count,
      sample_desc.Quality, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL);
  CheckHR(D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COMMON, &optimized_clear_valueDSV, IID_PPV_ARGS(&buf12)));
  m_efb.depth_tex =
      new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_DEPTH_STENCIL,
                       DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN,
                       (sample_desc.Count > 1), D3D12_RESOURCE_STATE_COMMON);
  SAFE_RELEASE(buf12);
  D3D::SetDebugObjectName12(m_efb.depth_tex->GetTex12(), "EFB depth texture");

  if (g_ActiveConfig.iMultisamples > 1)
  {
    // Framebuffer resolve textures (color+depth)
    texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width,
                                             m_target_height, m_efb.slices, 1);
    hr = D3D::device12->CreateCommittedResource(
        &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
        D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12));
    CHECK(hr == S_OK, "create EFB color resolve texture (size: %dx%d)", m_target_width,
          m_target_height);
    m_efb.resolved_color_tex = new D3DTexture2D(
        buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN,
        DXGI_FORMAT_UNKNOWN, false, D3D12_RESOURCE_STATE_COMMON);
    SAFE_RELEASE(buf12);
    D3D::SetDebugObjectName12(m_efb.resolved_color_tex->GetTex12(),
                              "EFB color resolve texture shader resource view");

    // The depth resolve target is a single-sampled R32_FLOAT colour render target.
    texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height,
                                             m_efb.slices, 1, 1, 0,
                                             D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);
    hr = D3D::device12->CreateCommittedResource(
        &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
        D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&buf12));
    CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width,
          m_target_height, hr);
    m_efb.resolved_depth_tex =
        new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET,
                         DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false,
                         D3D12_RESOURCE_STATE_COMMON);
    SAFE_RELEASE(buf12);
    D3D::SetDebugObjectName12(m_efb.resolved_depth_tex->GetTex12(),
                              "EFB depth resolve texture shader resource view");
  }
  else
  {
    // No resolve targets are needed without multisampling.
    m_efb.resolved_color_tex = nullptr;
    m_efb.resolved_depth_tex = nullptr;
  }

  InitializeEFBAccessCopies();
}
// Tears down the CPU access copies first, then releases every EFB texture created by the
// constructor (the resolve textures may be null when multisampling was disabled).
FramebufferManager::~FramebufferManager()
{
DestroyEFBAccessCopies();
SAFE_RELEASE(m_efb.color_tex);
SAFE_RELEASE(m_efb.depth_tex);
SAFE_RELEASE(m_efb.color_temp_tex);
SAFE_RELEASE(m_efb.resolved_color_tex);
SAFE_RELEASE(m_efb.resolved_depth_tex);
}
// Encodes the region |sourceRc| of the (resolved) EFB colour texture into emulated RAM at
// |xfbAddr|, using the global XFB encoder.
void FramebufferManager::CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight,
                                       const EFBRectangle& sourceRc, float gamma)
{
  u8* const xfb_in_ram = Memory::GetPointer(xfbAddr);
  D3DTexture2D* const resolved_color = GetResolvedEFBColorTexture();

  // Convert the EFB-space rectangle with the renderer before encoding.
  const TargetRectangle target_rc = g_renderer->ConvertEFBRectangle(sourceRc);

  g_xfb_encoder->EncodeTextureToRam(xfb_in_ram, fbStride, fbHeight, resolved_color, target_rc,
                                    m_target_width, m_target_height, gamma);
}
std::unique_ptr<XFBSourceBase> FramebufferManager::CreateXFBSource(unsigned int target_width,
unsigned int target_height,
unsigned int layers)
{
return std::make_unique<XFBSource>(
D3DTexture2D::Create(target_width, target_height,
TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET,
DXGI_FORMAT_R8G8B8A8_UNORM, 1, layers),
layers);
}
// Reports the current render target size as (width, height).
std::pair<u32, u32> FramebufferManager::GetTargetSize() const
{
  return std::pair<u32, u32>(m_target_width, m_target_height);
}
void FramebufferManager::ResolveDepthTexture()
{
// ResolveSubresource does not work with depth textures.
// Instead, we use a shader that selects the minimum depth from all samples.
D3D::SetViewportAndScissor(0, 0, m_target_width, m_target_height);
m_efb.resolved_depth_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(0, nullptr, FALSE,
&m_efb.resolved_depth_tex->GetDSV12());
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
const D3D12_RECT source_rect = CD3DX12_RECT(0, 0, m_target_width, m_target_height);
D3D::DrawShadedTexQuad(FramebufferManager::GetEFBDepthTexture(), &source_rect, m_target_width,
m_target_height, StaticShaderCache::GetDepthResolveToColorPixelShader(),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(),
StaticShaderCache::GetCopyGeometryShader(), 1.0, 0, DXGI_FORMAT_D32_FLOAT);
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
// Restores proper viewport/scissor settings.
g_renderer->RestoreAPIState();
}
void FramebufferManager::RestoreEFBRenderTargets()
{
D3D::current_command_list->OMSetRenderTargets(
1, &FramebufferManager::GetEFBColorTexture()->GetRTV12(), FALSE,
&FramebufferManager::GetEFBDepthTexture()->GetDSV12());
}
// Reads the 32-bit colour at (x, y) from the CPU-visible colour readback copy, mapping the
// copy first if it is not currently mapped.
u32 FramebufferManager::ReadEFBColorAccessCopy(u32 x, u32 y)
{
  if (m_efb.color_access_readback_map == nullptr)
    MapEFBColorAccessCopy();

  // Rows are color_access_readback_pitch bytes apart; texels are sizeof(u32) bytes wide.
  const size_t byte_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32);
  const u8* const texel = m_efb.color_access_readback_map + byte_offset;

  u32 value;
  memcpy(&value, texel, sizeof(value));
  return value;
}
// Reads the float depth at (x, y) from the CPU-visible depth readback copy, mapping the copy
// first if it is not currently mapped.
float FramebufferManager::ReadEFBDepthAccessCopy(u32 x, u32 y)
{
  if (m_efb.depth_access_readback_map == nullptr)
    MapEFBDepthAccessCopy();

  // Rows are depth_access_readback_pitch bytes apart; texels are sizeof(float) bytes wide.
  const size_t byte_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float);
  const u8* const texel = m_efb.depth_access_readback_map + byte_offset;

  float value;
  memcpy(&value, texel, sizeof(value));
  return value;
}
// Overwrites the colour at (x, y) in the mapped colour readback copy. Does nothing while the
// copy is not mapped.
void FramebufferManager::UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color)
{
  u8* const mapped = m_efb.color_access_readback_map;
  if (mapped == nullptr)
    return;

  const size_t byte_offset = y * m_efb.color_access_readback_pitch + x * sizeof(u32);
  memcpy(mapped + byte_offset, &color, sizeof(color));
}
// Overwrites the depth at (x, y) in the mapped depth readback copy. Does nothing while the
// copy is not mapped.
void FramebufferManager::UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth)
{
  u8* const mapped = m_efb.depth_access_readback_map;
  if (mapped == nullptr)
    return;

  const size_t byte_offset = y * m_efb.depth_access_readback_pitch + x * sizeof(float);
  memcpy(mapped + byte_offset, &depth, sizeof(depth));
}
// Creates the resources used for CPU access to the EFB: for colour and for depth, one
// EFB_WIDTH x EFB_HEIGHT render-target texture (the "resize" buffer) and one readback-heap
// buffer whose row pitch is aligned to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
void FramebufferManager::InitializeEFBAccessCopies()
{
  D3D12_CLEAR_VALUE optimized_color_clear_value = {DXGI_FORMAT_R8G8B8A8_UNORM,
                                                   {0.0f, 0.0f, 0.0f, 1.0f}};
  D3D12_CLEAR_VALUE optimized_depth_clear_value = {DXGI_FORMAT_R32_FLOAT, {1.0f}};
  CD3DX12_RESOURCE_DESC texdesc12;
  ID3D12Resource* buf12 = nullptr;
  HRESULT hr;

  // EFB access - color resize buffer
  texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, 1,
                                           1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
                                           D3D12_TEXTURE_LAYOUT_UNKNOWN, 0);
  hr = D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COMMON, &optimized_color_clear_value, IID_PPV_ARGS(&buf12));
  CHECK(hr == S_OK, "create EFB access color resize buffer (hr=%#x)", hr);
  m_efb.color_access_resize_tex =
      new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R8G8B8A8_UNORM);
  D3D::SetDebugObjectName12(m_efb.color_access_resize_tex->GetTex12(),
                            "EFB access color resize buffer");
  buf12->Release();

  // EFB access - color staging/readback buffer
  m_efb.color_access_readback_pitch = Common::AlignUp(static_cast<u32>(EFB_WIDTH * sizeof(u32)),
                                                      D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
  texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.color_access_readback_pitch * EFB_HEIGHT);
  hr = D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.color_access_readback_buffer));
  // Previously unchecked: a failure here would only surface later as a null-pointer use.
  CHECK(hr == S_OK, "create EFB access color readback buffer (hr=%#x)", hr);
  D3D::SetDebugObjectName12(m_efb.color_access_readback_buffer, "EFB access color readback buffer");

  // EFB access - depth resize buffer
  texdesc12 = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, 0,
                                           D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
                                           D3D12_TEXTURE_LAYOUT_UNKNOWN, 0);
  hr = D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COMMON, &optimized_depth_clear_value, IID_PPV_ARGS(&buf12));
  CHECK(hr == S_OK, "create EFB access depth resize buffer (hr=%#x)", hr);
  m_efb.depth_access_resize_tex =
      new D3DTexture2D(buf12, TEXTURE_BIND_FLAG_RENDER_TARGET, DXGI_FORMAT_R32_FLOAT);
  // Name the depth texture itself; this used to rename the colour resize texture.
  D3D::SetDebugObjectName12(m_efb.depth_access_resize_tex->GetTex12(),
                            "EFB access depth resize buffer");
  buf12->Release();

  // EFB access - depth staging/readback buffer
  m_efb.depth_access_readback_pitch = Common::AlignUp(static_cast<u32>(EFB_WIDTH * sizeof(float)),
                                                      D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
  texdesc12 = CD3DX12_RESOURCE_DESC::Buffer(m_efb.depth_access_readback_pitch * EFB_HEIGHT);
  hr = D3D::device12->CreateCommittedResource(
      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE, &texdesc12,
      D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_efb.depth_access_readback_buffer));
  CHECK(hr == S_OK, "create EFB access depth readback buffer (hr=%#x)", hr);
  // Name the depth buffer itself; this used to rename the colour readback buffer.
  D3D::SetDebugObjectName12(m_efb.depth_access_readback_buffer, "EFB access depth readback buffer");
}
void FramebufferManager::MapEFBColorAccessCopy()
{
D3D::command_list_mgr->CPUAccessNotify();
ID3D12Resource* src_resource;
if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT ||
g_ActiveConfig.iMultisamples > 1)
{
// for non-1xIR or multisampled cases, we need to copy to an intermediate texture first
m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT);
D3D::SetPointCopySampler();
D3D::current_command_list->OMSetRenderTargets(1, &m_efb.color_access_resize_tex->GetRTV12(),
FALSE, nullptr);
CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height);
D3D::DrawShadedTexQuad(m_efb.color_tex, &src_rect, m_target_width, m_target_height,
StaticShaderCache::GetColorCopyPixelShader(true),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(), {}, 1.0f, 0,
DXGI_FORMAT_R8G8B8A8_UNORM, false, false);
m_efb.color_access_resize_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_COPY_SOURCE);
src_resource = m_efb.color_access_resize_tex->GetTex12();
}
else
{
// Can source the EFB buffer
m_efb.color_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_COPY_SOURCE);
src_resource = m_efb.color_tex->GetTex12();
}
// Copy to staging resource
D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = {
0, {DXGI_FORMAT_R8G8B8A8_UNORM, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.color_access_readback_pitch}};
CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.color_access_readback_buffer, dst_footprint);
CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);
// Restore EFB resource state if it was sourced from here
if (src_resource == m_efb.color_tex->GetTex12())
m_efb.color_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
// Block until completion - state is automatically restored
D3D::command_list_mgr->ExecuteQueuedWork(true);
// Resource copy has finished, so safe to map now
D3D12_RANGE read_range = {0, m_efb.color_access_readback_pitch * EFB_HEIGHT};
m_efb.color_access_readback_buffer->Map(
0, &read_range, reinterpret_cast<void**>(&m_efb.color_access_readback_map));
}
void FramebufferManager::MapEFBDepthAccessCopy()
{
D3D::command_list_mgr->CPUAccessNotify();
ID3D12Resource* src_resource;
if (m_target_width != EFB_WIDTH || m_target_height != EFB_HEIGHT ||
g_ActiveConfig.iMultisamples > 1)
{
// for non-1xIR or multisampled cases, we need to copy to an intermediate texture first
m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT);
D3D::SetPointCopySampler();
D3D::current_command_list->OMSetRenderTargets(1, &m_efb.depth_access_resize_tex->GetRTV12(),
FALSE, nullptr);
CD3DX12_RECT src_rect(0, 0, m_target_width, m_target_height);
D3D::DrawShadedTexQuad(m_efb.depth_tex, &src_rect, m_target_width, m_target_height,
(g_ActiveConfig.iMultisamples > 1) ?
StaticShaderCache::GetDepthResolveToColorPixelShader() :
StaticShaderCache::GetColorCopyPixelShader(false),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(), {}, 1.0f, 0,
DXGI_FORMAT_R32_FLOAT, false, false);
m_efb.depth_access_resize_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_COPY_SOURCE);
src_resource = m_efb.depth_access_resize_tex->GetTex12();
}
else
{
// Can source the EFB buffer
m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_COPY_SOURCE);
src_resource = m_efb.depth_tex->GetTex12();
}
// Copy to staging resource
D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = {
0, {DXGI_FORMAT_R32_FLOAT, EFB_WIDTH, EFB_HEIGHT, 1, m_efb.depth_access_readback_pitch}};
CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_efb.depth_access_readback_buffer, dst_footprint);
CD3DX12_TEXTURE_COPY_LOCATION src_location(src_resource, 0);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);
// Restore EFB resource state if it was sourced from here
if (src_resource == m_efb.depth_tex->GetTex12())
m_efb.depth_tex->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_DEPTH_WRITE);
// Block until completion - state is automatically restored
D3D::command_list_mgr->ExecuteQueuedWork(true);
// Resource copy has finished, so safe to map now
D3D12_RANGE read_range = {0, m_efb.depth_access_readback_pitch * EFB_HEIGHT};
m_efb.depth_access_readback_buffer->Map(
0, &read_range, reinterpret_cast<void**>(&m_efb.depth_access_readback_map));
}
void FramebufferManager::InvalidateEFBAccessCopies()
{
D3D12_RANGE write_range = {};
if (m_efb.color_access_readback_map)
{
m_efb.color_access_readback_buffer->Unmap(0, &write_range);
m_efb.color_access_readback_map = nullptr;
}
if (m_efb.depth_access_readback_map)
{
m_efb.depth_access_readback_buffer->Unmap(0, &write_range);
m_efb.depth_access_readback_map = nullptr;
}
}
// Releases everything created by InitializeEFBAccessCopies: unmaps any mapped readback
// buffer, releases the two resize textures, and hands the two readback buffers to the command
// list manager for destruction once the current command list has executed.
void FramebufferManager::DestroyEFBAccessCopies()
{
// Unmap first so no map pointer outlives its buffer.
InvalidateEFBAccessCopies();
SAFE_RELEASE(m_efb.color_access_resize_tex);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(
m_efb.color_access_readback_buffer);
m_efb.color_access_readback_buffer = nullptr;
SAFE_RELEASE(m_efb.depth_access_resize_tex);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(
m_efb.depth_access_readback_buffer);
m_efb.depth_access_readback_buffer = nullptr;
}
// Decodes the XFB data located at |xfbAddr| in emulated memory into this source's texture,
// using the global XFB encoder.
void XFBSource::DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight)
{
  u8* const xfb_in_ram = Memory::GetPointer(xfbAddr);
  g_xfb_encoder->DecodeToTexture(m_tex, xfb_in_ram, fbWidth, fbHeight);
}
void XFBSource::CopyEFB(float gamma)
{
// Copy EFB data to XFB and restore render target again
D3D::SetViewportAndScissor(0, 0, texWidth, texHeight);
const D3D12_RECT rect = CD3DX12_RECT(0, 0, texWidth, texHeight);
m_tex->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &m_tex->GetRTV12(), FALSE, nullptr);
D3D::SetPointCopySampler();
D3D::DrawShadedTexQuad(FramebufferManager::GetEFBColorTexture(), &rect,
g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(),
StaticShaderCache::GetColorCopyPixelShader(true),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(),
StaticShaderCache::GetCopyGeometryShader(), gamma, 0,
DXGI_FORMAT_R8G8B8A8_UNORM, false, m_tex->GetMultisampled());
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
// Restores proper viewport/scissor settings.
g_renderer->RestoreAPIState();
}
} // namespace DX12

View File

@ -1,124 +0,0 @@
// Copyright 2009 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <utility>
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoCommon/FramebufferManagerBase.h"
namespace DX12
{
// On the GameCube, the game sends a request for the graphics processor to
// transfer its internal EFB (Embedded Framebuffer) to an area in GameCube RAM
// called the XFB (External Framebuffer). The size and location of the XFB is
// decided at the time of the copy, and the format is always YUYV. The video
// interface is given a pointer to the XFB, which will be decoded and
// displayed on the TV.
//
// There are two ways for Dolphin to emulate this:
//
// Real XFB mode:
//
// Dolphin will behave like the GameCube and encode the EFB to
// a portion of GameCube RAM. The emulated video interface will decode the data
// for output to the screen.
//
// Advantages: Behaves exactly like the GameCube.
// Disadvantages: Resolution will be limited.
//
// Virtual XFB mode:
//
// When a request is made to copy the EFB to an XFB, Dolphin
// will remember the RAM location and size of the XFB in a Virtual XFB list.
// The video interface will look up the XFB in the list and use the enhanced
// data stored there, if available.
//
// Advantages: Enables high resolution graphics, better than real hardware.
// Disadvantages: If the GameCube CPU writes directly to the XFB (which is
// possible but uncommon), the Virtual XFB will not capture this information.
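// NOTE: the remark below refers to a "maximum number" constant, but no such constant is
// declared in this header.
// There may be multiple XFBs in GameCube RAM. This is the maximum number to
// virtualize.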
// D3D12 implementation of an XFB source: wraps one texture plus its slice count.
struct XFBSource final : public XFBSourceBase
{
// Stores |tex| and |slices| as given.
XFBSource(D3DTexture2D* tex, int slices) : m_tex(tex), m_slices(slices) {}
// Releases the texture passed to the constructor; m_tex is dereferenced unconditionally, so
// it must not be null here.
~XFBSource() { m_tex->Release(); }
void DecodeToTexture(u32 xfbAddr, u32 fbWidth, u32 fbHeight) override;
void CopyEFB(float gamma) override;
D3DTexture2D* m_tex;
const int m_slices;
};
// D3D12 framebuffer manager. All EFB state lives in static members, so the accessors below
// are static and can be called without an instance.
class FramebufferManager final : public FramebufferManagerBase
{
public:
FramebufferManager(int target_width, int target_height);
~FramebufferManager();
// Accessors return references to the stored texture pointers.
static D3DTexture2D*& GetEFBColorTexture();
static D3DTexture2D*& GetEFBDepthTexture();
static D3DTexture2D*& GetResolvedEFBColorTexture();
static D3DTexture2D*& GetResolvedEFBDepthTexture();
static D3DTexture2D*& GetEFBColorTempTexture();
static void SwapReinterpretTexture();
static void ResolveDepthTexture();
static void RestoreEFBRenderTargets();
// Access EFB from CPU
static u32 ReadEFBColorAccessCopy(u32 x, u32 y);
static float ReadEFBDepthAccessCopy(u32 x, u32 y);
static void UpdateEFBColorAccessCopy(u32 x, u32 y, u32 color);
static void UpdateEFBDepthAccessCopy(u32 x, u32 y, float depth);
static void InitializeEFBAccessCopies();
static void MapEFBColorAccessCopy();
static void MapEFBDepthAccessCopy();
static void InvalidateEFBAccessCopies();
static void DestroyEFBAccessCopies();
private:
std::unique_ptr<XFBSourceBase> CreateXFBSource(unsigned int target_width,
unsigned int target_height,
unsigned int layers) override;
std::pair<u32, u32> GetTargetSize() const override;
void CopyToRealXFB(u32 xfbAddr, u32 fbStride, u32 fbHeight, const EFBRectangle& sourceRc,
float gamma) override;
// Every EFB-related resource owned by the manager.
static struct Efb
{
// Primary render targets and the temporary colour target.
D3DTexture2D* color_tex;
D3DTexture2D* depth_tex;
D3DTexture2D* color_temp_tex;
// Resolve targets.
D3DTexture2D* resolved_color_tex;
D3DTexture2D* resolved_depth_tex;
// CPU access (colour): resize texture, readback buffer, its mapped pointer and row pitch.
D3DTexture2D* color_access_resize_tex;
ID3D12Resource* color_access_readback_buffer;
u8* color_access_readback_map;
u32 color_access_readback_pitch;
// CPU access (depth): same layout as the colour set above.
D3DTexture2D* depth_access_resize_tex;
ID3D12Resource* depth_access_readback_buffer;
u8* depth_access_readback_map;
u32 depth_access_readback_pitch;
// Number of array slices in the EFB textures.
int slices;
} m_efb;
static unsigned int m_target_width;
static unsigned int m_target_height;
};
} // namespace DX12

View File

@ -1,115 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/NativeVertexFormat.h"
#include "VideoBackends/D3D12/VertexManager.h"
namespace DX12
{
std::unique_ptr<NativeVertexFormat>
VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
return std::make_unique<D3DVertexFormat>(vtx_decl);
}
// Lookup table used by VarToD3D. The index is
//   type + 5 * (component_count - 1) + 20 * (integer ? 1 : 0)
// i.e. two halves of 20 entries (float, then integer), each holding four rows of five type
// entries for component counts 1 to 4. DXGI_FORMAT_UNKNOWN marks combinations with no
// matching format.
static const constexpr DXGI_FORMAT d3d_format_lookup[5 * 4 * 2] = {
// float formats
DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM,
DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R16G16_UNORM,
DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32G32B32_FLOAT,
DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R16G16B16A16_UNORM,
DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R32G32B32A32_FLOAT,
// integer formats
DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R16G16_UINT,
DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN,
DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_UNKNOWN,
};
// Translates a vertex attribute description into the matching DXGI format.
//
// t       - component type; its integer value selects the slot within a group of five.
// size    - number of components (1 to 4).
// integer - true to pick from the integer half of d3d_format_lookup.
//
// Returns DXGI_FORMAT_UNKNOWN, after raising a PanicAlert, when the combination has no
// format. Arguments outside the table's range are reported the same way instead of being
// used to index past the end of d3d_format_lookup.
DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
{
  constexpr int types_per_size = 5;
  constexpr int max_components = 4;

  const int type_index = static_cast<int>(t);
  DXGI_FORMAT retval = DXGI_FORMAT_UNKNOWN;

  // Only touch the table when both coordinates are inside its 5 x 4 x 2 layout.
  if (type_index >= 0 && type_index < types_per_size && size >= 1 && size <= max_components)
  {
    retval = d3d_format_lookup[type_index + types_per_size * (size - 1) +
                               types_per_size * max_components * static_cast<int>(integer)];
  }

  if (retval == DXGI_FORMAT_UNKNOWN)
  {
    PanicAlert("VarToD3D: Invalid type/size combo %i , %i, %i", static_cast<int>(t), size,
               static_cast<int>(integer));
  }
  return retval;
}
// Builds the D3D12 input layout from a portable vertex declaration. Enabled attributes are
// appended in a fixed order: position, normals, colors, texture coordinates, and finally the
// position matrix index (exposed under the BLENDINDICES semantic).
D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
  this->vtx_decl = vtx_decl;

  AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.position, "POSITION", 0);

  for (int normal_index = 0; normal_index < 3; ++normal_index)
    AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.normals[normal_index], "NORMAL",
                                                  normal_index);

  for (int color_index = 0; color_index < 2; ++color_index)
    AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.colors[color_index], "COLOR",
                                                  color_index);

  for (int texcoord_index = 0; texcoord_index < 8; ++texcoord_index)
    AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.texcoords[texcoord_index], "TEXCOORD",
                                                  texcoord_index);

  AddInputElementDescFromAttributeFormatIfValid(&vtx_decl.posmtx, "BLENDINDICES", 0);

  // Publish the collected elements through the layout descriptor.
  m_layout12.pInputElementDescs = m_elems.data();
  m_layout12.NumElements = m_num_elems;
}
// Nothing to release here: the element descriptors live in a member array and the layout
// descriptor only points into it.
D3DVertexFormat::~D3DVertexFormat()
{
}
void D3DVertexFormat::AddInputElementDescFromAttributeFormatIfValid(const AttributeFormat* format,
const char* semantic_name,
unsigned int semantic_index)
{
if (!format->enable)
{
return;
}
D3D12_INPUT_ELEMENT_DESC desc = {};
desc.AlignedByteOffset = format->offset;
desc.Format = VarToD3D(format->type, format->components, format->integer);
desc.InputSlot = 0;
desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
desc.SemanticName = semantic_name;
desc.SemanticIndex = semantic_index;
m_elems[m_num_elems] = desc;
++m_num_elems;
}
// Required by the NativeVertexFormat interface; this backend has nothing to do here.
void D3DVertexFormat::SetupVertexPointers()
{
// No-op on DX12.
}
// Returns a copy of the input layout descriptor built by the constructor. The descriptor
// points into this object's element array, so it must not outlive the vertex format.
D3D12_INPUT_LAYOUT_DESC D3DVertexFormat::GetActiveInputLayout12() const
{
return m_layout12;
}
} // namespace DX12

View File

@ -1,32 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
#pragma once
#include <array>
#include <d3d12.h>
#include "VideoCommon/NativeVertexFormat.h"
namespace DX12
{
// D3D12 implementation of NativeVertexFormat. Converts a portable vertex declaration into a
// D3D12_INPUT_LAYOUT_DESC whose element descriptors are stored inside this object.
class D3DVertexFormat final : public NativeVertexFormat
{
public:
  // explicit: a vertex declaration should never convert to a vertex format implicitly.
  explicit D3DVertexFormat(const PortableVertexDeclaration& vtx_decl);
  ~D3DVertexFormat();

  void SetupVertexPointers() override;

  // Returns the layout descriptor; it references m_elems, so it is only valid while this
  // object is alive.
  D3D12_INPUT_LAYOUT_DESC GetActiveInputLayout12() const;

private:
  // Appends a descriptor for `format` if the attribute is enabled.
  void AddInputElementDescFromAttributeFormatIfValid(const AttributeFormat* format,
                                                     const char* semantic_name,
                                                     unsigned int semantic_index);

  // Capacity 15 = 1 position + 3 normals + 2 colors + 8 texcoords + 1 position matrix index,
  // which is the most the constructor can add.
  std::array<D3D12_INPUT_ELEMENT_DESC, 15> m_elems{};
  // Number of entries of m_elems that are in use.
  UINT m_num_elems = 0;
  D3D12_INPUT_LAYOUT_DESC m_layout12{};
};
}

View File

@ -1,248 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/PSTextureEncoder.h"
#include "Common/Align.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Core/HW/Memmap.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DShader.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "VideoBackends/D3D12/TextureCache.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/VideoCommon.h"
namespace DX12
{
// Parameters handed to the encoding pixel shader. PSTextureEncoder::Encode fills one of these
// and copies it byte-for-byte into the mapped constant buffer.
struct EFBEncodeParams
{
// Left and top edge of the source rectangle (taken from src_rect).
DWORD SrcLeft;
DWORD SrcTop;
// Set from Encode's native_width argument.
DWORD DestWidth;
// 2 when scale_by_half is requested, otherwise 1.
DWORD ScaleFactor;
};
// Construction does no GPU work; resources are created later by Init().
PSTextureEncoder::PSTextureEncoder()
{
}
void PSTextureEncoder::Init()
{
// Create output texture RGBA format
D3D12_RESOURCE_DESC out_tex_desc =
CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_B8G8R8A8_UNORM, EFB_WIDTH * 4, EFB_HEIGHT / 4, 1, 0,
1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);
D3D12_CLEAR_VALUE optimized_clear_value = {DXGI_FORMAT_B8G8R8A8_UNORM, {0.0f, 0.0f, 0.0f, 1.0f}};
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE, &out_tex_desc,
D3D12_RESOURCE_STATE_COPY_SOURCE, &optimized_clear_value, IID_PPV_ARGS(&m_out)));
D3D::SetDebugObjectName12(m_out, "efb encoder output texture");
// Create output render target view
D3D12_RENDER_TARGET_VIEW_DESC tex_rtv_desc = {
DXGI_FORMAT_B8G8R8A8_UNORM, // DXGI_FORMAT Format;
D3D12_RTV_DIMENSION_TEXTURE2D // D3D12_RTV_DIMENSION ViewDimension;
};
tex_rtv_desc.Texture2D.MipSlice = 0;
D3D::rtv_descriptor_heap_mgr->Allocate(&m_out_rtv_cpu);
D3D::device12->CreateRenderTargetView(m_out, &tex_rtv_desc, m_out_rtv_cpu);
// Create output staging buffer
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(
Common::AlignUp(static_cast<unsigned int>(out_tex_desc.Width) * 4,
D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) *
out_tex_desc.Height),
D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_out_readback_buffer)));
D3D::SetDebugObjectName12(m_out_readback_buffer, "efb encoder output staging buffer");
// Create constant buffer for uploading data to shaders. Need to align to 256 bytes.
unsigned int encode_params_buffer_size = (sizeof(EFBEncodeParams) + 0xff) & ~0xff;
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(encode_params_buffer_size), D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr, IID_PPV_ARGS(&m_encode_params_buffer)));
D3D::SetDebugObjectName12(m_encode_params_buffer, "efb encoder params buffer");
// NOTE: This upload buffer is okay to overwrite each time, since we block until completion when
// it's used anyway.
D3D12_RANGE read_range = {};
CheckHR(m_encode_params_buffer->Map(0, &read_range, &m_encode_params_buffer_data));
m_ready = true;
}
void PSTextureEncoder::Shutdown()
{
m_ready = false;
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_out);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_out_readback_buffer);
D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_encode_params_buffer);
for (auto& it : m_shader_blobs)
{
SAFE_RELEASE(it);
}
m_shader_blobs.clear();
m_encoding_shaders.clear();
}
// Encodes a region of the EFB into a GameCube/Wii texture format and writes it to `dst`.
//
// dst           - destination in emulated RAM; advanced by memory_stride per block row.
// format        - EFB copy format; selects the encoding pixel shader.
// native_width  - passed to the shader as the destination width.
// bytes_per_row - number of bytes produced per block row (also sets the viewport width in
//                 32-bit words).
// num_blocks_y  - number of block rows to encode and copy.
// memory_stride - distance in bytes between consecutive block rows in `dst`.
// is_depth_copy - encode from the depth buffer rather than the color buffer.
// src_rect      - source rectangle; only its left/top are forwarded to the shader.
// scale_by_half - downscale by two (forces linear sampling).
//
// The function renders into m_out, copies the result into the readback buffer, executes the
// queued GPU work, and then copies the rows to `dst`. Does nothing if Init() has not succeeded.
void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
{
if (!m_ready)  // Make sure we initialized OK
return;
D3D::command_list_mgr->CPUAccessNotify();
// Resolve MSAA targets before copying.
D3DTexture2D* efb_source =
is_depth_copy ?
FramebufferManager::GetResolvedEFBDepthTexture() :
// EXISTINGD3D11TODO: Instead of resolving EFB, it would be better to pick out a
// single sample from each pixel. The game may break if it isn't
// expecting the blurred edges around multisampled shapes.
FramebufferManager::GetResolvedEFBColorTexture();
// GetResolvedEFBDepthTexture will set the render targets, when MSAA is enabled
// (since it needs to do a manual depth resolve). So make sure to set the RTs
// afterwards.
const u32 words_per_row = bytes_per_row / sizeof(u32);
D3D::SetViewportAndScissor(0, 0, words_per_row, num_blocks_y);
constexpr EFBRectangle full_src_rect(0, 0, EFB_WIDTH, EFB_HEIGHT);
TargetRectangle target_rect = g_renderer->ConvertEFBRectangle(full_src_rect);
// m_out rests in the COPY_SOURCE state between calls; switch it to a render target for the draw.
D3D::ResourceBarrier(D3D::current_command_list, m_out, D3D12_RESOURCE_STATE_COPY_SOURCE,
D3D12_RESOURCE_STATE_RENDER_TARGET, 0);
D3D::current_command_list->OMSetRenderTargets(1, &m_out_rtv_cpu, FALSE, nullptr);
// Upload the shader constants through the persistently mapped upload buffer.
EFBEncodeParams params;
params.SrcLeft = src_rect.left;
params.SrcTop = src_rect.top;
params.DestWidth = native_width;
params.ScaleFactor = scale_by_half ? 2 : 1;
memcpy(m_encode_params_buffer_data, &params, sizeof(params));
D3D::current_command_list->SetGraphicsRootConstantBufferView(
DESCRIPTOR_TABLE_PS_CBVONE, m_encode_params_buffer->GetGPUVirtualAddress());
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x
// TODO: This only produces perfect downsampling for 1.5x and 2x IR, other resolutions will
// need more complex down filtering to average all pixels and produce the correct result.
// Also, box filtering won't be correct for anything other than 1x IR
if (scale_by_half || g_ActiveConfig.iEFBScale != SCALE_1X)
D3D::SetLinearCopySampler();
else
D3D::SetPointCopySampler();
D3D::DrawShadedTexQuad(
efb_source, target_rect.AsRECT(), g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(),
GetEncodingPixelShader(format), StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), 1.0f, 0,
DXGI_FORMAT_B8G8R8A8_UNORM, false, false /* Render target is not multisampled */
);
// Copy to staging buffer
// Only the region that was actually rendered (words_per_row x num_blocks_y) is copied, but the
// footprint describes the full-size texture so the row pitch matches the readback buffer layout.
D3D12_BOX src_box = CD3DX12_BOX(0, 0, 0, words_per_row, num_blocks_y, 1);
D3D12_TEXTURE_COPY_LOCATION dst_location = {};
dst_location.pResource = m_out_readback_buffer;
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_location.PlacedFootprint.Offset = 0;
dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
dst_location.PlacedFootprint.Footprint.Width = EFB_WIDTH * 4;
dst_location.PlacedFootprint.Footprint.Height = EFB_HEIGHT / 4;
dst_location.PlacedFootprint.Footprint.Depth = 1;
dst_location.PlacedFootprint.Footprint.RowPitch = Common::AlignUp(
dst_location.PlacedFootprint.Footprint.Width * 4, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
D3D12_TEXTURE_COPY_LOCATION src_location = {};
src_location.pResource = m_out;
src_location.SubresourceIndex = 0;
src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
D3D::ResourceBarrier(D3D::current_command_list, m_out, D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_COPY_SOURCE, 0);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
// Put the EFB textures back into the states used for normal rendering.
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
// State is automatically restored after executing command list.
// NOTE(review): the `true` argument presumably makes this call block until the GPU is done,
// which the readback below depends on - confirm against ExecuteQueuedWork.
D3D::command_list_mgr->ExecuteQueuedWork(true);
// Transfer staging buffer to GameCube/Wii RAM
void* readback_data_map;
D3D12_RANGE read_range = {0, dst_location.PlacedFootprint.Footprint.RowPitch * num_blocks_y};
CheckHR(m_out_readback_buffer->Map(0, &read_range, &readback_data_map));
u8* src = static_cast<u8*>(readback_data_map);
// Never read more than one padded row of the staging buffer per destination row.
u32 read_stride = std::min(bytes_per_row, dst_location.PlacedFootprint.Footprint.RowPitch);
for (unsigned int y = 0; y < num_blocks_y; ++y)
{
memcpy(dst, src, read_stride);
dst += memory_stride;
src += dst_location.PlacedFootprint.Footprint.RowPitch;
}
// The CPU wrote nothing to the readback buffer, so an empty written range is passed to Unmap.
D3D12_RANGE write_range = {};
m_out_readback_buffer->Unmap(0, &write_range);
}
// Returns the pixel shader bytecode that encodes into `format`, compiling and caching it on
// first use. A failed compile raises a PanicAlert and caches an empty bytecode entry, so the
// same format is not compiled again; the empty bytecode is returned in that case.
D3D12_SHADER_BYTECODE PSTextureEncoder::GetEncodingPixelShader(const EFBCopyFormat& format)
{
  const auto cached = m_encoding_shaders.find(format);
  if (cached != m_encoding_shaders.end())
    return cached->second;

  const char* shader_source =
      TextureConversionShader::GenerateEncodingShader(format, APIType::D3D);

  ID3DBlob* compiled_blob = nullptr;
  if (!D3D::CompilePixelShader(shader_source, &compiled_blob))
  {
    PanicAlert("Failed to compile texture encoding shader.");
    m_encoding_shaders[format] = {};
    return {};
  }

  D3D12_SHADER_BYTECODE compiled_shader = {compiled_blob->GetBufferPointer(),
                                           compiled_blob->GetBufferSize()};
  m_encoding_shaders.emplace(format, compiled_shader);

  // The bytecode points into the blob, so the blob is kept alive until Shutdown() releases it.
  m_shader_blobs.push_back(compiled_blob);
  return compiled_shader;
}
}

View File

@ -1,44 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <map>
#include <vector>
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/VideoCommon.h"
namespace DX12
{
// Encodes EFB contents into GameCube/Wii texture formats using pixel shaders, then reads the
// result back to CPU memory.
class PSTextureEncoder final
{
public:
PSTextureEncoder();
// Creates the GPU resources; Encode() is a no-op until this has completed.
void Init();
// Schedules the GPU resources for destruction and releases the compiled shaders.
void Shutdown();
// Renders the encoded data and copies num_blocks_y rows of it to dst, advancing dst by
// memory_stride bytes per row.
void Encode(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half);
private:
// Looks up (compiling on first use) the encoding shader for the given copy format.
D3D12_SHADER_BYTECODE GetEncodingPixelShader(const EFBCopyFormat& format);
// Set by Init(), cleared by Shutdown().
bool m_ready = false;
// Render target the encoding shaders draw into, and its CPU descriptor handle.
ID3D12Resource* m_out = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE m_out_rtv_cpu = {};
// Readback-heap buffer that m_out is copied into for CPU access.
ID3D12Resource* m_out_readback_buffer = nullptr;
// Upload-heap constant buffer and the pointer it is mapped at for the encoder's lifetime.
ID3D12Resource* m_encode_params_buffer = nullptr;
void* m_encode_params_buffer_data = nullptr;
// Cache of compiled shaders; the bytecode entries point into the blobs in m_shader_blobs.
std::map<EFBCopyFormat, D3D12_SHADER_BYTECODE> m_encoding_shaders;
std::vector<ID3DBlob*> m_shader_blobs;
};
}

View File

@ -1,227 +0,0 @@
// Copyright 2012 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <algorithm>
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/PerfQuery.h"
#include "VideoCommon/RenderBase.h"
namespace DX12
{
// Creates the occlusion query heap, the readback buffer that query results are resolved
// into (one UINT64 per query slot), and registers a callback so this object learns the
// fence value of each queued command list.
PerfQuery::PerfQuery()
{
  D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_OCCLUSION, PERF_QUERY_BUFFER_SIZE, 0};
  CheckHR(D3D::device12->CreateQueryHeap(&desc, IID_PPV_ARGS(&m_query_heap)));

  // Named locals instead of temporaries: taking the address of a temporary is not valid C++
  // and only compiles as a compiler extension.
  const CD3DX12_HEAP_PROPERTIES readback_heap_properties(D3D12_HEAP_TYPE_READBACK);
  const CD3DX12_RESOURCE_DESC readback_buffer_desc =
      CD3DX12_RESOURCE_DESC::Buffer(QUERY_READBACK_BUFFER_SIZE);
  CheckHR(D3D::device12->CreateCommittedResource(
      &readback_heap_properties, D3D12_HEAP_FLAG_NONE, &readback_buffer_desc,
      D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_query_readback_buffer)));

  m_tracking_fence =
      D3D::command_list_mgr->RegisterQueueFenceCallback(this, &PerfQuery::QueueFenceCallback);
}
// Unregisters the queue fence callback installed by the constructor, then releases the query
// heap and the readback buffer.
PerfQuery::~PerfQuery()
{
D3D::command_list_mgr->RemoveQueueFenceCallback(this);
SAFE_RELEASE(m_query_heap);
SAFE_RELEASE(m_query_readback_buffer);
}
// Starts an occlusion query for the given group. Before doing so, completed queries are
// drained once the ring buffer is more than half full, and the oldest query is flushed
// (blocking) if the buffer is completely full. Only the two ZCOMP groups start a GPU query.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
  const size_t capacity = m_query_buffer.size();

  if (m_query_count > capacity / 2)
    WeakFlush();

  // All slots still in use? Then make room by waiting for the oldest query.
  if (capacity == m_query_count)
  {
    FlushOne();
    // WARN_LOG(VIDEO, "Flushed query buffer early!");
  }

  if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
    return;

  // The next free slot follows the last pending query in the ring.
  const size_t slot = (m_query_read_pos + m_query_count) % capacity;
  D3D::current_command_list->BeginQuery(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION,
                                        static_cast<UINT>(slot));

  auto& query = m_query_buffer[slot];
  query.query_type = type;
  // Placeholder until DisableQuery records the real fence value.
  query.fence_value = -1;
  ++m_query_count;
}
// Ends the most recently started occlusion query, resolves its result into the readback
// buffer, and records the fence value that must be reached before the result can be read.
// Groups other than the two ZCOMP groups are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
  if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
    return;

  // Slot of the last pending query; the buffer size is added so the subtraction cannot
  // wrap below zero.
  const size_t capacity = m_query_buffer.size();
  const size_t slot = (m_query_read_pos + m_query_count + capacity - 1) % capacity;
  const UINT heap_index = static_cast<UINT>(slot);

  D3D::current_command_list->EndQuery(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION, heap_index);
  D3D::current_command_list->ResolveQueryData(m_query_heap, D3D12_QUERY_TYPE_OCCLUSION,
                                              heap_index, 1, m_query_readback_buffer,
                                              slot * sizeof(UINT64));

  m_query_buffer[slot].fence_value = m_next_fence_value;
}
// Forgets all pending queries and zeroes every accumulated result.
void PerfQuery::ResetQuery()
{
  m_query_count = 0;
  for (auto& accumulated_result : m_results)
    accumulated_result = 0;
}
// Maps a requested counter type onto the accumulated per-group results. Types that are not
// handled here report 0.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  switch (type)
  {
  case PQ_ZCOMP_INPUT_ZCOMPLOC:
  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
    return m_results[PQG_ZCOMP_ZCOMPLOC];

  case PQ_ZCOMP_INPUT:
  case PQ_ZCOMP_OUTPUT:
    return m_results[PQG_ZCOMP];

  case PQ_BLEND_INPUT:
    return m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];

  case PQ_EFB_COPY_CLOCKS:
    return m_results[PQG_EFB_COPY_CLOCKS];

  default:
    return 0;
  }
}
void PerfQuery::FlushOne()
{
size_t index = m_query_read_pos;
ActiveQuery& entry = m_query_buffer[index];
// Has the command list been executed yet?
if (entry.fence_value == m_next_fence_value)
D3D::command_list_mgr->ExecuteQueuedWork(false);
// Block until the fence is reached
D3D::command_list_mgr->WaitOnCPUForFence(m_tracking_fence, entry.fence_value);
// Copy from readback buffer to local
void* readback_buffer_map;
D3D12_RANGE read_range = {sizeof(UINT64) * index, sizeof(UINT64) * (index + 1)};
CheckHR(m_query_readback_buffer->Map(0, &read_range, &readback_buffer_map));
UINT64 result;
memcpy(&result, reinterpret_cast<u8*>(readback_buffer_map) + sizeof(UINT64) * index,
sizeof(UINT64));
D3D12_RANGE write_range = {};
m_query_readback_buffer->Unmap(0, &write_range);
// NOTE: Reported pixel metrics should be referenced to native resolution
// TODO: Dropping the lower 2 bits from this count should be closer to actual
// hardware behavior when drawing triangles.
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
EFB_HEIGHT / g_renderer->GetTargetHeight());
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
m_query_count--;
}
// Walks every pending query in ring order and returns the largest fence value among them,
// or 0 when nothing is pending.
UINT64 PerfQuery::FindLastPendingFenceValue() const
{
  UINT64 newest_fence_value = 0;
  u32 position = m_query_read_pos;

  for (u32 remaining = m_query_count; remaining > 0; remaining--)
  {
    const ActiveQuery& query = m_query_buffer[position];
    newest_fence_value = std::max(query.fence_value, newest_fence_value);
    position = (position + 1) % m_query_buffer.size();
  }

  return newest_fence_value;
}
void PerfQuery::FlushResults()
{
if (IsFlushed())
return;
// Find the fence value we have to wait for.
UINT64 last_fence_value = FindLastPendingFenceValue();
if (last_fence_value == m_next_fence_value)
D3D::command_list_mgr->ExecuteQueuedWork(false);
// Wait for all queries to be resolved.
D3D::command_list_mgr->WaitOnCPUForFence(m_tracking_fence, last_fence_value);
// Map the whole readback buffer. Shouldn't have much overhead, and saves taking the
// wrapped-around cases into consideration.
void* readback_buffer_map;
D3D12_RANGE read_range = {0, QUERY_READBACK_BUFFER_SIZE};
CheckHR(m_query_readback_buffer->Map(0, &read_range, &readback_buffer_map));
// Read all pending queries.
while (m_query_count > 0)
{
ActiveQuery& entry = m_query_buffer[m_query_read_pos];
UINT64 result;
memcpy(&result, reinterpret_cast<u8*>(readback_buffer_map) + sizeof(UINT64) * m_query_read_pos,
sizeof(UINT64));
// NOTE: Reported pixel metrics should be referenced to native resolution
// TODO: Dropping the lower 2 bits from this count should be closer to actual
// hardware behavior when drawing triangles.
m_results[entry.query_type] += (u32)(result * EFB_WIDTH / g_renderer->GetTargetWidth() *
EFB_HEIGHT / g_renderer->GetTargetHeight());
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
m_query_count--;
}
D3D12_RANGE write_range = {};
m_query_readback_buffer->Unmap(0, &write_range);
}
void PerfQuery::WeakFlush()
{
UINT64 completed_fence = m_tracking_fence->GetCompletedValue();
while (!IsFlushed())
{
ActiveQuery& entry = m_query_buffer[m_query_read_pos];
if (entry.fence_value > completed_fence)
break;
FlushOne();
}
}
// True when no queries are pending in the ring buffer.
bool PerfQuery::IsFlushed() const
{
return m_query_count == 0;
}
void PerfQuery::QueueFenceCallback(void* owning_object, UINT64 fence_value)
{
PerfQuery* owning_perf_query = static_cast<PerfQuery*>(owning_object);
owning_perf_query->QueueFence(fence_value);
}
// Records the fence value that queries recorded from now on will be tagged with: one past
// the fence value that was just reported.
void PerfQuery::QueueFence(UINT64 fence_value)
{
m_next_fence_value = fence_value + 1;
}
} // namespace

View File

@ -1,59 +0,0 @@
// Copyright 2012 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <d3d12.h>
#include "VideoCommon/PerfQueryBase.h"
namespace DX12
{
// D3D12 implementation of PerfQueryBase using occlusion queries. Pending queries live in a
// fixed-size ring buffer; each one is tagged with the fence value that must complete before
// its result can be read back.
class PerfQuery final : public PerfQueryBase
{
public:
PerfQuery();
~PerfQuery();
void EnableQuery(PerfQueryGroup type) override;
void DisableQuery(PerfQueryGroup type) override;
void ResetQuery() override;
u32 GetQueryResult(PerfQueryType type) override;
void FlushResults() override;
bool IsFlushed() const override;
private:
// One pending query in the ring buffer.
struct ActiveQuery
{
// Group whose total receives this query's result.
PerfQueryGroup query_type;
// Fence value to wait for before reading the result.
UINT64 fence_value;
};
// Collects results of queries that have already completed, without blocking on the rest.
void WeakFlush();
// Find the last fence value of all pending queries.
UINT64 FindLastPendingFenceValue() const;
// Only use when non-empty
void FlushOne();
// Callback registered with the command list manager; forwards to QueueFence.
static void QueueFenceCallback(void* owning_object, UINT64 fence_value);
void QueueFence(UINT64 fence_value);
// when testing in SMS: 64 was too small, 128 was ok
static constexpr size_t PERF_QUERY_BUFFER_SIZE = 512;
// One UINT64 result per query slot.
static constexpr size_t QUERY_READBACK_BUFFER_SIZE = PERF_QUERY_BUFFER_SIZE * sizeof(UINT64);
std::array<ActiveQuery, PERF_QUERY_BUFFER_SIZE> m_query_buffer;
// Ring buffer index of the oldest pending query.
int m_query_read_pos = 0;
ID3D12QueryHeap* m_query_heap = nullptr;
ID3D12Resource* m_query_readback_buffer = nullptr;
ID3D12Fence* m_tracking_fence = nullptr;
// Fence value assigned to queries ended from now on; updated by QueueFence.
UINT64 m_next_fence_value = 0;
};
} // namespace

File diff suppressed because it is too large Load Diff

View File

@ -1,71 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <d3d12.h>
#include <string>
#include "VideoCommon/RenderBase.h"
enum class EFBAccessType;
namespace DX12
{
class D3DTexture2D;
// D3D12 implementation of the common ::Renderer interface. Only declarations are visible
// here; the definitions live in the corresponding source file.
class Renderer final : public ::Renderer
{
public:
Renderer();
~Renderer() override;
// Overrides of the ::Renderer state setters.
void SetColorMask() override;
void SetBlendMode(bool force_update) override;
void SetScissorRect(const EFBRectangle& rc) override;
void SetGenerationMode() override;
void SetDepthMode() override;
void SetLogicOpMode() override;
void SetSamplerState(int stage, int tex_index, bool custom_tex) override;
void SetInterlacingMode() override;
void SetViewport() override;
// TODO: Fix confusing names (see ResetAPIState and RestoreAPIState)
void ApplyState() override;
void RestoreState() override;
// Backend-specific helpers (not part of the ::Renderer interface).
void ApplyCullDisable();
void RestoreCull();
void RenderText(const std::string& text, int left, int top, u32 color) override;
// CPU access to the EFB.
u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override;
// Bounding box access.
u16 BBoxRead(int index) override;
void BBoxWrite(int index, u16 value) override;
void ResetAPIState() override;
void RestoreAPIState() override;
TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override;
void SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, const EFBRectangle& rc,
u64 ticks, float gamma) override;
void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable,
u32 color, u32 z) override;
void ReinterpretPixelData(unsigned int conv_type) override;
bool CheckForResize();
// Return the "reset" blend, depth-stencil and rasterizer descriptors.
// NOTE(review): presumably the state used by ResetAPIState - confirm in the source file.
static D3D12_BLEND_DESC GetResetBlendDesc();
static D3D12_DEPTH_STENCIL_DESC GetResetDepthStencilDesc();
static D3D12_RASTERIZER_DESC GetResetRasterizerDesc();
private:
void BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture,
u32 src_width, u32 src_height, float gamma);
};
}

View File

@ -1,360 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/LinearDiskCache.h"
#include "Core/ConfigManager.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DShader.h"
#include "VideoBackends/D3D12/ShaderCache.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VideoCommon.h"
namespace DX12
{
// Primitive topology type is always triangle, unless the GS stage is used. This is consumed
// by the PSO created in Renderer::ApplyState.
static D3D12_PRIMITIVE_TOPOLOGY_TYPE s_current_primitive_topology =
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
// In-memory caches mapping a shader UID to its compiled bytecode. The bytecode structures
// point into blobs tracked by s_shader_blob_list.
using GsBytecodeCache = std::map<GeometryShaderUid, D3D12_SHADER_BYTECODE>;
using PsBytecodeCache = std::map<PixelShaderUid, D3D12_SHADER_BYTECODE>;
using VsBytecodeCache = std::map<VertexShaderUid, D3D12_SHADER_BYTECODE>;
GsBytecodeCache s_gs_bytecode_cache;
PsBytecodeCache s_ps_bytecode_cache;
VsBytecodeCache s_vs_bytecode_cache;
// Used to keep track of blobs to release at Shutdown time.
static std::vector<ID3DBlob*> s_shader_blob_list;
// On-disk caches; newly compiled shaders are appended and existing entries are loaded by Init.
static LinearDiskCache<GeometryShaderUid, u8> s_gs_disk_cache;
static LinearDiskCache<PixelShaderUid, u8> s_ps_disk_cache;
static LinearDiskCache<VertexShaderUid, u8> s_vs_disk_cache;
// Bytecode and UID of the shaders selected by the last LoadAndSetActiveShaders call.
static D3D12_SHADER_BYTECODE s_last_geometry_shader_bytecode;
static D3D12_SHADER_BYTECODE s_last_pixel_shader_bytecode;
static D3D12_SHADER_BYTECODE s_last_vertex_shader_bytecode;
static GeometryShaderUid s_last_geometry_shader_uid;
static PixelShaderUid s_last_pixel_shader_uid;
static VertexShaderUid s_last_vertex_shader_uid;
// Disk cache reader that copies each stored shader into a freshly created blob and inserts
// it into the in-memory bytecode cache given as the `cache` template argument.
template <class UidType, class ShaderCacheType, ShaderCacheType* cache>
class ShaderCacheInserter final : public LinearDiskCacheReader<UidType, u8>
{
public:
  // Called for every entry read from the disk cache. Marked `override` so a signature
  // mismatch with the reader interface is caught at compile time.
  void Read(const UidType& key, const u8* value, u32 value_size) override
  {
    ID3DBlob* blob = nullptr;
    CheckHR(d3d_create_blob(value_size, &blob));
    memcpy(blob->GetBufferPointer(), value, value_size);

    // The blob is tracked by InsertByteCode and released when the cache is cleared.
    ShaderCache::InsertByteCode<UidType, ShaderCacheType>(key, cache, blob);
  }
};
void ShaderCache::Init()
{
// This class intentionally shares its shader cache files with DX11, as the shaders are (right
// now) identical.
// Reduces unnecessary compilation when switching between APIs.
s_last_geometry_shader_bytecode = {};
s_last_pixel_shader_bytecode = {};
s_last_vertex_shader_bytecode = {};
s_last_geometry_shader_uid = {};
s_last_pixel_shader_uid = {};
s_last_vertex_shader_uid = {};
if (g_ActiveConfig.bShaderCache)
{
// Ensure shader cache directory exists..
std::string shader_cache_path = File::GetUserPath(D_SHADERCACHE_IDX);
if (!File::Exists(shader_cache_path))
File::CreateDir(File::GetUserPath(D_SHADERCACHE_IDX));
const std::string& title_game_id = SConfig::GetInstance().GetGameID();
std::string gs_cache_filename =
StringFromFormat("%sdx11-%s-gs.cache", shader_cache_path.c_str(), title_game_id.c_str());
std::string ps_cache_filename =
StringFromFormat("%sdx11-%s-ps.cache", shader_cache_path.c_str(), title_game_id.c_str());
std::string vs_cache_filename =
StringFromFormat("%sdx11-%s-vs.cache", shader_cache_path.c_str(), title_game_id.c_str());
ShaderCacheInserter<GeometryShaderUid, GsBytecodeCache, &s_gs_bytecode_cache> gs_inserter;
s_gs_disk_cache.OpenAndRead(gs_cache_filename, gs_inserter);
ShaderCacheInserter<PixelShaderUid, PsBytecodeCache, &s_ps_bytecode_cache> ps_inserter;
s_ps_disk_cache.OpenAndRead(ps_cache_filename, ps_inserter);
ShaderCacheInserter<VertexShaderUid, VsBytecodeCache, &s_vs_bytecode_cache> vs_inserter;
s_vs_disk_cache.OpenAndRead(vs_cache_filename, vs_inserter);
}
SETSTAT(stats.numPixelShadersAlive, static_cast<int>(s_ps_bytecode_cache.size()));
SETSTAT(stats.numPixelShadersCreated, static_cast<int>(s_ps_bytecode_cache.size()));
SETSTAT(stats.numVertexShadersAlive, static_cast<int>(s_vs_bytecode_cache.size()));
SETSTAT(stats.numVertexShadersCreated, static_cast<int>(s_vs_bytecode_cache.size()));
}
// Releases every tracked shader blob, empties the three in-memory bytecode caches, and
// resets the "last used" shader state.
void ShaderCache::Clear()
{
  for (auto& shader_blob : s_shader_blob_list)
    SAFE_RELEASE(shader_blob);
  s_shader_blob_list.clear();

  // The cached bytecode pointed into the blobs released above, so it is dropped as well.
  s_vs_bytecode_cache.clear();
  s_ps_bytecode_cache.clear();
  s_gs_bytecode_cache.clear();

  s_last_vertex_shader_uid = {};
  s_last_vertex_shader_bytecode = {};
  s_last_pixel_shader_uid = {};
  s_last_pixel_shader_bytecode = {};
  s_last_geometry_shader_uid = {};
  s_last_geometry_shader_bytecode = {};
}
// Clears the in-memory caches, then syncs and closes each of the three disk caches.
void ShaderCache::Shutdown()
{
Clear();
s_gs_disk_cache.Sync();
s_gs_disk_cache.Close();
s_ps_disk_cache.Sync();
s_ps_disk_cache.Close();
s_vs_disk_cache.Sync();
s_vs_disk_cache.Close();
}
// Selects the shaders for the current pipeline state. The primitive topology is always
// updated; each shader stage is only looked up or compiled when its UID differs from the
// one used last time. If any stage changed, the PSO is flagged as dirty.
void ShaderCache::LoadAndSetActiveShaders(u32 gs_primitive_type)
{
  SetCurrentPrimitiveTopology(gs_primitive_type);

  // Gather all three UIDs up front, before any handler updates the "last used" state.
  GeometryShaderUid gs_uid = GetGeometryShaderUid(gs_primitive_type);
  PixelShaderUid ps_uid = GetPixelShaderUid();
  VertexShaderUid vs_uid = GetVertexShaderUid();

  const bool gs_differs = gs_uid != s_last_geometry_shader_uid;
  const bool ps_differs = ps_uid != s_last_pixel_shader_uid;
  const bool vs_differs = vs_uid != s_last_vertex_shader_uid;

  const bool anything_differs = gs_differs || ps_differs || vs_differs;
  if (!anything_differs)
    return;

  if (gs_differs)
    HandleGSUIDChange(gs_uid, gs_primitive_type);
  if (ps_differs)
    HandlePSUIDChange(ps_uid);
  if (vs_differs)
    HandleVSUIDChange(vs_uid);

  // A Uid has changed, so the PSO will need to be reset at next ApplyState.
  D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PSO, true);
}
// Records the D3D12 topology type matching the given primitive type. Unknown primitive
// types trigger the CHECK failure and leave the current topology unchanged.
void ShaderCache::SetCurrentPrimitiveTopology(u32 gs_primitive_type)
{
  switch (gs_primitive_type)
  {
  case PRIMITIVE_TRIANGLES:
    s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
    return;
  case PRIMITIVE_LINES:
    s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
    return;
  case PRIMITIVE_POINTS:
    s_current_primitive_topology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
    return;
  default:
    break;
  }

  CHECK(0, "Invalid primitive type.");
}
// Makes the geometry shader identified by gs_uid the active one. Passthrough UIDs select an
// empty bytecode (no geometry shader). Otherwise the shader is taken from the in-memory
// cache, or compiled, cached and appended to the disk cache. On a compile failure the
// previously active bytecode is left in place.
void ShaderCache::HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_type)
{
  s_last_geometry_shader_uid = gs_uid;

  if (gs_uid.GetUidData()->IsPassthrough())
  {
    s_last_geometry_shader_bytecode = {};
    return;
  }

  auto cached = s_gs_bytecode_cache.find(gs_uid);
  if (cached != s_gs_bytecode_cache.end())
  {
    s_last_geometry_shader_bytecode = cached->second;
    return;
  }

  // Cache miss: generate and compile the shader.
  ShaderCode source = GenerateGeometryShaderCode(APIType::D3D, gs_uid.GetUidData());
  ID3DBlob* compiled_blob = nullptr;
  if (!D3D::CompileGeometryShader(source.GetBuffer(), &compiled_blob))
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return;
  }

  s_last_geometry_shader_bytecode = InsertByteCode(gs_uid, &s_gs_bytecode_cache, compiled_blob);
  s_gs_disk_cache.Append(gs_uid, reinterpret_cast<u8*>(compiled_blob->GetBufferPointer()),
                         static_cast<u32>(compiled_blob->GetBufferSize()));
}
// Makes the pixel shader identified by ps_uid the active one, taking it from the in-memory
// cache or compiling it on a miss. Newly compiled shaders are cached, appended to the disk
// cache and counted in the statistics. On a compile failure the previously active bytecode
// is left in place.
void ShaderCache::HandlePSUIDChange(PixelShaderUid ps_uid)
{
  s_last_pixel_shader_uid = ps_uid;

  auto cached = s_ps_bytecode_cache.find(ps_uid);
  if (cached != s_ps_bytecode_cache.end())
  {
    s_last_pixel_shader_bytecode = cached->second;
    GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
    return;
  }

  // Cache miss: generate and compile the shader.
  ShaderCode source = GeneratePixelShaderCode(APIType::D3D, ps_uid.GetUidData());
  ID3DBlob* compiled_blob = nullptr;
  if (!D3D::CompilePixelShader(source.GetBuffer(), &compiled_blob))
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return;
  }

  s_last_pixel_shader_bytecode = InsertByteCode(ps_uid, &s_ps_bytecode_cache, compiled_blob);
  s_ps_disk_cache.Append(ps_uid, reinterpret_cast<u8*>(compiled_blob->GetBufferPointer()),
                         static_cast<u32>(compiled_blob->GetBufferSize()));

  SETSTAT(stats.numPixelShadersAlive, static_cast<int>(s_ps_bytecode_cache.size()));
  INCSTAT(stats.numPixelShadersCreated);
}
// Makes the vertex shader identified by vs_uid the active one, taking it from the in-memory
// cache or compiling it on a miss. Newly compiled shaders are cached, appended to the disk
// cache and counted in the statistics. On a compile failure the previously active bytecode
// is left in place.
void ShaderCache::HandleVSUIDChange(VertexShaderUid vs_uid)
{
  s_last_vertex_shader_uid = vs_uid;

  auto cached = s_vs_bytecode_cache.find(vs_uid);
  if (cached != s_vs_bytecode_cache.end())
  {
    s_last_vertex_shader_bytecode = cached->second;
    GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
    return;
  }

  // Cache miss: generate and compile the shader.
  ShaderCode source = GenerateVertexShaderCode(APIType::D3D, vs_uid.GetUidData());
  ID3DBlob* compiled_blob = nullptr;
  if (!D3D::CompileVertexShader(source.GetBuffer(), &compiled_blob))
  {
    GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
    return;
  }

  s_last_vertex_shader_bytecode = InsertByteCode(vs_uid, &s_vs_bytecode_cache, compiled_blob);
  s_vs_disk_cache.Append(vs_uid, reinterpret_cast<u8*>(compiled_blob->GetBufferPointer()),
                         static_cast<u32>(compiled_blob->GetBufferSize()));

  SETSTAT(stats.numVertexShadersAlive, static_cast<int>(s_vs_bytecode_cache.size()));
  INCSTAT(stats.numVertexShadersCreated);
}
// Stores the bytecode of bytecode_blob in *shader_cache under uid and returns the stored entry.
// The entry points directly into the blob's buffer, so the blob must not be released by the
// caller: it is appended to s_shader_blob_list and, per the original note, released at
// Shutdown() time (in D3D12 the raw bytecode itself is what gets bound).
template <class UidType, class ShaderCacheType>
D3D12_SHADER_BYTECODE ShaderCache::InsertByteCode(const UidType& uid, ShaderCacheType* shader_cache,
                                                  ID3DBlob* bytecode_blob)
{
  // Keep the blob around for as long as the cache entry may be used.
  s_shader_blob_list.push_back(bytecode_blob);

  const D3D12_SHADER_BYTECODE cache_entry = {bytecode_blob->GetBufferPointer(),
                                             bytecode_blob->GetBufferSize()};
  (*shader_cache)[uid] = cache_entry;
  return cache_entry;
}
// Returns the primitive topology type currently stored in s_current_primitive_topology.
D3D12_PRIMITIVE_TOPOLOGY_TYPE ShaderCache::GetCurrentPrimitiveTopology()
{
return s_current_primitive_topology;
}
// Returns a copy of the bytecode descriptor most recently selected for the geometry stage.
D3D12_SHADER_BYTECODE ShaderCache::GetActiveGeometryShaderBytecode()
{
return s_last_geometry_shader_bytecode;
}
// Returns a copy of the bytecode descriptor most recently selected for the pixel stage
// (updated by HandlePSUIDChange).
D3D12_SHADER_BYTECODE ShaderCache::GetActivePixelShaderBytecode()
{
return s_last_pixel_shader_bytecode;
}
// Returns a copy of the bytecode descriptor most recently selected for the vertex stage
// (updated by HandleVSUIDChange).
D3D12_SHADER_BYTECODE ShaderCache::GetActiveVertexShaderBytecode()
{
return s_last_vertex_shader_bytecode;
}
// Returns the address of the file-level s_last_geometry_shader_uid. The pointer itself never
// changes; the value it points at does whenever a new geometry shader UID becomes active.
const GeometryShaderUid* ShaderCache::GetActiveGeometryShaderUid()
{
return &s_last_geometry_shader_uid;
}
// Returns the address of the file-level s_last_pixel_shader_uid, which HandlePSUIDChange
// overwrites each time it runs.
const PixelShaderUid* ShaderCache::GetActivePixelShaderUid()
{
return &s_last_pixel_shader_uid;
}
// Returns the address of the file-level s_last_vertex_shader_uid, which HandleVSUIDChange
// overwrites each time it runs.
const VertexShaderUid* ShaderCache::GetActiveVertexShaderUid()
{
return &s_last_vertex_shader_uid;
}
// Looks up the cached geometry shader bytecode for *uid.
// Returns a value-initialized (empty) D3D12_SHADER_BYTECODE when the UID is not in the cache.
D3D12_SHADER_BYTECODE ShaderCache::GetGeometryShaderFromUid(const GeometryShaderUid* uid)
{
  auto cache_entry = s_gs_bytecode_cache.find(*uid);
  if (cache_entry == s_gs_bytecode_cache.end())
    return D3D12_SHADER_BYTECODE();

  return cache_entry->second;
}
// Looks up the cached pixel shader bytecode for *uid.
// Returns a value-initialized (empty) D3D12_SHADER_BYTECODE when the UID is not in the cache.
D3D12_SHADER_BYTECODE ShaderCache::GetPixelShaderFromUid(const PixelShaderUid* uid)
{
  auto cache_entry = s_ps_bytecode_cache.find(*uid);
  if (cache_entry == s_ps_bytecode_cache.end())
    return D3D12_SHADER_BYTECODE();

  return cache_entry->second;
}
// Looks up the cached vertex shader bytecode for *uid.
// Returns a value-initialized (empty) D3D12_SHADER_BYTECODE when the UID is not in the cache.
D3D12_SHADER_BYTECODE ShaderCache::GetVertexShaderFromUid(const VertexShaderUid* uid)
{
  auto cache_entry = s_vs_bytecode_cache.find(*uid);
  if (cache_entry == s_vs_bytecode_cache.end())
    return D3D12_SHADER_BYTECODE();

  return cache_entry->second;
}
}

View File

@ -1,51 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/CommonTypes.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/VertexShaderGen.h"
namespace DX12
{
// Forward declaration; none of the declarations below refer to it.
class D3DBlob;
// Static-only interface to the D3D12 backend's geometry, pixel and vertex shader bytecode
// caches. Every member is static, so the class is never instantiated.
class ShaderCache final
{
public:
// Lifetime management of the caches.
static void Init();
static void Clear();
static void Shutdown();
// NOTE(review): presumably selects the active shaders for the current pipeline state and the
// given geometry shader primitive type - confirm against the implementation.
static void LoadAndSetActiveShaders(u32 gs_primitive_type);
// Stores bytecode_blob's bytecode in *shader_cache under uid and returns the stored entry.
// The blob is retained by the cache, so callers must not release it.
template <class UidType, class ShaderCacheType>
static D3D12_SHADER_BYTECODE InsertByteCode(const UidType& uid, ShaderCacheType* shader_cache,
ID3DBlob* bytecode_blob);
// Bytecode most recently made active for each shader stage.
static D3D12_SHADER_BYTECODE GetActiveGeometryShaderBytecode();
static D3D12_SHADER_BYTECODE GetActivePixelShaderBytecode();
static D3D12_SHADER_BYTECODE GetActiveVertexShaderBytecode();
// Pointers to the UIDs most recently made active for each shader stage.
static const GeometryShaderUid* GetActiveGeometryShaderUid();
static const PixelShaderUid* GetActivePixelShaderUid();
static const VertexShaderUid* GetActiveVertexShaderUid();
// Cache lookups by UID; each returns an empty D3D12_SHADER_BYTECODE when the UID is not cached.
static D3D12_SHADER_BYTECODE GetGeometryShaderFromUid(const GeometryShaderUid* uid);
static D3D12_SHADER_BYTECODE GetPixelShaderFromUid(const PixelShaderUid* uid);
static D3D12_SHADER_BYTECODE GetVertexShaderFromUid(const VertexShaderUid* uid);
// Primitive topology type currently recorded by the cache.
static D3D12_PRIMITIVE_TOPOLOGY_TYPE GetCurrentPrimitiveTopology();
private:
static void SetCurrentPrimitiveTopology(u32 gs_primitive_type);
// Per-stage handlers that make the given UID active, compiling and caching the shader if it
// has not been seen before.
static void HandleGSUIDChange(GeometryShaderUid gs_uid, u32 gs_primitive_type);
static void HandlePSUIDChange(PixelShaderUid ps_uid);
static void HandleVSUIDChange(VertexShaderUid vs_uid);
};
}

View File

@ -1,165 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <memory>
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/ShaderConstantsManager.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// Shader stages that own a constant buffer. The values are used as indices into
// s_shader_constant_stream_buffers and s_shader_constant_buffer_padded_sizes, and
// SHADER_STAGE_COUNT is the number of entries in both.
enum SHADER_STAGE
{
SHADER_STAGE_GEOMETRY_SHADER = 0,
SHADER_STAGE_PIXEL_SHADER = 1,
SHADER_STAGE_VERTEX_SHADER = 2,
SHADER_STAGE_COUNT = 3
};
// One stream buffer per shader stage, indexed by SHADER_STAGE. Created in Init() and destroyed
// in Shutdown().
static std::array<std::unique_ptr<D3DStreamBuffer>, SHADER_STAGE_COUNT>
s_shader_constant_stream_buffers;
// Size of each stage's constants structure rounded up to the next multiple of 256 bytes
// ((size + 0xff) & ~0xff), indexed by SHADER_STAGE.
static const unsigned int s_shader_constant_buffer_padded_sizes[SHADER_STAGE_COUNT] = {
(sizeof(GeometryShaderConstants) + 0xff) & ~0xff, (sizeof(PixelShaderConstants) + 0xff) & ~0xff,
(sizeof(VertexShaderConstants) + 0xff) & ~0xff};
// Creates one D3DStreamBuffer per shader stage, in stage order.
void ShaderConstantsManager::Init()
{
  // A large maximum size is allowed on purpose, to minimize stalls when streaming constants.
  for (auto& stream_buffer : s_shader_constant_stream_buffers)
  {
    stream_buffer = std::make_unique<D3DStreamBuffer>(2 * 1024 * 1024, 64 * 1024 * 1024, nullptr);
  }
}
void ShaderConstantsManager::Shutdown()
{
for (auto& buffer : s_shader_constant_stream_buffers)
buffer.reset();
}
// Uploads the geometry shader constants if they are dirty, then rebinds the geometry stage
// constant buffer view if the command list has it flagged dirty.
// Returns the result of AllocateSpaceInBuffer (the original code names it
// "command_list_executed"), or false when no upload was needed.
bool ShaderConstantsManager::LoadAndSetGeometryShaderConstants()
{
  auto& stream_buffer = s_shader_constant_stream_buffers[SHADER_STAGE_GEOMETRY_SHADER];
  bool allocation_result = false;

  if (GeometryShaderManager::dirty)
  {
    // The padded size is already a multiple of 256 bytes, so no extra alignment (0) is needed
    // for the allocation offset.
    allocation_result = stream_buffer->AllocateSpaceInBuffer(
        s_shader_constant_buffer_padded_sizes[SHADER_STAGE_GEOMETRY_SHADER], 0);

    memcpy(stream_buffer->GetCPUAddressOfCurrentAllocation(), &GeometryShaderManager::constants,
           sizeof(GeometryShaderConstants));
    GeometryShaderManager::dirty = false;

    ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants));

    // New data was written, so the view must be bound again below.
    D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_GS_CBV, true);
  }

  if (!D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_GS_CBV))
    return allocation_result;

  D3D::current_command_list->SetGraphicsRootConstantBufferView(
      DESCRIPTOR_TABLE_GS_CBV, stream_buffer->GetGPUAddressOfCurrentAllocation());
  D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_GS_CBV, false);

  return allocation_result;
}
// Uploads the pixel shader constants if they are dirty, then rebinds the pixel stage constant
// buffer view if the command list has it flagged dirty.
// Returns the result of AllocateSpaceInBuffer (the original code names it
// "command_list_executed"), or false when no upload was needed.
bool ShaderConstantsManager::LoadAndSetPixelShaderConstants()
{
  auto& stream_buffer = s_shader_constant_stream_buffers[SHADER_STAGE_PIXEL_SHADER];
  bool allocation_result = false;

  if (PixelShaderManager::dirty)
  {
    // The padded size is already a multiple of 256 bytes, so no extra alignment (0) is needed
    // for the allocation offset.
    allocation_result = stream_buffer->AllocateSpaceInBuffer(
        s_shader_constant_buffer_padded_sizes[SHADER_STAGE_PIXEL_SHADER], 0);

    memcpy(stream_buffer->GetCPUAddressOfCurrentAllocation(), &PixelShaderManager::constants,
           sizeof(PixelShaderConstants));
    PixelShaderManager::dirty = false;

    ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));

    // New data was written, so the view must be bound again below.
    D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
  }

  if (!D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV))
    return allocation_result;

  D3D::current_command_list->SetGraphicsRootConstantBufferView(
      DESCRIPTOR_TABLE_PS_CBVONE, stream_buffer->GetGPUAddressOfCurrentAllocation());
  D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, false);

  return allocation_result;
}
bool ShaderConstantsManager::LoadAndSetVertexShaderConstants()
{
bool command_list_executed = false;
if (VertexShaderManager::dirty)
{
command_list_executed =
s_shader_constant_stream_buffers[SHADER_STAGE_VERTEX_SHADER]->AllocateSpaceInBuffer(
s_shader_constant_buffer_padded_sizes[SHADER_STAGE_VERTEX_SHADER],
0 // The padded sizes are already aligned to 256 bytes, so don't need to worry about
// manually aligning offset.
);
memcpy(s_shader_constant_stream_buffers[SHADER_STAGE_VERTEX_SHADER]
->GetCPUAddressOfCurrentAllocation(),
&VertexShaderManager::constants, sizeof(VertexShaderConstants));
VertexShaderManager::dirty = false;
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants));
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VS_CBV, true);
}
if (D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_VS_CBV))
{
const D3D12_GPU_VIRTUAL_ADDRESS calculated_gpu_va =
s_shader_constant_stream_buffers[SHADER_STAGE_VERTEX_SHADER]
->GetGPUAddressOfCurrentAllocation();
D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_VS_CBV,
calculated_gpu_va);
if (g_ActiveConfig.bEnablePixelLighting)
D3D::current_command_list->SetGraphicsRootConstantBufferView(DESCRIPTOR_TABLE_PS_CBVTWO,
calculated_gpu_va);
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VS_CBV, false);
}
return command_list_executed;
}
}

View File

@ -1,19 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
namespace DX12
{
// Static-only helper that streams the geometry, pixel and vertex shader constants into
// per-stage stream buffers and binds them on the current command list.
class ShaderConstantsManager final
{
public:
// Creates the per-stage stream buffers.
static void Init();
// Destroys the per-stage stream buffers.
static void Shutdown();
// Each of these uploads the stage's constants when they are dirty and rebinds the stage's
// constant buffer view when the command list has it flagged dirty. The return value is the
// result of the buffer allocation (false when nothing was uploaded).
static bool LoadAndSetGeometryShaderConstants();
static bool LoadAndSetPixelShaderConstants();
static bool LoadAndSetVertexShaderConstants();
};
}

View File

@ -1,798 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "Common/StringUtil.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DShader.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// Pixel Shader blobs
// For the two-element arrays, index 0 holds the single-sample shader and index 1 the variant
// compiled for the current MSAA mode. The index-1 entries and the depth resolve blob are
// released by InvalidateMSAAShaders() so they get rebuilt on next use.
static ID3DBlob* s_color_matrix_program_blob[2] = {};
static ID3DBlob* s_color_copy_program_blob[2] = {};
static ID3DBlob* s_depth_matrix_program_blob[2] = {};
static ID3DBlob* s_depth_resolve_to_color_program_blob = {};
static ID3DBlob* s_clear_program_blob = {};
static ID3DBlob* s_anaglyph_program_blob = {};
static ID3DBlob* s_xfb_encode_shader_blob = {};
static ID3DBlob* s_xfb_decode_shader_blob = {};
static ID3DBlob* s_rgba6_to_rgb8_program_blob[2] = {};
static ID3DBlob* s_rgb8_to_rgba6_program_blob[2] = {};
// Vertex Shader blobs/input layouts
static ID3DBlob* s_simple_vertex_shader_blob = {};
static ID3DBlob* s_simple_clear_vertex_shader_blob = {};
// Input layout for the simple vertex shader: a 3-float POSITION at byte offset 0 followed by a
// 3-float TEXCOORD at byte offset 12, both per-vertex data in input slot 0.
// NOTE(review): s_simple_vertex_shader_hlsl declares inTEX0 as float4 and reads inTEX0.w, while
// this layout only supplies three floats for TEXCOORD - confirm this is intended.
static const D3D12_INPUT_ELEMENT_DESC s_simple_vertex_shader_input_elements[] = {
{"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
0},
};
static const D3D12_INPUT_LAYOUT_DESC s_simple_vertex_shader_input_layout = {
s_simple_vertex_shader_input_elements, ARRAYSIZE(s_simple_vertex_shader_input_elements)};
// Input layout for the clear vertex shader: a 3-float POSITION at byte offset 0 followed by an
// 8-bit-per-channel normalized RGBA COLOR at byte offset 12, both per-vertex data in slot 0.
static const D3D12_INPUT_ELEMENT_DESC s_clear_vertex_shader_input_elements[] = {
{"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
};
static const D3D12_INPUT_LAYOUT_DESC s_clear_vertex_shader_input_layout = {
s_clear_vertex_shader_input_elements, ARRAYSIZE(s_clear_vertex_shader_input_elements)};
// Geometry Shader blobs
// Both are compiled in StaticShaderCache::Init(); the getters only hand them out when
// g_ActiveConfig.iStereoMode > 0.
static ID3DBlob* s_clear_geometry_shader_blob = nullptr;
static ID3DBlob* s_copy_geometry_shader_blob = nullptr;
// Pixel Shader HLSL
// Clear shader: writes the incoming COLOR0 value unchanged to the render target.
// Compiled into s_clear_program_blob by StaticShaderCache::Init().
static constexpr const char s_clear_program_hlsl[] = {"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float4 incol0 : COLOR0){\n"
"ocol0 = incol0;\n"
"}\n"};
// EXISTINGD3D11TODO: Find some way to avoid having separate shaders for non-MSAA and MSAA...
// Single-sample color copy: samples Tex0 at uv0 and outputs the result unchanged.
// Compiled into s_color_copy_program_blob[0] by StaticShaderCache::Init().
static constexpr const char s_color_copy_program_hlsl[] = {"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"ocol0 = Tex0.Sample(samp0,uv0);\n"
"}\n"};
// Anaglyph Red-Cyan shader based on Dubois algorithm
// Constants taken from the paper:
// "Conversion of a Stereo Pair to Anaglyph with
// the Least-Squares Projection Method"
// Eric Dubois, March 2009
// The shader samples array slices 0 and 1 of Tex0 at the same xy coordinate, multiplies their
// RGB by the matrices l and r respectively, and outputs the sum with slice 0's alpha.
static constexpr const char s_anaglyph_program_hlsl[] = {
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"float4 c0 = Tex0.Sample(samp0, float3(uv0.xy, 0.0));\n"
"float4 c1 = Tex0.Sample(samp0, float3(uv0.xy, 1.0));\n"
"float3x3 l = float3x3( 0.437, 0.449, 0.164,\n"
" -0.062,-0.062,-0.024,\n"
" -0.048,-0.050,-0.017);\n"
"float3x3 r = float3x3(-0.011,-0.032,-0.007,\n"
" 0.377, 0.761, 0.009,\n"
" -0.026,-0.093, 1.234);\n"
"ocol0 = float4(mul(l, c0.rgb) + mul(r, c1.rgb), c0.a);\n"
"}\n"};
// TODO: Improve sampling algorithm!
// MSAA color copy: loads every sample of the addressed texel and outputs their average.
// This is a format string: the %d after SAMPLES is filled with g_ActiveConfig.iMultisamples
// via StringFromFormat before the shader is compiled.
static constexpr const char s_color_copy_program_msaa_hlsl[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"int width, height, slices, samples;\n"
"Tex0.GetDimensions(width, height, slices, samples);\n"
"ocol0 = 0;\n"
"for(int i = 0; i < SAMPLES; ++i)\n"
" ocol0 += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
"ocol0 /= SAMPLES;\n"
"}\n"};
// MSAA depth resolve: outputs the minimum .x value over all samples of the addressed texel.
// This is a format string: the %d after SAMPLES is filled with g_ActiveConfig.iMultisamples
// via StringFromFormat before the shader is compiled.
static constexpr const char s_depth_resolve_to_color_program_hlsl[] = {
"#define SAMPLES %d\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"void main(\n"
" out float ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0)\n"
"{\n"
" int width, height, slices, samples;\n"
" Tex0.GetDimensions(width, height, slices, samples);\n"
" ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n"
" for(int i = 1; i < SAMPLES; ++i)\n"
" ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n"
"}\n"};
// Single-sample color matrix shader: samples Tex0, quantizes the texel with
// floor(texcol * cColMatrix[5]) * cColMatrix[6], then outputs the dot products of the result
// with cColMatrix[0..3] plus cColMatrix[4].
// Compiled into s_color_matrix_program_blob[0] by StaticShaderCache::Init().
static constexpr const char s_color_matrix_program_hlsl[] = {
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"float4 texcol = Tex0.Sample(samp0,uv0);\n"
"texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
"texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"};
// MSAA color matrix shader: averages all samples of the addressed texel, then applies the same
// quantization and cColMatrix transform as the single-sample version.
// This is a format string: the %d after SAMPLES is filled with g_ActiveConfig.iMultisamples
// via StringFromFormat before the shader is compiled.
static constexpr const char s_color_matrix_program_msaa_hlsl[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"int width, height, slices, samples;\n"
"Tex0.GetDimensions(width, height, slices, samples);\n"
"float4 texcol = 0;\n"
"for(int i = 0; i < SAMPLES; ++i)\n"
" texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
"texcol /= SAMPLES;\n"
"texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
"texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"};
// Single-sample depth matrix shader: converts (1.0 - sampled depth) to an integer scaled by
// 2^24, splits it into byte components, normalizes them and applies the cColMatrix transform.
// Compiled into s_depth_matrix_program_blob[0] by StaticShaderCache::Init().
static constexpr const char s_depth_matrix_program_hlsl[] = {
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0){\n"
" float4 texcol = Tex0.Sample(samp0,uv0);\n"
" int depth = int((1.0 - texcol.x) * 16777216.0);\n"
// Convert to Z24 format
" int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0]
" texcol = float4(workspace) / 255.0;\n"
// Apply color matrix
" ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
"texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"};
// MSAA depth matrix shader: averages all samples of the addressed texel, then performs the
// same depth-to-bytes conversion and cColMatrix transform as the single-sample version.
// This is a format string: the %d after SAMPLES is filled with g_ActiveConfig.iMultisamples
// via StringFromFormat before the shader is compiled.
static constexpr const char s_depth_matrix_program_msaa_hlsl[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0){\n"
" int width, height, slices, samples;\n"
" Tex0.GetDimensions(width, height, slices, samples);\n"
" float4 texcol = 0;\n"
" for(int i = 0; i < SAMPLES; ++i)\n"
" texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
" texcol /= SAMPLES;\n"
" int depth = int((1.0 - texcol.x) * 16777216.0);\n"
// Convert to Z24 format
" int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0]
" texcol = float4(workspace) / 255.0;\n"
// Apply color matrix
" ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
"texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"};
// Single-sample RGBA6 -> RGB8 reinterpretation: scales the sampled texel to four 6-bit
// integers, repacks those 24 bits into three 8-bit channels and forces alpha to 255.
// Compiled lazily by GetReinterpRGBA6ToRGB8PixelShader().
static constexpr const char s_reint_rgba6_to_rgb8_program_hlsl[] = {
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"void main(\n"
" out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0)\n"
"{\n"
" int4 src6 = round(Tex0.Sample(samp0,uv0) * 63.f);\n"
" int4 dst8;\n"
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
" dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
" dst8.a = 255;\n"
" ocol0 = (float4)dst8 / 255.f;\n"
"}"};
// MSAA RGBA6 -> RGB8 reinterpretation: averages all samples of the addressed texel, then
// repacks the four 6-bit channels into three 8-bit channels with alpha forced to 255.
// This is a format string: the %d after SAMPLES is filled with g_ActiveConfig.iMultisamples
// via StringFromFormat before the shader is compiled.
static constexpr const char s_reint_rgba6_to_rgb8_program_msaa_hlsl[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"void main(\n"
" out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0)\n"
"{\n"
" int width, height, slices, samples;\n"
" Tex0.GetDimensions(width, height, slices, samples);\n"
" float4 texcol = 0;\n"
" for (int i = 0; i < SAMPLES; ++i)\n"
" texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
" texcol /= SAMPLES;\n"
" int4 src6 = round(texcol * 63.f);\n"
" int4 dst8;\n"
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
" dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"
" dst8.a = 255;\n"
" ocol0 = (float4)dst8 / 255.f;\n"
"}"};
// Single-sample RGB8 -> RGBA6 reinterpretation: scales the sampled texel to 8-bit integers
// and repacks the 24 bits of R, G and B into four 6-bit channels.
// Compiled lazily by GetReinterpRGB8ToRGBA6PixelShader().
static constexpr const char s_reint_rgb8_to_rgba6_program_hlsl[] = {
"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"void main(\n"
" out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0)\n"
"{\n"
" int4 src8 = round(Tex0.Sample(samp0,uv0) * 255.f);\n"
" int4 dst6;\n"
" dst6.r = src8.r >> 2;\n"
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
" dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
" dst6.a = src8.b & 0x3F;\n"
" ocol0 = (float4)dst6 / 63.f;\n"
"}\n"};
// MSAA RGB8 -> RGBA6 reinterpretation: averages all samples of the addressed texel, then
// repacks the 24 bits of R, G and B into four 6-bit channels.
// This is a format string: the %d after SAMPLES is filled with g_ActiveConfig.iMultisamples
// via StringFromFormat before the shader is compiled.
static constexpr const char s_reint_rgb8_to_rgba6_program_msaa_hlsl[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"void main(\n"
" out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0)\n"
"{\n"
" int width, height, slices, samples;\n"
" Tex0.GetDimensions(width, height, slices, samples);\n"
" float4 texcol = 0;\n"
" for (int i = 0; i < SAMPLES; ++i)\n"
" texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
" texcol /= SAMPLES;\n"
" int4 src8 = round(texcol * 255.f);\n"
" int4 dst6;\n"
" dst6.r = src8.r >> 2;\n"
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
" dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"
" dst6.a = src8.b & 0x3F;\n"
" ocol0 = (float4)dst6 / 63.f;\n"
"}\n"};
// Vertex Shader HLSL
// Simple pass-through vertex shader: forwards the position unchanged, outputs inTEX0.xyz as
// TEXCOORD0 and inTEX0.w as TEXCOORD1.
// Compiled into s_simple_vertex_shader_blob by StaticShaderCache::Init().
static constexpr const char s_simple_vertex_shader_hlsl[] = {
"struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float3 vTexCoord : TEXCOORD0;\n"
"float vTexCoord1 : TEXCOORD1;\n"
"};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float4 inTEX0 : TEXCOORD0)\n"
"{\n"
"VSOUTPUT OUT;\n"
"OUT.vPosition = inPosition;\n"
"OUT.vTexCoord = inTEX0.xyz;\n"
"OUT.vTexCoord1 = inTEX0.w;\n"
"return OUT;\n"
"}\n"};
// Clear vertex shader: forwards the input position and color unchanged.
// Compiled into s_simple_clear_vertex_shader_blob by StaticShaderCache::Init().
static constexpr const char s_clear_vertex_shader_hlsl[] = {
"struct VSOUTPUT\n"
"{\n"
"float4 vPosition : POSITION;\n"
"float4 vColor0 : COLOR0;\n"
"};\n"
"VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n"
"{\n"
"VSOUTPUT OUT;\n"
"OUT.vPosition = inPosition;\n"
"OUT.vColor0 = inColor0;\n"
"return OUT;\n"
"}\n"};
// Geometry Shader HLSL
// Clear geometry shader: emits each input triangle twice, once per render target array slice
// (0 and 1), copying position and color through.
// Compiled into s_clear_geometry_shader_blob by StaticShaderCache::Init().
static constexpr const char s_clear_geometry_shader_hlsl[] = {
"struct VSOUTPUT\n"
"{\n"
" float4 vPosition : POSITION;\n"
" float4 vColor0 : COLOR0;\n"
"};\n"
"struct GSOUTPUT\n"
"{\n"
" float4 vPosition : POSITION;\n"
" float4 vColor0 : COLOR0;\n"
" uint slice : SV_RenderTargetArrayIndex;\n"
"};\n"
"[maxvertexcount(6)]\n"
"void main(triangle VSOUTPUT o[3], inout TriangleStream<GSOUTPUT> Output)\n"
"{\n"
"for(int slice = 0; slice < 2; slice++)\n"
"{\n"
" for(int i = 0; i < 3; i++)\n"
" {\n"
" GSOUTPUT OUT;\n"
" OUT.vPosition = o[i].vPosition;\n"
" OUT.vColor0 = o[i].vColor0;\n"
" OUT.slice = slice;\n"
" Output.Append(OUT);\n"
" }\n"
" Output.RestartStrip();\n"
"}\n"
"}\n"};
// Copy geometry shader: emits each input triangle twice, once per render target array slice
// (0 and 1), and overwrites the texture coordinate's z with the slice index so each output
// slice reads from the matching source slice.
// Compiled into s_copy_geometry_shader_blob by StaticShaderCache::Init().
static constexpr const char s_copy_geometry_shader_hlsl[] = {
"struct VSOUTPUT\n"
"{\n"
" float4 vPosition : POSITION;\n"
" float3 vTexCoord : TEXCOORD0;\n"
" float vTexCoord1 : TEXCOORD1;\n"
"};\n"
"struct GSOUTPUT\n"
"{\n"
" float4 vPosition : POSITION;\n"
" float3 vTexCoord : TEXCOORD0;\n"
" float vTexCoord1 : TEXCOORD1;\n"
" uint slice : SV_RenderTargetArrayIndex;\n"
"};\n"
"[maxvertexcount(6)]\n"
"void main(triangle VSOUTPUT o[3], inout TriangleStream<GSOUTPUT> Output)\n"
"{\n"
"for(int slice = 0; slice < 2; slice++)\n"
"{\n"
" for(int i = 0; i < 3; i++)\n"
" {\n"
" GSOUTPUT OUT;\n"
" OUT.vPosition = o[i].vPosition;\n"
" OUT.vTexCoord = o[i].vTexCoord;\n"
" OUT.vTexCoord.z = slice;\n"
" OUT.vTexCoord1 = o[i].vTexCoord1;\n"
" OUT.slice = slice;\n"
" Output.Append(OUT);\n"
" }\n"
" Output.RestartStrip();\n"
"}\n"
"}\n"};
// XFB encode pixel shader: reads three neighbouring source pixels, applies gamma, converts
// them to YCbCr and packs two luma values plus blended chroma into one (Y0, U, Y1, V) output.
// The gamma exponent arrives through TEXCOORD1. Everything between R"( and )" is HLSL source
// text handed to the compiler, including its // comments.
// Compiled into s_xfb_encode_shader_blob by StaticShaderCache::Init().
static const char s_xfb_encode_shader_hlsl[] = R"(
Texture2DArray tex0 : register(t0);
SamplerState samp0 : register(s0);
cbuffer EncodeParams : register(b0)
{
float4 srcRect;
float2 texelSize;
}
// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
// <http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion>
static const float3x4 RGB_TO_YCBCR = float3x4(
0.257, 0.504, 0.098, 16.0/255.0,
-0.148, -0.291, 0.439, 128.0/255.0,
0.439, -0.368, -0.071, 128.0/255.0
);
void main(
out float4 ocol0 : SV_Target,
in float4 pos : SV_Position,
in float3 uv0 : TEXCOORD0,
in float gamma : TEXCOORD1)
{
// Load three input pixels, emulate clamp sampler by clamping to the source rectangle.
// Subtract 0.5 from the x coordinate because we're doubling the width, and want the pixel center shifted back to 0.5.
// The native resolution is used as a reference here so bilinear filtering works as expected.
float2 baseCoords = lerp(srcRect.xy, srcRect.zw, float2(uv0.x - 0.5 * texelSize.x, uv0.y));
float3 sampleL = tex0.Sample(samp0, float3(max(srcRect.xy, baseCoords - float2(texelSize.x, 0)), 0)).rgb;
float3 sampleM = tex0.Sample(samp0, float3(baseCoords, 0)).rgb;
float3 sampleR = tex0.Sample(samp0, float3(min(srcRect.zw, baseCoords + float2(texelSize.x, 0)), 0)).rgb;
// Gamma correction (gamma is already rcp(gamma))
// abs() here because the HLSL compiler throws a warning otherwise.
sampleL = pow(abs(sampleL), gamma);
sampleM = pow(abs(sampleM), gamma);
sampleR = pow(abs(sampleR), gamma);
// RGB -> YUV
float3 yuvL = mul(RGB_TO_YCBCR, float4(sampleL,1));
float3 yuvM = mul(RGB_TO_YCBCR, float4(sampleM,1));
float3 yuvR = mul(RGB_TO_YCBCR, float4(sampleR,1));
// The Y components correspond to two EFB pixels, while the U and V are
// made from a blend of three EFB pixels.
float y0 = yuvM.r;
float y1 = yuvR.r;
float u0 = 0.25*yuvL.g + 0.5*yuvM.g + 0.25*yuvR.g;
float v0 = 0.25*yuvL.b + 0.5*yuvM.b + 0.25*yuvR.b;
ocol0 = float4(y0, u0, y1, v0);
}
)";
// XFB decode pixel shader: loads the half-width YUYV texel covering the output pixel, picks
// the .r luma for even x and the .b luma for odd x, removes the BT.601 offsets and converts to
// RGB with alpha 1. Everything between R"( and )" is HLSL source text handed to the compiler,
// including its // comments.
// Compiled into s_xfb_decode_shader_blob by StaticShaderCache::Init().
static const char s_xfb_decode_shader_hlsl[] = R"(
Texture2DArray tex0 : register(t0);
static const float3x3 YCBCR_TO_RGB = float3x3(
1.164, 0.000, 1.596,
1.164, -0.392, -0.813,
1.164, 2.017, 0.000
);
void main(
out float4 ocol0 : SV_Target,
in float4 pos : SV_Position,
in float3 uv0 : TEXCOORD0)
{
// Divide coordinates by 2 due to half-width YUYV texure.
int2 ipos = int2(pos.xy);
int2 texpos = int2(ipos.x >> 1, ipos.y);
float4 yuyv = tex0.Load(int4(texpos, 0, 0));
// Select U for even pixels, V for odd pixels.
float y = lerp(yuyv.r, yuyv.b, float(ipos.x & 1));
// Recover RGB components
float3 yuv_601_sub = float3(y, yuyv.ga) - float3(16.0/255.0, 128.0/255.0, 128.0/255.0);
float3 rgb_601 = mul(YCBCR_TO_RGB, yuv_601_sub);
ocol0 = float4(rgb_601, 1);
}
)";
// Returns the pixel shader that reinterprets RGBA6 data as RGB8, compiling it on first use.
//
// multisampled: request the MSAA variant. It is only used when g_ActiveConfig.iMultisamples
//               is not 1; otherwise the single-sample shader is returned.
//
// Returns the bytecode of the selected shader, or an empty D3D12_SHADER_BYTECODE if the shader
// could not be compiled.
//
// Fixes over the previous version:
//  - An already-compiled MSAA shader is now returned. Previously only the call that compiled
//    it returned bytecode; every later call returned an empty descriptor.
//  - A failed compile no longer dereferences a null blob.
D3D12_SHADER_BYTECODE StaticShaderCache::GetReinterpRGBA6ToRGB8PixelShader(bool multisampled)
{
  D3D12_SHADER_BYTECODE bytecode = {};

  // Slot 0 caches the single-sample shader, slot 1 the shader for the current MSAA mode.
  const bool use_msaa_variant = multisampled && g_ActiveConfig.iMultisamples != 1;
  ID3DBlob*& blob = s_rgba6_to_rgb8_program_blob[use_msaa_variant ? 1 : 0];

  if (!blob)
  {
    if (use_msaa_variant)
    {
      // create MSAA shader for current AA mode
      const std::string buf =
          StringFromFormat(s_reint_rgba6_to_rgb8_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
      D3D::CompilePixelShader(buf, &blob);
    }
    else
    {
      D3D::CompilePixelShader(s_reint_rgba6_to_rgb8_program_hlsl, &blob);
    }
  }

  // The blob stays null if compilation failed; report that as empty bytecode.
  if (blob)
    bytecode = {blob->GetBufferPointer(), blob->GetBufferSize()};

  return bytecode;
}
// Returns the pixel shader that reinterprets RGB8 data as RGBA6, compiling it on first use.
//
// multisampled: request the MSAA variant. It is only used when g_ActiveConfig.iMultisamples
//               is not 1; otherwise the single-sample shader is returned.
//
// Returns the bytecode of the selected shader, or an empty D3D12_SHADER_BYTECODE if the shader
// could not be compiled.
//
// Fixes over the previous version:
//  - An already-compiled MSAA shader is now returned. Previously only the call that compiled
//    it returned bytecode; every later call returned an empty descriptor.
//  - A failed compile no longer dereferences a null blob.
D3D12_SHADER_BYTECODE StaticShaderCache::GetReinterpRGB8ToRGBA6PixelShader(bool multisampled)
{
  D3D12_SHADER_BYTECODE bytecode = {};

  // Slot 0 caches the single-sample shader, slot 1 the shader for the current MSAA mode.
  const bool use_msaa_variant = multisampled && g_ActiveConfig.iMultisamples != 1;
  ID3DBlob*& blob = s_rgb8_to_rgba6_program_blob[use_msaa_variant ? 1 : 0];

  if (!blob)
  {
    if (use_msaa_variant)
    {
      // create MSAA shader for current AA mode
      const std::string buf =
          StringFromFormat(s_reint_rgb8_to_rgba6_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
      D3D::CompilePixelShader(buf, &blob);
    }
    else
    {
      D3D::CompilePixelShader(s_reint_rgb8_to_rgba6_program_hlsl, &blob);
    }
  }

  // The blob stays null if compilation failed; report that as empty bytecode.
  if (blob)
    bytecode = {blob->GetBufferPointer(), blob->GetBufferSize()};

  return bytecode;
}
// Returns the color copy pixel shader. The single-sample shader (slot 0, compiled by Init())
// is used unless multisampled is set and g_ActiveConfig.iMultisamples is not 1; in that case
// the MSAA shader in slot 1 is returned, compiling it for the current AA mode on first use.
D3D12_SHADER_BYTECODE StaticShaderCache::GetColorCopyPixelShader(bool multisampled)
{
  ID3DBlob* selected_blob = nullptr;

  if (!multisampled || g_ActiveConfig.iMultisamples == 1)
  {
    selected_blob = s_color_copy_program_blob[0];
  }
  else
  {
    if (!s_color_copy_program_blob[1])
    {
      // create MSAA shader for current AA mode
      std::string msaa_source =
          StringFromFormat(s_color_copy_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
      D3D::CompilePixelShader(msaa_source, &s_color_copy_program_blob[1]);
    }
    selected_blob = s_color_copy_program_blob[1];
  }

  const D3D12_SHADER_BYTECODE bytecode = {selected_blob->GetBufferPointer(),
                                          selected_blob->GetBufferSize()};
  return bytecode;
}
// Returns the MSAA depth-resolve pixel shader, compiling it for the current
// g_ActiveConfig.iMultisamples value the first time it is requested.
D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthResolveToColorPixelShader()
{
  if (!s_depth_resolve_to_color_program_blob)
  {
    // create MSAA shader for current AA mode
    std::string msaa_source =
        StringFromFormat(s_depth_resolve_to_color_program_hlsl, g_ActiveConfig.iMultisamples);
    D3D::CompilePixelShader(msaa_source, &s_depth_resolve_to_color_program_blob);
  }

  const D3D12_SHADER_BYTECODE bytecode = {
      s_depth_resolve_to_color_program_blob->GetBufferPointer(),
      s_depth_resolve_to_color_program_blob->GetBufferSize()};
  return bytecode;
}
// Returns the color matrix pixel shader. The single-sample shader (slot 0, compiled by Init())
// is used unless multisampled is set and g_ActiveConfig.iMultisamples is not 1; in that case
// the MSAA shader in slot 1 is returned, compiling it for the current AA mode on first use.
D3D12_SHADER_BYTECODE StaticShaderCache::GetColorMatrixPixelShader(bool multisampled)
{
  ID3DBlob* selected_blob = nullptr;

  if (!multisampled || g_ActiveConfig.iMultisamples == 1)
  {
    selected_blob = s_color_matrix_program_blob[0];
  }
  else
  {
    if (!s_color_matrix_program_blob[1])
    {
      // create MSAA shader for current AA mode
      std::string msaa_source =
          StringFromFormat(s_color_matrix_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
      D3D::CompilePixelShader(msaa_source, &s_color_matrix_program_blob[1]);
    }
    selected_blob = s_color_matrix_program_blob[1];
  }

  const D3D12_SHADER_BYTECODE bytecode = {selected_blob->GetBufferPointer(),
                                          selected_blob->GetBufferSize()};
  return bytecode;
}
// Returns the depth matrix pixel shader. The single-sample shader (slot 0, compiled by Init())
// is used unless multisampled is set and g_ActiveConfig.iMultisamples is not 1; in that case
// the MSAA shader in slot 1 is returned, compiling it for the current AA mode on first use.
D3D12_SHADER_BYTECODE StaticShaderCache::GetDepthMatrixPixelShader(bool multisampled)
{
  ID3DBlob* selected_blob = nullptr;

  if (!multisampled || g_ActiveConfig.iMultisamples == 1)
  {
    selected_blob = s_depth_matrix_program_blob[0];
  }
  else
  {
    if (!s_depth_matrix_program_blob[1])
    {
      // create MSAA shader for current AA mode
      std::string msaa_source =
          StringFromFormat(s_depth_matrix_program_msaa_hlsl, g_ActiveConfig.iMultisamples);
      D3D::CompilePixelShader(msaa_source, &s_depth_matrix_program_blob[1]);
    }
    selected_blob = s_depth_matrix_program_blob[1];
  }

  const D3D12_SHADER_BYTECODE bytecode = {selected_blob->GetBufferPointer(),
                                          selected_blob->GetBufferSize()};
  return bytecode;
}
// Returns the bytecode of the clear pixel shader compiled by Init().
D3D12_SHADER_BYTECODE StaticShaderCache::GetClearPixelShader()
{
  const D3D12_SHADER_BYTECODE clear_shader = {s_clear_program_blob->GetBufferPointer(),
                                              s_clear_program_blob->GetBufferSize()};
  return clear_shader;
}
// Returns the bytecode of the anaglyph pixel shader compiled by Init().
D3D12_SHADER_BYTECODE StaticShaderCache::GetAnaglyphPixelShader()
{
  const D3D12_SHADER_BYTECODE anaglyph_shader = {s_anaglyph_program_blob->GetBufferPointer(),
                                                 s_anaglyph_program_blob->GetBufferSize()};
  return anaglyph_shader;
}
// Returns the bytecode of the simple (position + texcoord) vertex shader compiled by Init().
D3D12_SHADER_BYTECODE StaticShaderCache::GetSimpleVertexShader()
{
  const D3D12_SHADER_BYTECODE simple_shader = {s_simple_vertex_shader_blob->GetBufferPointer(),
                                               s_simple_vertex_shader_blob->GetBufferSize()};
  return simple_shader;
}
// Returns the bytecode of the clear (position + color) vertex shader compiled by Init().
D3D12_SHADER_BYTECODE StaticShaderCache::GetClearVertexShader()
{
  const D3D12_SHADER_BYTECODE clear_shader = {
      s_simple_clear_vertex_shader_blob->GetBufferPointer(),
      s_simple_clear_vertex_shader_blob->GetBufferSize()};
  return clear_shader;
}
// Returns a copy of the input layout descriptor for the simple vertex shader. The descriptor
// refers to the file-level s_simple_vertex_shader_input_elements array.
D3D12_INPUT_LAYOUT_DESC StaticShaderCache::GetSimpleVertexShaderInputLayout()
{
return s_simple_vertex_shader_input_layout;
}
// Returns a copy of the input layout descriptor for the clear vertex shader. The descriptor
// refers to the file-level s_clear_vertex_shader_input_elements array.
D3D12_INPUT_LAYOUT_DESC StaticShaderCache::GetClearVertexShaderInputLayout()
{
return s_clear_vertex_shader_input_layout;
}
// Returns the clear geometry shader when stereo rendering is enabled
// (g_ActiveConfig.iStereoMode > 0), otherwise an empty bytecode descriptor.
D3D12_SHADER_BYTECODE StaticShaderCache::GetClearGeometryShader()
{
  D3D12_SHADER_BYTECODE shader = {};

  const bool stereo_enabled = g_ActiveConfig.iStereoMode > 0;
  if (!stereo_enabled)
    return shader;

  shader.BytecodeLength = s_clear_geometry_shader_blob->GetBufferSize();
  shader.pShaderBytecode = s_clear_geometry_shader_blob->GetBufferPointer();
  return shader;
}
// Returns the copy geometry shader when stereo rendering is enabled
// (g_ActiveConfig.iStereoMode > 0), otherwise an empty bytecode descriptor.
D3D12_SHADER_BYTECODE StaticShaderCache::GetCopyGeometryShader()
{
  D3D12_SHADER_BYTECODE shader = {};

  const bool stereo_enabled = g_ActiveConfig.iStereoMode > 0;
  if (!stereo_enabled)
    return shader;

  shader.BytecodeLength = s_copy_geometry_shader_blob->GetBufferSize();
  shader.pShaderBytecode = s_copy_geometry_shader_blob->GetBufferPointer();
  return shader;
}
// Returns the bytecode of the XFB encode pixel shader compiled by Init().
D3D12_SHADER_BYTECODE StaticShaderCache::GetXFBEncodePixelShader()
{
  D3D12_SHADER_BYTECODE shader = {};
  shader.pShaderBytecode = s_xfb_encode_shader_blob->GetBufferPointer();
  shader.BytecodeLength = s_xfb_encode_shader_blob->GetBufferSize();
  return shader;
}
// Returns the bytecode of the XFB decode pixel shader compiled by Init().
D3D12_SHADER_BYTECODE StaticShaderCache::GetXFBDecodePixelShader()
{
  D3D12_SHADER_BYTECODE shader = {};
  shader.pShaderBytecode = s_xfb_decode_shader_blob->GetBufferPointer();
  shader.BytecodeLength = s_xfb_decode_shader_blob->GetBufferSize();
  return shader;
}
// Compiles the fixed set of shaders this cache hands out: pixel shaders first, then vertex
// shaders, then geometry shaders. For the color copy / color matrix / depth matrix programs only
// slot [0] of each blob array is compiled here.
// NOTE(review): whatever the D3D::Compile* helpers return is not inspected here, so a failed
// compile would presumably leave the corresponding blob null -- confirm against D3DShader.
void StaticShaderCache::Init()
{
// Compile static pixel shaders
D3D::CompilePixelShader(s_clear_program_hlsl, &s_clear_program_blob);
D3D::CompilePixelShader(s_anaglyph_program_hlsl, &s_anaglyph_program_blob);
D3D::CompilePixelShader(s_color_copy_program_hlsl, &s_color_copy_program_blob[0]);
D3D::CompilePixelShader(s_color_matrix_program_hlsl, &s_color_matrix_program_blob[0]);
D3D::CompilePixelShader(s_depth_matrix_program_hlsl, &s_depth_matrix_program_blob[0]);
D3D::CompilePixelShader(s_xfb_encode_shader_hlsl, &s_xfb_encode_shader_blob);
D3D::CompilePixelShader(s_xfb_decode_shader_hlsl, &s_xfb_decode_shader_blob);
// Compile static vertex shaders
D3D::CompileVertexShader(s_simple_vertex_shader_hlsl, &s_simple_vertex_shader_blob);
D3D::CompileVertexShader(s_clear_vertex_shader_hlsl, &s_simple_clear_vertex_shader_blob);
// Compile static geometry shaders
D3D::CompileGeometryShader(s_clear_geometry_shader_hlsl, &s_clear_geometry_shader_blob);
D3D::CompileGeometryShader(s_copy_geometry_shader_hlsl, &s_copy_geometry_shader_blob);
}
// Call this when multisampling mode changes, and shaders need to be regenerated.
// Releases slot [1] of every per-multisample-mode blob array plus the depth-resolve-to-color
// blob; slot [0] of each array is left untouched.
void StaticShaderCache::InvalidateMSAAShaders()
{
SAFE_RELEASE(s_color_copy_program_blob[1]);
SAFE_RELEASE(s_color_matrix_program_blob[1]);
SAFE_RELEASE(s_depth_matrix_program_blob[1]);
SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[1]);
SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[1]);
SAFE_RELEASE(s_depth_resolve_to_color_program_blob);
}
// Releases every shader blob owned by this cache: the single pixel shader blobs, both slots of
// each two-entry pixel shader blob array, the vertex shader blobs and the geometry shader blobs.
void StaticShaderCache::Shutdown()
{
// Free pixel shader blobs
SAFE_RELEASE(s_xfb_decode_shader_blob);
SAFE_RELEASE(s_xfb_encode_shader_blob);
SAFE_RELEASE(s_clear_program_blob);
SAFE_RELEASE(s_anaglyph_program_blob);
SAFE_RELEASE(s_depth_resolve_to_color_program_blob);
// Index 0 and 1 of each array are both released here.
for (unsigned int i = 0; i < 2; ++i)
{
SAFE_RELEASE(s_color_copy_program_blob[i]);
SAFE_RELEASE(s_color_matrix_program_blob[i]);
SAFE_RELEASE(s_depth_matrix_program_blob[i]);
SAFE_RELEASE(s_rgba6_to_rgb8_program_blob[i]);
SAFE_RELEASE(s_rgb8_to_rgba6_program_blob[i]);
}
// Free vertex shader blobs
SAFE_RELEASE(s_simple_vertex_shader_blob);
SAFE_RELEASE(s_simple_clear_vertex_shader_blob);
// Free geometry shader blobs
SAFE_RELEASE(s_clear_geometry_shader_blob);
SAFE_RELEASE(s_copy_geometry_shader_blob);
}
}

View File

@ -1,40 +0,0 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "VideoBackends/D3D12/D3DBase.h"
namespace DX12
{
// Static-only accessor for the backend's fixed utility shaders. Each getter returns a
// D3D12_SHADER_BYTECODE (or input layout description) by value.
class StaticShaderCache final
{
public:
// Lifetime management for the cached shader blobs.
static void Init();
static void InvalidateMSAAShaders();
static void Shutdown();
// Pixel shaders
// The `multisampled` flag selects between two variants of the same program.
static D3D12_SHADER_BYTECODE GetColorMatrixPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetColorCopyPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetDepthMatrixPixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetDepthResolveToColorPixelShader();
static D3D12_SHADER_BYTECODE GetClearPixelShader();
static D3D12_SHADER_BYTECODE GetAnaglyphPixelShader();
static D3D12_SHADER_BYTECODE GetReinterpRGBA6ToRGB8PixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetReinterpRGB8ToRGBA6PixelShader(bool multisampled);
static D3D12_SHADER_BYTECODE GetXFBEncodePixelShader();
static D3D12_SHADER_BYTECODE GetXFBDecodePixelShader();
// Vertex shaders
static D3D12_SHADER_BYTECODE GetSimpleVertexShader();
static D3D12_SHADER_BYTECODE GetClearVertexShader();
static D3D12_INPUT_LAYOUT_DESC GetSimpleVertexShaderInputLayout();
static D3D12_INPUT_LAYOUT_DESC GetClearVertexShaderInputLayout();
// Geometry shaders
static D3D12_SHADER_BYTECODE GetClearGeometryShader();
static D3D12_SHADER_BYTECODE GetCopyGeometryShader();
};
}

View File

@ -1,667 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/TextureCache.h"
#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/Logging/Log.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DDescriptorHeapManager.h"
#include "VideoBackends/D3D12/D3DShader.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/PSTextureEncoder.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// File-local state shared by all TextureCache / TCacheEntry code in this translation unit.
// Encoder that TextureCache::CopyEFB forwards to; created in the TextureCache constructor.
static std::unique_ptr<PSTextureEncoder> s_encoder = nullptr;
// Stream buffer that receives the color matrix constants uploaded by FromRenderTarget.
static std::unique_ptr<D3DStreamBuffer> s_efb_copy_stream_buffer = nullptr;
// cbuf_id of the constants most recently uploaded by FromRenderTarget; UINT_MAX until then.
static u32 s_efb_copy_last_cbuf_id = UINT_MAX;
// Readback buffer used by TCacheEntry::Save, together with its current size in bytes.
static ID3D12Resource* s_texture_cache_entry_readback_buffer = nullptr;
static size_t s_texture_cache_entry_readback_buffer_size = 0;
// Maps a host texture format onto the DXGI format used for the backing resource.
// DXT1/DXT3/DXT5 map to BC1/BC2/BC3; RGBA8 and every other value map to R8G8B8A8_UNORM.
static DXGI_FORMAT GetDXGIFormatForHostFormat(HostTextureFormat format)
{
  if (format == HostTextureFormat::DXT1)
    return DXGI_FORMAT_BC1_UNORM;

  if (format == HostTextureFormat::DXT3)
    return DXGI_FORMAT_BC2_UNORM;

  if (format == HostTextureFormat::DXT5)
    return DXGI_FORMAT_BC3_UNORM;

  // RGBA8 and anything unrecognised.
  return DXGI_FORMAT_R8G8B8A8_UNORM;
}
// Drops this entry's reference on the texture it was constructed with.
TextureCache::TCacheEntry::~TCacheEntry()
{
m_texture->Release();
}
// Intentionally a no-op; `stage` is unused.
void TextureCache::TCacheEntry::Bind(unsigned int stage)
{
// Textures bound as group in TextureCache::BindTextures method.
}
bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int level)
{
u32 level_width = std::max(config.width >> level, 1u);
u32 level_height = std::max(config.height >> level, 1u);
size_t level_pitch =
Common::AlignUp(level_width * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
size_t required_readback_buffer_size = level_pitch * level_height;
// We can't dump compressed textures currently (it would mean drawing them to a RGBA8
// framebuffer, and saving that). TextureCache does not call Save for custom textures
// anyway, so this is fine for now.
_assert_(config.format == HostTextureFormat::RGBA8);
// Check if the current readback buffer is large enough
if (required_readback_buffer_size > s_texture_cache_entry_readback_buffer_size)
{
// Reallocate the buffer with the new size. Safe to immediately release because we're the only
// user and we block until completion.
if (s_texture_cache_entry_readback_buffer)
s_texture_cache_entry_readback_buffer->Release();
s_texture_cache_entry_readback_buffer_size = required_readback_buffer_size;
CheckHR(D3D::device12->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Buffer(s_texture_cache_entry_readback_buffer_size),
D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
IID_PPV_ARGS(&s_texture_cache_entry_readback_buffer)));
}
m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_SOURCE);
D3D12_TEXTURE_COPY_LOCATION dst_location = {};
dst_location.pResource = s_texture_cache_entry_readback_buffer;
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dst_location.PlacedFootprint.Offset = 0;
dst_location.PlacedFootprint.Footprint.Depth = 1;
dst_location.PlacedFootprint.Footprint.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
dst_location.PlacedFootprint.Footprint.Width = level_width;
dst_location.PlacedFootprint.Footprint.Height = level_height;
dst_location.PlacedFootprint.Footprint.RowPitch = static_cast<UINT>(level_pitch);
D3D12_TEXTURE_COPY_LOCATION src_location =
CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), level);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, nullptr);
D3D::command_list_mgr->ExecuteQueuedWork(true);
// Map readback buffer and save to file.
void* readback_texture_map;
D3D12_RANGE read_range = {0, required_readback_buffer_size};
CheckHR(s_texture_cache_entry_readback_buffer->Map(0, &read_range, &readback_texture_map));
bool saved = TextureToPng(static_cast<u8*>(readback_texture_map),
dst_location.PlacedFootprint.Footprint.RowPitch, filename,
dst_location.PlacedFootprint.Footprint.Width,
dst_location.PlacedFootprint.Footprint.Height);
D3D12_RANGE write_range = {};
s_texture_cache_entry_readback_buffer->Unmap(0, &write_range);
return saved;
}
// Copies `src_rect` of `source` into `dst_rect` of this texture.
//
// When both rectangles have the same size, subresource 0 is copied directly with
// CopyTextureRegion and both textures are returned to the pixel-shader-resource state.
// When the sizes differ, the source is drawn into this texture as a linearly sampled quad; that
// path requires this entry to be a render target, otherwise the call does nothing.
void TextureCache::TCacheEntry::CopyRectangleFromTexture(const TCacheEntryBase* source,
                                                         const MathUtil::Rectangle<int>& src_rect,
                                                         const MathUtil::Rectangle<int>& dst_rect)
{
  // Base-to-derived downcast; static_cast matches what ConvertTexture does for the same types.
  const TCacheEntry* srcentry = static_cast<const TCacheEntry*>(source);

  if (src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight())
  {
    // These assertions should hold true unless the base code is passing us sizes too large, in
    // which case it should be fixed instead.
    _assert_msg_(VIDEO, static_cast<u32>(src_rect.GetWidth()) <= source->config.width &&
                            static_cast<u32>(src_rect.GetHeight()) <= source->config.height,
                 "Source rect is too large for CopyRectangleFromTexture");
    _assert_msg_(VIDEO, static_cast<u32>(dst_rect.GetWidth()) <= config.width &&
                            static_cast<u32>(dst_rect.GetHeight()) <= config.height,
                 "Dest rect is too large for CopyRectangleFromTexture");

    CD3DX12_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom,
                        srcentry->config.layers);
    D3D12_TEXTURE_COPY_LOCATION dst_location =
        CD3DX12_TEXTURE_COPY_LOCATION(m_texture->GetTex12(), 0);
    D3D12_TEXTURE_COPY_LOCATION src_location =
        CD3DX12_TEXTURE_COPY_LOCATION(srcentry->m_texture->GetTex12(), 0);

    m_texture->TransitionToResourceState(D3D::current_command_list, D3D12_RESOURCE_STATE_COPY_DEST);
    srcentry->m_texture->TransitionToResourceState(D3D::current_command_list,
                                                   D3D12_RESOURCE_STATE_COPY_SOURCE);

    D3D::current_command_list->CopyTextureRegion(&dst_location, dst_rect.left, dst_rect.top, 0,
                                                 &src_location, &src_box);

    m_texture->TransitionToResourceState(D3D::current_command_list,
                                         D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
    srcentry->m_texture->TransitionToResourceState(D3D::current_command_list,
                                                   D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
    return;
  }

  // A scaled copy has to be rendered, which is impossible without a render target.
  if (!config.rendertarget)
    return;

  D3D::SetViewportAndScissor(dst_rect.left, dst_rect.top, dst_rect.GetWidth(),
                             dst_rect.GetHeight());

  m_texture->TransitionToResourceState(D3D::current_command_list,
                                       D3D12_RESOURCE_STATE_RENDER_TARGET);
  // Hold the handle in a named local so OMSetRenderTargets receives the address of a real
  // lvalue regardless of how GetRTV12 returns it.
  const D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = m_texture->GetRTV12();
  D3D::current_command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, nullptr);

  D3D::SetLinearCopySampler();

  D3D12_RECT src_rc;
  src_rc.left = src_rect.left;
  src_rc.right = src_rect.right;
  src_rc.top = src_rect.top;
  src_rc.bottom = src_rect.bottom;

  D3D::DrawShadedTexQuad(
      srcentry->m_texture, &src_rc, srcentry->config.width, srcentry->config.height,
      StaticShaderCache::GetColorCopyPixelShader(false), StaticShaderCache::GetSimpleVertexShader(),
      StaticShaderCache::GetSimpleVertexShaderInputLayout(), D3D12_SHADER_BYTECODE(), 1.0, 0,
      DXGI_FORMAT_R8G8B8A8_UNORM, false, m_texture->GetMultisampled());

  m_texture->TransitionToResourceState(D3D::current_command_list,
                                       D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);

  // Put the EFB textures back into their rendering states before restoring API state.
  FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(
      D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
  FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
      D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);

  g_renderer->RestoreAPIState();
}
// Uploads `buffer` into mip `level` of this texture via D3D::ReplaceRGBATexture2D.
// The source pitch is derived from the entry's host format and `row_length`.
// `buffer_size` is not used by this implementation.
void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_length,
                                     const u8* buffer, size_t buffer_size)
{
  const size_t pitch_in_bytes = CalculateHostTextureLevelPitch(config.format, row_length);
  const unsigned int upload_pitch = static_cast<unsigned int>(pitch_in_bytes);

  D3D::ReplaceRGBATexture2D(m_texture->GetTex12(), buffer, width, height, upload_pitch, level,
                            m_texture->GetResourceUsageState());
}
// Creates a new cache entry for `config` and returns it; ownership of the returned entry passes
// to the caller.
//
// Render-target entries get their texture from D3DTexture2D::Create with shader-resource and
// render-target bind flags and `config.layers` array slices. All other entries get a committed
// default-heap 2D texture with `config.levels` mip levels, wrapped in a D3DTexture2D.
// In both cases the texture's SRV handles are cached on the entry.
TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConfig& config)
{
  DXGI_FORMAT dxgi_format = GetDXGIFormatForHostFormat(config.format);

  if (config.rendertarget)
  {
    D3DTexture2D* texture =
        D3DTexture2D::Create(config.width, config.height,
                             TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET,
                             dxgi_format, 1, config.layers);

    TCacheEntry* entry = new TCacheEntry(config, texture);
    entry->m_texture_srv_cpu_handle = texture->GetSRV12CPU();
    entry->m_texture_srv_gpu_handle = texture->GetSRV12GPU();
    entry->m_texture_srv_gpu_handle_cpu_shadow = texture->GetSRV12GPUCPUShadow();
    return entry;
  }
  else
  {
    ID3D12Resource* texture_resource = nullptr;
    D3D12_RESOURCE_DESC texture_resource_desc =
        CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, config.width, config.height, 1, config.levels);

    // Pass addresses of named objects: `&Temporary(...)` relies on a compiler extension, and the
    // description above can be handed over directly without copying it into another temporary.
    const CD3DX12_HEAP_PROPERTIES heap_properties(D3D12_HEAP_TYPE_DEFAULT);
    CheckHR(D3D::device12->CreateCommittedResource(
        &heap_properties, D3D12_HEAP_FLAG_NONE, &texture_resource_desc,
        D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&texture_resource)));

    D3DTexture2D* texture =
        new D3DTexture2D(texture_resource, TEXTURE_BIND_FLAG_SHADER_RESOURCE, DXGI_FORMAT_UNKNOWN,
                         DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, false,
                         D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);

    TCacheEntry* const entry = new TCacheEntry(config, texture);
    entry->m_texture_srv_cpu_handle = texture->GetSRV12CPU();
    entry->m_texture_srv_gpu_handle = texture->GetSRV12GPU();
    entry->m_texture_srv_gpu_handle_cpu_shadow = texture->GetSRV12GPUCPUShadow();

    // EXISTINGD3D11TODO: better debug names
    D3D::SetDebugObjectName12(entry->m_texture->GetTex12(), "a texture of the TextureCache");

    // Drop the local reference obtained from CreateCommittedResource.
    // NOTE(review): presumably D3DTexture2D holds its own reference by now -- confirm.
    SAFE_RELEASE(texture_resource);
    return entry;
  }
}
void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& srcRect,
bool scale_by_half, unsigned int cbuf_id,
const float* colmat)
{
// When copying at half size, in multisampled mode, resolve the color/depth buffer first.
// This is because multisampled texture reads go through Load, not Sample, and the linear
// filter is ignored.
bool multisampled = (g_ActiveConfig.iMultisamples > 1);
D3DTexture2D* efb_tex = is_depth_copy ? FramebufferManager::GetEFBDepthTexture() :
FramebufferManager::GetEFBColorTexture();
if (multisampled && scale_by_half)
{
multisampled = false;
efb_tex = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture() :
FramebufferManager::GetResolvedEFBColorTexture();
}
// set transformation
if (s_efb_copy_last_cbuf_id != cbuf_id)
{
s_efb_copy_stream_buffer->AllocateSpaceInBuffer(28 * sizeof(float), 256);
memcpy(s_efb_copy_stream_buffer->GetCPUAddressOfCurrentAllocation(), colmat,
28 * sizeof(float));
s_efb_copy_last_cbuf_id = cbuf_id;
}
// stretch picture with increased internal resolution
D3D::SetViewportAndScissor(0, 0, config.width, config.height);
D3D::current_command_list->SetGraphicsRootConstantBufferView(
DESCRIPTOR_TABLE_PS_CBVONE, s_efb_copy_stream_buffer->GetGPUAddressOfCurrentAllocation());
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect);
// EXISTINGD3D11TODO: try targetSource.asRECT();
const D3D12_RECT sourcerect =
CD3DX12_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom);
// Use linear filtering if (bScaleByHalf), use point filtering otherwise
if (scale_by_half)
D3D::SetLinearCopySampler();
else
D3D::SetPointCopySampler();
// Make sure we don't draw with the texture set as both a source and target.
// (This can happen because we don't unbind textures when we free them.)
m_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &m_texture->GetRTV12(), FALSE, nullptr);
// Create texture copy
D3D::DrawShadedTexQuad(
efb_tex, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(),
is_depth_copy ? StaticShaderCache::GetDepthMatrixPixelShader(multisampled) :
StaticShaderCache::GetColorMatrixPixelShader(multisampled),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(),
StaticShaderCache::GetCopyGeometryShader(), 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, false,
m_texture->GetMultisampled());
m_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);
g_renderer->RestoreAPIState();
}
// Forwards an EFB copy request, with all arguments unchanged, to the file-local PSTextureEncoder
// (s_encoder).
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half)
{
s_encoder->Encode(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride,
is_depth_copy, src_rect, scale_by_half);
}
// HLSL source of the palette conversion pixel shader. It samples an index from Tex0, scales it
// by `Multiply`, loads the 16-bit palette entry at that index from Tex1, byte-swaps it and
// decodes it with DECODE. DECODE is not defined in this text: GetConvertShader12 prepends
// "#define DECODE DecodePixel_<Type>" before compiling, selecting one of the DecodePixel_*
// functions below.
static const constexpr char s_palette_shader_hlsl[] =
R"HLSL(
sampler samp0 : register(s0);
Texture2DArray Tex0 : register(t0);
Buffer<uint> Tex1 : register(t1);
uniform float Multiply;
uint Convert3To8(uint v)
{
// Swizzle bits: 00000123 -> 12312312
return (v << 5) | (v << 2) | (v >> 1);
}
uint Convert4To8(uint v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | v;
}
uint Convert5To8(uint v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
uint Convert6To8(uint v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
}
float4 DecodePixel_RGB5A3(uint val)
{
int r,g,b,a;
if ((val&0x8000))
{
r=Convert5To8((val>>10) & 0x1f);
g=Convert5To8((val>>5 ) & 0x1f);
b=Convert5To8((val ) & 0x1f);
a=0xFF;
}
else
{
a=Convert3To8((val>>12) & 0x7);
r=Convert4To8((val>>8 ) & 0xf);
g=Convert4To8((val>>4 ) & 0xf);
b=Convert4To8((val ) & 0xf);
}
return float4(r, g, b, a) / 255;
}
float4 DecodePixel_RGB565(uint val)
{
int r, g, b, a;
r = Convert5To8((val >> 11) & 0x1f);
g = Convert6To8((val >> 5) & 0x3f);
b = Convert5To8((val) & 0x1f);
a = 0xFF;
return float4(r, g, b, a) / 255;
}
float4 DecodePixel_IA8(uint val)
{
int i = val & 0xFF;
int a = val >> 8;
return float4(i, i, i, a) / 255;
}
void main(
out float4 ocol0 : SV_Target,
in float4 pos : SV_Position,
in float3 uv0 : TEXCOORD0)
{
uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r;
src = Tex1.Load(src);
src = ((src << 8) & 0xFF00) | (src >> 8);
ocol0 = DECODE(src);
}
)HLSL";
// Converts the paletted texture `unconverted` into `entry` by drawing it through the palette
// pixel shader selected by `format`. 512 bytes are copied from `palette` into the palette stream
// buffer and exposed to the shader as a 256-element R16_UINT buffer in the second SRV slot.
void TextureCache::ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted,
                                  void* palette, TlutFormat format)
{
  TCacheEntry* const dst_entry = static_cast<TCacheEntry*>(entry);
  TCacheEntry* const src_entry = static_cast<TCacheEntry*>(unconverted);

  // Upload the palette.
  const unsigned int palette_bytes = 512;
  m_palette_stream_buffer->AllocateSpaceInBuffer(palette_bytes, 256);
  memcpy(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation(), palette, palette_bytes);

  // stretch picture with increased internal resolution
  D3D::SetViewportAndScissor(0, 0, unconverted->config.width, unconverted->config.height);

  // D3D12: Because the second SRV slot is occupied by this buffer, and an arbitrary texture
  // occupies the first SRV slot, we need to allocate temporary space out of our descriptor heap,
  // place the palette SRV in the second slot, then copy the existing texture's descriptor into
  // the first slot.

  // First, allocate the (temporary) space in the descriptor heap.
  D3D12_CPU_DESCRIPTOR_HANDLE group_cpu[2] = {};
  D3D12_GPU_DESCRIPTOR_HANDLE group_gpu[2] = {};
  D3D::gpu_descriptor_heap_mgr->AllocateGroup(group_cpu, 2, group_gpu, nullptr, true);
  // The second slot sits one descriptor past the first.
  group_cpu[1].ptr = group_cpu[0].ptr + D3D::resource_descriptor_size;

  // Now, create the palette SRV at the appropriate offset.
  D3D12_SHADER_RESOURCE_VIEW_DESC palette_srv_desc = {
      DXGI_FORMAT_R16_UINT,                     // DXGI_FORMAT Format;
      D3D12_SRV_DIMENSION_BUFFER,               // D3D12_SRV_DIMENSION ViewDimension;
      D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING  // UINT Shader4ComponentMapping;
  };
  // Each 'element' is two bytes since format is R16.
  palette_srv_desc.Buffer.FirstElement =
      m_palette_stream_buffer->GetOffsetOfCurrentAllocation() / sizeof(u16);
  palette_srv_desc.Buffer.NumElements = 256;
  D3D::device12->CreateShaderResourceView(m_palette_stream_buffer->GetBuffer(), &palette_srv_desc,
                                          group_cpu[1]);

  // Now, copy the existing texture's descriptor into the new temporary location.
  src_entry->m_texture->TransitionToResourceState(D3D::current_command_list,
                                                  D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  D3D::device12->CopyDescriptorsSimple(1, group_cpu[0],
                                       src_entry->m_texture->GetSRV12GPUCPUShadow(),
                                       D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

  // Finally, bind our temporary location.
  D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV,
                                                            group_gpu[0]);

  // D3D11EXISTINGTODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
  // D3D12: See TextureCache::TextureCache() - because there are only two possible buffer contents
  // here, just pre-populate the data in two parts of the same upload heap.
  const bool is_i4 = ((unconverted->format & 0xf) == GX_TF_I4);
  if (is_i4)
  {
    D3D::current_command_list->SetGraphicsRootConstantBufferView(
        DESCRIPTOR_TABLE_PS_CBVONE, m_palette_uniform_buffer->GetGPUVirtualAddress());
  }
  else
  {
    D3D::current_command_list->SetGraphicsRootConstantBufferView(
        DESCRIPTOR_TABLE_PS_CBVONE, m_palette_uniform_buffer->GetGPUVirtualAddress() + 256);
  }
  D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);

  const D3D12_RECT full_source_rect =
      CD3DX12_RECT(0, 0, unconverted->config.width, unconverted->config.height);

  D3D::SetPointCopySampler();

  // Make sure we don't draw with the texture set as both a source and target.
  // (This can happen because we don't unbind textures when we free them.)
  dst_entry->m_texture->TransitionToResourceState(D3D::current_command_list,
                                                  D3D12_RESOURCE_STATE_RENDER_TARGET);
  const D3D12_CPU_DESCRIPTOR_HANDLE target_rtv = dst_entry->m_texture->GetRTV12();
  D3D::current_command_list->OMSetRenderTargets(1, &target_rtv, FALSE, nullptr);

  // Create texture copy
  D3D::DrawShadedTexQuad(
      src_entry->m_texture, &full_source_rect, unconverted->config.width,
      unconverted->config.height, m_palette_pixel_shaders[format],
      StaticShaderCache::GetSimpleVertexShader(),
      StaticShaderCache::GetSimpleVertexShaderInputLayout(),
      StaticShaderCache::GetCopyGeometryShader(), 1.0f, 0, DXGI_FORMAT_R8G8B8A8_UNORM, true,
      dst_entry->m_texture->GetMultisampled());

  dst_entry->m_texture->TransitionToResourceState(D3D::current_command_list,
                                                  D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);

  // Return the EFB textures to their rendering states before restoring API state.
  FramebufferManager::GetEFBColorTexture()->TransitionToResourceState(
      D3D::current_command_list, D3D12_RESOURCE_STATE_RENDER_TARGET);
  FramebufferManager::GetEFBDepthTexture()->TransitionToResourceState(
      D3D::current_command_list, D3D12_RESOURCE_STATE_DEPTH_WRITE);

  g_renderer->RestoreAPIState();
}
// Compiles the palette conversion pixel shader for one TLUT decode variant.
//
// `Type` is appended to "DecodePixel_" to form the DECODE macro that is prepended to
// s_palette_shader_hlsl, so it must name one of the DecodePixel_* functions in that source
// ("IA8", "RGB565" or "RGB5A3").
//
// Returns bytecode that points into the compiled blob's storage; the blob is not released here.
// If compilation produced no blob, an assertion fires and zero-initialized bytecode is returned
// instead of dereferencing a null pointer.
D3D12_SHADER_BYTECODE GetConvertShader12(const std::string& Type)
{
  std::string shader = "#define DECODE DecodePixel_";
  shader.append(Type);
  shader.append("\n");
  shader.append(s_palette_shader_hlsl);

  ID3DBlob* blob = nullptr;
  D3D::CompilePixelShader(shader, &blob);

  _assert_msg_(VIDEO, blob != nullptr, "Failed to compile palette conversion pixel shader");
  if (blob == nullptr)
    return {};

  return {blob->GetBufferPointer(), blob->GetBufferSize()};
}
// Sets up everything the D3D12 texture cache needs: the EFB copy encoder and its constant stream
// buffer, the readback bookkeeping used by TCacheEntry::Save, the three palette conversion
// shaders, the palette stream buffer, and a small default-heap constant buffer holding the two
// possible `Multiply` parameter sets for the palette shader (15 at offset 0, 255 at offset 256).
TextureCache::TextureCache()
{
  s_encoder = std::make_unique<PSTextureEncoder>();
  s_encoder->Init();

  s_efb_copy_stream_buffer = std::make_unique<D3DStreamBuffer>(1024 * 1024, 1024 * 1024, nullptr);
  s_efb_copy_last_cbuf_id = UINT_MAX;

  s_texture_cache_entry_readback_buffer = nullptr;
  s_texture_cache_entry_readback_buffer_size = 0;

  m_palette_pixel_shaders[GX_TL_IA8] = GetConvertShader12(std::string("IA8"));
  m_palette_pixel_shaders[GX_TL_RGB565] = GetConvertShader12(std::string("RGB565"));
  m_palette_pixel_shaders[GX_TL_RGB5A3] = GetConvertShader12(std::string("RGB5A3"));

  m_palette_stream_buffer = std::make_unique<D3DStreamBuffer>(
      sizeof(u16) * 256 * 1024, sizeof(u16) * 256 * 1024 * 16, nullptr);

  // Right now, there are only two variants of palette_uniform data. So, we'll just create an upload
  // heap to permanently store both of these.
  // Named locals instead of `&Temporary(...)`: taking the address of a class rvalue is a compiler
  // extension, not standard C++.
  const CD3DX12_HEAP_PROPERTIES heap_properties(D3D12_HEAP_TYPE_DEFAULT);
  // Constant Buffers have to be 256b aligned. "* 2" to create for two sets of data.
  const CD3DX12_RESOURCE_DESC uniform_buffer_desc =
      CD3DX12_RESOURCE_DESC::Buffer(((16 + 255) & ~255) * 2);
  CheckHR(D3D::device12->CreateCommittedResource(
      &heap_properties, D3D12_HEAP_FLAG_NONE, &uniform_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST,
      nullptr, IID_PPV_ARGS(&m_palette_uniform_buffer)));

  D3D::SetDebugObjectName12(m_palette_uniform_buffer,
                            "a constant buffer used in TextureCache::ConvertTexture");

  // Temporarily repurpose m_palette_stream_buffer as a copy source to populate initial data here.
  m_palette_stream_buffer->AllocateSpaceInBuffer(256 * 2, 256);

  u8* upload_heap_data_location =
      reinterpret_cast<u8*>(m_palette_stream_buffer->GetCPUAddressOfCurrentAllocation());

  memset(upload_heap_data_location, 0, 256 * 2);

  // Only the first float of each set is non-zero; the remaining elements are zero-initialized.
  float paramsFormatZero[4] = {15.f};
  float paramsFormatNonzero[4] = {255.f};

  memcpy(upload_heap_data_location, paramsFormatZero, sizeof(paramsFormatZero));
  memcpy(upload_heap_data_location + 256, paramsFormatNonzero, sizeof(paramsFormatNonzero));

  D3D::current_command_list->CopyBufferRegion(
      m_palette_uniform_buffer, 0, m_palette_stream_buffer->GetBuffer(),
      m_palette_stream_buffer->GetOffsetOfCurrentAllocation(), 256 * 2);

  // The buffer is only read as a constant buffer from here on.
  DX12::D3D::ResourceBarrier(D3D::current_command_list, m_palette_uniform_buffer,
                             D3D12_RESOURCE_STATE_COPY_DEST,
                             D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, 0);
}
// Tears down the encoder and the EFB copy stream buffer, releases the Save() readback buffer if
// one was ever created, and queues the palette uniform buffer for destruction once the current
// command list has executed.
TextureCache::~TextureCache()
{
  s_encoder->Shutdown();
  s_encoder.reset();

  s_efb_copy_stream_buffer.reset();

  if (s_texture_cache_entry_readback_buffer != nullptr)
  {
    // Safe to destroy the readback buffer immediately, as the only time it's used is blocked until
    // completion.
    s_texture_cache_entry_readback_buffer->Release();
    s_texture_cache_entry_readback_buffer_size = 0;
    s_texture_cache_entry_readback_buffer = nullptr;
  }

  D3D::command_list_mgr->DestroyResourceAfterCurrentCommandListExecuted(m_palette_uniform_buffer);
}
void TextureCache::BindTextures()
{
unsigned int last_texture = 0;
for (unsigned int i = 0; i < 8; ++i)
{
if (bound_textures[i] != nullptr)
{
last_texture = i;
}
}
if (last_texture == 0 && bound_textures[0] != nullptr)
{
DX12::D3D::current_command_list->SetGraphicsRootDescriptorTable(
DESCRIPTOR_TABLE_PS_SRV,
reinterpret_cast<TCacheEntry*>(bound_textures[0])->m_texture_srv_gpu_handle);
return;
}
// If more than one texture, allocate space for group.
D3D12_CPU_DESCRIPTOR_HANDLE s_group_base_texture_cpu_handle;
D3D12_GPU_DESCRIPTOR_HANDLE s_group_base_texture_gpu_handle;
DX12::D3D::gpu_descriptor_heap_mgr->AllocateGroup(
&s_group_base_texture_cpu_handle, 8, &s_group_base_texture_gpu_handle, nullptr, true);
for (unsigned int stage = 0; stage < 8; stage++)
{
if (bound_textures[stage] != nullptr)
{
D3D12_CPU_DESCRIPTOR_HANDLE textureDestDescriptor;
textureDestDescriptor.ptr =
s_group_base_texture_cpu_handle.ptr + stage * D3D::resource_descriptor_size;
DX12::D3D::device12->CopyDescriptorsSimple(
1, textureDestDescriptor, reinterpret_cast<TCacheEntry*>(bound_textures[stage])
->m_texture_srv_gpu_handle_cpu_shadow,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
else
{
D3D12_CPU_DESCRIPTOR_HANDLE nullDestDescriptor;
nullDestDescriptor.ptr =
s_group_base_texture_cpu_handle.ptr + stage * D3D::resource_descriptor_size;
DX12::D3D::device12->CopyDescriptorsSimple(1, nullDestDescriptor,
DX12::D3D::null_srv_cpu_shadow,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
}
// Actually bind the textures.
DX12::D3D::current_command_list->SetGraphicsRootDescriptorTable(DESCRIPTOR_TABLE_PS_SRV,
s_group_base_texture_gpu_handle);
}
}

View File

@ -1,75 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoCommon/TextureCacheBase.h"
namespace DX12
{
class D3DStreamBuffer;
// D3D12 implementation of the texture cache on top of TextureCacheBase.
class TextureCache final : public TextureCacheBase
{
public:
TextureCache();
~TextureCache();
// NOTE(review): declared `virtual` without `override` -- presumably overrides a
// TextureCacheBase virtual; confirm and mark it `override`.
virtual void BindTextures();
private:
// One cached texture together with the descriptor handles needed to bind it.
struct TCacheEntry : TCacheEntryBase
{
// Texture backing this entry; released in the destructor.
D3DTexture2D* const m_texture = nullptr;
// SRV handles copied from the texture when the entry is created in CreateTexture.
D3D12_CPU_DESCRIPTOR_HANDLE m_texture_srv_cpu_handle = {};
D3D12_GPU_DESCRIPTOR_HANDLE m_texture_srv_gpu_handle = {};
D3D12_CPU_DESCRIPTOR_HANDLE m_texture_srv_gpu_handle_cpu_shadow = {};
TCacheEntry(const TCacheEntryConfig& config, D3DTexture2D* tex)
: TCacheEntryBase(config), m_texture(tex)
{
}
~TCacheEntry();
void CopyRectangleFromTexture(const TCacheEntryBase* source,
const MathUtil::Rectangle<int>& src_rect,
const MathUtil::Rectangle<int>& dst_rect) override;
void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer,
size_t buffer_size) override;
void FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half,
unsigned int cbuf_id, const float* colmat) override;
void Bind(unsigned int stage) override;
bool Save(const std::string& filename, unsigned int level) override;
};
TCacheEntryBase* CreateTexture(const TCacheEntryConfig& config) override;
// Stub that ignores all arguments and always returns 0.
// NOTE(review): not marked `override`, and the `;` after the body is redundant.
u64 EncodeToRamFromTexture(u32 address, void* source_texture, u32 source_width, u32 source_height,
bool is_from_z_buffer, bool is_intensity_format, u32 copy_format,
int scale_by_half, const EFBRectangle& source)
{
return 0;
};
void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
TlutFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half) override;
// Shader hooks are no-ops here; CompileShaders always reports success.
bool CompileShaders() override { return true; }
void DeleteShaders() override {}
// Stream buffer that palettes are uploaded into by ConvertTexture.
std::unique_ptr<D3DStreamBuffer> m_palette_stream_buffer;
// Constant buffer holding the two palette shader parameter sets (filled in the constructor).
ID3D12Resource* m_palette_uniform_buffer = nullptr;
// Palette conversion pixel shaders, indexed by TlutFormat (GX_TL_IA8, GX_TL_RGB565,
// GX_TL_RGB5A3).
D3D12_SHADER_BYTECODE m_palette_pixel_shaders[3] = {};
};
}

View File

@ -1,221 +0,0 @@
// Copyright 2010 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/VertexManager.h"
#include "Common/CommonTypes.h"
#include "VideoBackends/D3D12/BoundingBox.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/ShaderCache.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
// Size limits, in bytes, for the index and vertex stream buffers: 16x the base index buffer
// size (counted in u16 indices) and 4x the base vertex buffer size.
static constexpr unsigned int MAX_IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 16;
static constexpr unsigned int MAX_VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 4;
void VertexManager::SetIndexBuffer()
{
D3D12_INDEX_BUFFER_VIEW ib_view = {
m_index_stream_buffer->GetBaseGPUAddress(), // D3D12_GPU_VIRTUAL_ADDRESS BufferLocation;
static_cast<UINT>(m_index_stream_buffer->GetSize()), // UINT SizeInBytes;
DXGI_FORMAT_R16_UINT // DXGI_FORMAT Format;
};
D3D::current_command_list->IASetIndexBuffer(&ib_view);
}
void VertexManager::CreateDeviceObjects()
{
m_vertex_draw_offset = 0;
m_index_draw_offset = 0;
m_vertex_stream_buffer = std::make_unique<D3DStreamBuffer>(MAXVBUFFERSIZE * 2, MAX_VBUFFER_SIZE,
&m_vertex_stream_buffer_reallocated);
m_index_stream_buffer = std::make_unique<D3DStreamBuffer>(MAXIBUFFERSIZE * sizeof(u16) * 2,
MAXIBUFFERSIZE * sizeof(u16) * 16,
&m_index_stream_buffer_reallocated);
SetIndexBuffer();
// Use CPU-only memory if the GPU won't be reading from the buffers,
// since reading upload heaps on the CPU is slow..
m_vertex_cpu_buffer.resize(MAXVBUFFERSIZE);
m_index_cpu_buffer.resize(MAXIBUFFERSIZE);
}
// Destroys both stream buffers and empties the CPU-side vertex/index staging buffers.
void VertexManager::DestroyDeviceObjects()
{
m_vertex_stream_buffer.reset();
m_index_stream_buffer.reset();
m_vertex_cpu_buffer.clear();
m_index_cpu_buffer.clear();
}
// Construction just creates the device objects (stream buffers and CPU staging buffers).
VertexManager::VertexManager()
{
CreateDeviceObjects();
}
// Destruction releases everything CreateDeviceObjects set up.
VertexManager::~VertexManager()
{
DestroyDeviceObjects();
}
// Shrinks the most recent vertex/index stream-buffer allocations to the number of bytes that
// were actually generated, and records those byte counts in the per-frame statistics.
//
// stride: size in bytes of one vertex in the current vertex format.
void VertexManager::PrepareDrawBuffers(u32 stride)
{
  const u32 vertex_bytes = IndexGenerator::GetNumVerts() * stride;
  const u32 index_bytes = IndexGenerator::GetIndexLen() * sizeof(u16);

  m_vertex_stream_buffer->OverrideSizeOfPreviousAllocation(vertex_bytes);
  m_index_stream_buffer->OverrideSizeOfPreviousAllocation(index_bytes);

  ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_bytes);
  ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_bytes);
}
// Records one indexed draw for the data currently held in the stream buffers.
//
// The vertex buffer view is only rebound when the command list marks it dirty or when the
// stride differs from the one used by the previous call; the primitive topology is only set
// when it differs from the one cached by the command list manager.
//
// stride: size in bytes of one vertex in the current vertex format.
void VertexManager::Draw(u32 stride)
{
  // Stride used for the last vertex buffer binding; UINT_MAX forces a bind on the first call.
  static u32 s_last_bound_stride = UINT_MAX;

  const u32 index_count = IndexGenerator::GetIndexLen();

  const bool vertex_buffer_dirty =
      D3D::command_list_mgr->GetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER);
  if (vertex_buffer_dirty || s_last_bound_stride != stride)
  {
    D3D12_VERTEX_BUFFER_VIEW vertex_view;
    vertex_view.BufferLocation = m_vertex_stream_buffer->GetBaseGPUAddress();
    vertex_view.SizeInBytes = static_cast<UINT>(m_vertex_stream_buffer->GetSize());
    vertex_view.StrideInBytes = stride;

    D3D::current_command_list->IASetVertexBuffers(0, 1, &vertex_view);
    D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, false);
    s_last_bound_stride = stride;
  }

  // Points and lines get list topologies; every other primitive type is drawn as a strip.
  D3D_PRIMITIVE_TOPOLOGY topology;
  if (m_current_primitive_type == PRIMITIVE_POINTS)
    topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
  else if (m_current_primitive_type == PRIMITIVE_LINES)
    topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
  else
    topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;

  if (D3D::command_list_mgr->GetCommandListPrimitiveTopology() != topology)
  {
    D3D::current_command_list->IASetPrimitiveTopology(topology);
    D3D::command_list_mgr->SetCommandListPrimitiveTopology(topology);
  }

  // Convert the byte offsets of the current allocations into element offsets.
  const u32 first_vertex = m_vertex_draw_offset / stride;
  const u32 first_index = m_index_draw_offset / sizeof(u16);

  D3D::current_command_list->DrawIndexedInstanced(index_count, 1, first_index, first_vertex, 0);

  INCSTAT(stats.thisFrame.numDrawCalls);
}
void VertexManager::vFlush()
{
ShaderCache::LoadAndSetActiveShaders(m_current_primitive_type);
if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active)
BBox::Invalidate();
u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride();
PrepareDrawBuffers(stride);
g_renderer->ApplyState();
Draw(stride);
D3D::command_list_mgr->m_draws_since_last_execution++;
// Many Gamecube/Wii titles read from the EFB each frame to determine what new rendering work to
// submit, e.g. where sun rays are
// occluded and where they aren't. When the CPU wants to read this data (done in
// Renderer::AccessEFB), it requires that the GPU
// finish all oustanding work. As an optimization, when we detect that the CPU is likely to read
// back data this frame, we break
// up the rendering work and submit it more frequently to the GPU (via ExecuteCommandList). Thus,
// when the CPU finally needs the
// the GPU to finish all of its work, there is (hopefully) less work outstanding to wait on at
// that moment.
// D3D12TODO: Decide right threshold for drawCountSinceAsyncFlush at runtime depending on
// amount of stall measured in AccessEFB.
// We can't do this with perf queries enabled since it can leave queries open.
if (D3D::command_list_mgr->m_cpu_access_last_frame &&
D3D::command_list_mgr->m_draws_since_last_execution > 100 && !PerfQueryBase::ShouldEmulate())
{
D3D::command_list_mgr->m_draws_since_last_execution = 0;
D3D::command_list_mgr->ExecuteQueuedWork();
}
}
// Points the vertex write pointers and the index generator at fresh storage for the next batch.
//
// When m_cull_all is set the CPU-side buffers are used; otherwise space is allocated in the
// vertex and index stream buffers, and the buffers are rebound if an allocation reported a
// reallocation.
//
// stride: size in bytes of one vertex; passed as the second argument of the vertex allocation.
void VertexManager::ResetBuffer(u32 stride)
{
  if (m_cull_all)
  {
    u8* const cpu_vertices = m_vertex_cpu_buffer.data();
    m_cur_buffer_pointer = cpu_vertices;
    m_base_buffer_pointer = cpu_vertices;
    m_end_buffer_pointer = cpu_vertices + MAXVBUFFERSIZE;

    IndexGenerator::Start(reinterpret_cast<u16*>(m_index_cpu_buffer.data()));
    return;
  }

  // Vertex data.
  D3DStreamBuffer& vertex_stream = *m_vertex_stream_buffer;
  vertex_stream.AllocateSpaceInBuffer(MAXVBUFFERSIZE, stride);

  if (m_vertex_stream_buffer_reallocated)
  {
    // Draw() rebinds the vertex buffer view when this dirty bit is set.
    D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_VERTEX_BUFFER, true);
    m_vertex_stream_buffer_reallocated = false;
  }

  m_base_buffer_pointer = static_cast<u8*>(vertex_stream.GetBaseCPUAddress());
  m_end_buffer_pointer = m_base_buffer_pointer + vertex_stream.GetSize();
  m_cur_buffer_pointer = static_cast<u8*>(vertex_stream.GetCPUAddressOfCurrentAllocation());
  m_vertex_draw_offset = static_cast<u32>(vertex_stream.GetOffsetOfCurrentAllocation());

  // Index data.
  D3DStreamBuffer& index_stream = *m_index_stream_buffer;
  index_stream.AllocateSpaceInBuffer(MAXIBUFFERSIZE * sizeof(u16), sizeof(u16));

  if (m_index_stream_buffer_reallocated)
  {
    SetIndexBuffer();
    m_index_stream_buffer_reallocated = false;
  }

  m_index_draw_offset = static_cast<u32>(index_stream.GetOffsetOfCurrentAllocation());

  u16* const index_write_ptr = static_cast<u16*>(index_stream.GetCPUAddressOfCurrentAllocation());
  IndexGenerator::Start(index_write_ptr);
}
} // namespace

View File

@ -1,49 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "VideoCommon/VertexManagerBase.h"
namespace DX12
{
class D3DStreamBuffer;
// D3D12 implementation of VertexManagerBase. Streams vertex and index data through two
// D3DStreamBuffer instances and keeps CPU-only fallback buffers alongside them.
class VertexManager final : public VertexManagerBase
{
public:
// Construction calls CreateDeviceObjects(); destruction calls DestroyDeviceObjects().
VertexManager();
~VertexManager();
// Not defined in the same source file as the other members shown alongside this header.
std::unique_ptr<NativeVertexFormat>
CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override;
// Allocate / release the stream buffers and CPU buffers.
void CreateDeviceObjects() override;
void DestroyDeviceObjects() override;
// Binds the index stream buffer to the current command list as a 16-bit index buffer.
void SetIndexBuffer();
protected:
// Selects where the next batch of vertices/indices is written (CPU buffers or stream buffers).
void ResetBuffer(u32 stride) override;
private:
// Trims the last stream-buffer allocations to the generated size and updates statistics.
void PrepareDrawBuffers(u32 stride);
// Binds vertex buffer/topology as needed and issues the indexed draw.
void Draw(u32 stride);
void vFlush() override;
// Byte offsets of the current allocations inside the vertex/index stream buffers.
u32 m_vertex_draw_offset;
u32 m_index_draw_offset;
std::unique_ptr<D3DStreamBuffer> m_vertex_stream_buffer;
std::unique_ptr<D3DStreamBuffer> m_index_stream_buffer;
// Passed by address to the stream buffers; checked and cleared in ResetBuffer().
bool m_vertex_stream_buffer_reallocated = false;
bool m_index_stream_buffer_reallocated = false;
// CPU-only storage used by ResetBuffer() when m_cull_all is set.
// NOTE(review): std::vector is used here but <vector> is not included by this header.
std::vector<u8> m_index_cpu_buffer;
std::vector<u8> m_vertex_cpu_buffer;
};
} // namespace

View File

@ -1,30 +0,0 @@
// Copyright 2011 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <string>
#include "VideoCommon/VideoBackendBase.h"
namespace DX12
{
// Direct3D 12 video backend. All overrides are declared without an access specifier, so they
// are private to this class and reachable through the VideoBackendBase interface.
class VideoBackend : public VideoBackendBase
{
// Returns false when the window handle is null or device creation fails.
bool Initialize(void*) override;
void Shutdown() override;
// Short backend identifier and the human-readable name.
std::string GetName() const override;
std::string GetDisplayName() const override;
// Create / tear down the renderer-side objects.
void Video_Prepare() override;
void Video_Cleanup() override;
// Fills g_Config.backend_info (capabilities, adapters, AA modes).
void InitBackendInfo() override;
// Pumps the Win32 message queue; returns FALSE once WM_QUIT is seen, TRUE otherwise.
unsigned int PeekMessages() override;
private:
// Window handle stored by Initialize() on success. Has no in-class initializer.
void* m_window_handle;
};
}

View File

@ -1,200 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/D3D12/XFBEncoder.h"
#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DState.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/FramebufferManager.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
namespace DX12
{
// YUYV data is packed into half-width RGBA, with Y values in (R,B) and UV in (G,A)
constexpr size_t XFB_TEXTURE_WIDTH = MAX_XFB_WIDTH / 2;
constexpr size_t XFB_TEXTURE_HEIGHT = MAX_XFB_HEIGHT;
// Buffer enough space for 2 XFB buffers (our frame latency)
// Each row is padded up to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, the same row pitch that
// DecodeToTexture uses when staging rows for the buffer-to-texture copy.
constexpr size_t XFB_UPLOAD_BUFFER_SIZE =
Common::AlignUp(XFB_TEXTURE_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) *
XFB_TEXTURE_HEIGHT * 2;
// Byte size passed as both size arguments of the encode-parameters stream buffer.
constexpr size_t XFB_ENCODER_PARAMS_BUFFER_SIZE = 64 * 1024;
// Global encoder instance; created in VideoBackend::Video_Prepare and reset in
// VideoBackend::Shutdown.
std::unique_ptr<XFBEncoder> g_xfb_encoder;
// Creates the GPU resources used for XFB encoding/decoding:
//  - an RGBA8 render-target texture that holds the packed YUYV image,
//  - a readback-heap buffer large enough for one pitch-aligned YUYV image,
//  - stream buffers for uploading XFB data and for the encode shader's constants.
XFBEncoder::XFBEncoder()
{
  // The heap properties and resource descriptions are named locals: taking the address of a
  // temporary (&CD3DX12_HEAP_PROPERTIES(...)) is only accepted as a compiler extension.
  const CD3DX12_HEAP_PROPERTIES default_heap(D3D12_HEAP_TYPE_DEFAULT);
  const CD3DX12_RESOURCE_DESC yuyv_desc = CD3DX12_RESOURCE_DESC::Tex2D(
      DXGI_FORMAT_R8G8B8A8_UNORM, XFB_TEXTURE_WIDTH, XFB_TEXTURE_HEIGHT, 1, 1, 1, 0,
      D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET);

  ID3D12Resource* texture = nullptr;
  CheckHR(D3D::device12->CreateCommittedResource(
      &default_heap, D3D12_HEAP_FLAG_NONE, &yuyv_desc, D3D12_RESOURCE_STATE_RENDER_TARGET,
      nullptr, IID_PPV_ARGS(&texture)));

  m_yuyv_texture =
      new D3DTexture2D(texture, TEXTURE_BIND_FLAG_SHADER_RESOURCE | TEXTURE_BIND_FLAG_RENDER_TARGET,
                       DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM);
  // Drop the local reference obtained from CreateCommittedResource.
  SAFE_RELEASE(texture);

  const CD3DX12_HEAP_PROPERTIES readback_heap(D3D12_HEAP_TYPE_READBACK);
  const CD3DX12_RESOURCE_DESC readback_desc = CD3DX12_RESOURCE_DESC::Buffer(
      Common::AlignUp(XFB_TEXTURE_WIDTH * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) *
      MAX_XFB_HEIGHT);

  CheckHR(D3D::device12->CreateCommittedResource(
      &readback_heap, D3D12_HEAP_FLAG_NONE, &readback_desc, D3D12_RESOURCE_STATE_COPY_DEST,
      nullptr, IID_PPV_ARGS(&m_readback_buffer)));

  m_upload_buffer =
      std::make_unique<D3DStreamBuffer>(XFB_UPLOAD_BUFFER_SIZE, XFB_UPLOAD_BUFFER_SIZE, nullptr);
  m_encode_params_buffer = std::make_unique<D3DStreamBuffer>(
      XFB_ENCODER_PARAMS_BUFFER_SIZE, XFB_ENCODER_PARAMS_BUFFER_SIZE, nullptr);
}
// Releases the YUYV texture and the readback buffer through SAFE_RELEASE. The two stream
// buffers are std::unique_ptr members and are not touched here.
XFBEncoder::~XFBEncoder()
{
SAFE_RELEASE(m_yuyv_texture);
SAFE_RELEASE(m_readback_buffer);
}
// Encodes a region of an RGBA texture to YUYV and writes the result to CPU memory.
//
// Steps: upload the shader parameters, draw src_texture into the intermediate YUYV texture with
// the XFB encode pixel shader, copy that texture into the readback buffer, wait for the GPU,
// then copy the rows out to dst.
//
// dst         destination memory; rows are written at dst + dst_pitch * row.
// dst_pitch   see the note on units below.
// dst_height  number of rows to encode and copy.
// src_texture texture to read from; its resource state is restored after the draw.
// src_rect    source rectangle; divided by src_width/src_height to form the shader's srcRect.
// gamma       forwarded to D3D::DrawShadedTexQuad.
void XFBEncoder::EncodeTextureToRam(u8* dst, u32 dst_pitch, u32 dst_height,
D3DTexture2D* src_texture, const TargetRectangle& src_rect,
u32 src_width, u32 src_height, float gamma)
{
// src_rect is in native coordinates
// dst_pitch is in words
// NOTE(review): dst_pitch is also used below as a byte stride on a u8* (dst + dst_pitch * row)
// and as a memcpy length -- confirm the intended unit with the caller.
u32 dst_width = dst_pitch / 2;
// Two YUYV pixels are packed into each RGBA texel, so the texture is half as wide.
u32 dst_texture_width = dst_width / 2;
_assert_msg_(VIDEO, dst_width <= MAX_XFB_WIDTH && dst_height <= MAX_XFB_HEIGHT,
"XFB destination does not exceed maximum size");
// Encode parameters constant buffer used by shader
struct EncodeParameters
{
float srcRect[4];
float texelSize[2];
float pad[2];
};
EncodeParameters parameters = {
{static_cast<float>(src_rect.left) / static_cast<float>(src_width),
static_cast<float>(src_rect.top) / static_cast<float>(src_height),
static_cast<float>(src_rect.right) / static_cast<float>(src_width),
static_cast<float>(src_rect.bottom) / static_cast<float>(src_height)},
{1.0f / EFB_WIDTH, 1.0f / EFB_HEIGHT},
{0.0f, 0.0f}};
m_encode_params_buffer->AllocateSpaceInBuffer(sizeof(parameters),
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
memcpy(m_encode_params_buffer->GetCPUAddressOfCurrentAllocation(), &parameters,
sizeof(parameters));
// Convert RGBA texture to YUYV intermediate texture.
// Performs downscaling through a linear filter. Probably not ideal, but it's not going to look
// perfect anyway.
CD3DX12_RECT src_texture_rect(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom);
// Remember the source texture's state so it can be restored after the draw.
D3D12_RESOURCE_STATES src_texture_state = src_texture->GetResourceUsageState();
m_yuyv_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &m_yuyv_texture->GetRTV12(), FALSE, nullptr);
D3D::current_command_list->SetGraphicsRootConstantBufferView(
DESCRIPTOR_TABLE_PS_CBVONE, m_encode_params_buffer->GetGPUAddressOfCurrentAllocation());
// The pixel shader CBV slot was overwritten above, so flag it dirty for later rebinding.
D3D::command_list_mgr->SetCommandListDirtyState(COMMAND_LIST_STATE_PS_CBV, true);
D3D::SetViewportAndScissor(0, 0, dst_texture_width, dst_height);
D3D::SetLinearCopySampler();
D3D::DrawShadedTexQuad(src_texture, &src_texture_rect, src_rect.GetWidth(), src_rect.GetHeight(),
StaticShaderCache::GetXFBEncodePixelShader(),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(), {}, gamma, 0,
DXGI_FORMAT_R8G8B8A8_UNORM, false, false);
src_texture->TransitionToResourceState(D3D::current_command_list, src_texture_state);
// Copy from YUYV intermediate texture to readback buffer. It's likely the pitch here is going to
// be different to dst_pitch.
u32 readback_pitch = static_cast<u32>(
Common::AlignUp(dst_width * sizeof(u16), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = {
0, {DXGI_FORMAT_R8G8B8A8_UNORM, dst_texture_width, dst_height, 1, readback_pitch}};
CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_readback_buffer, dst_footprint);
CD3DX12_TEXTURE_COPY_LOCATION src_location(m_yuyv_texture->GetTex12(), 0);
CD3DX12_BOX src_box(0, 0, dst_texture_width, dst_height);
m_yuyv_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_COPY_SOURCE);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
// Wait until the GPU completes the copy. Resets back to known state automatically.
D3D::command_list_mgr->ExecuteQueuedWork(true);
// Copy from the readback buffer to dst.
// Can't be done as one memcpy due to pitch difference.
void* readback_texture_map;
D3D12_RANGE read_range = {0, readback_pitch * dst_height};
CheckHR(m_readback_buffer->Map(0, &read_range, &readback_texture_map));
for (u32 row = 0; row < dst_height; row++)
{
const u8* row_src = reinterpret_cast<u8*>(readback_texture_map) + readback_pitch * row;
u8* row_dst = dst + dst_pitch * row;
// Copy no more than the smaller of the two pitches so neither buffer's row is overrun.
memcpy(row_dst, row_src, std::min(dst_pitch, readback_pitch));
}
// An empty written range tells D3D12 the CPU did not modify the buffer.
D3D12_RANGE write_range = {};
m_readback_buffer->Unmap(0, &write_range);
}
// Decodes YUYV data from CPU memory into an RGBA texture.
//
// Steps: stage the source rows in the upload stream buffer, copy them into the intermediate
// YUYV texture, then draw that texture into dst_texture with the XFB decode pixel shader.
//
// dst_texture destination texture; left in the pixel-shader-resource state on return.
// src         source data, read as src_height rows of src_width * 2 bytes.
// src_width   source width in pixels.
// src_height  source height in rows.
void XFBEncoder::DecodeToTexture(D3DTexture2D* dst_texture, const u8* src, u32 src_width,
u32 src_height)
{
_assert_msg_(VIDEO, src_width <= MAX_XFB_WIDTH && src_height <= MAX_XFB_HEIGHT,
"XFB source does not exceed maximum size");
// Copy to XFB upload buffer. Each row has to be done separately due to pitch differences.
u32 buffer_pitch = static_cast<u32>(
Common::AlignUp(src_width / 2 * sizeof(u32), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
m_upload_buffer->AllocateSpaceInBuffer(buffer_pitch * src_height,
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT);
for (u32 row = 0; row < src_height; row++)
{
// Source rows are tightly packed (src_width * 2 bytes); destination rows use buffer_pitch.
const u8* row_src = src + (src_width * 2) * row;
u8* row_dst = reinterpret_cast<u8*>(m_upload_buffer->GetCPUAddressOfCurrentAllocation()) +
buffer_pitch * row;
memcpy(row_dst, row_src, src_width * 2);
}
// Copy from upload buffer to intermediate YUYV texture.
// The texture is half as wide as the source because each RGBA texel holds two YUYV pixels.
D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = {
m_upload_buffer->GetOffsetOfCurrentAllocation(),
{DXGI_FORMAT_R8G8B8A8_UNORM, src_width / 2, src_height, 1, buffer_pitch}};
CD3DX12_TEXTURE_COPY_LOCATION src_location(m_upload_buffer->GetBuffer(), src_footprint);
CD3DX12_TEXTURE_COPY_LOCATION dst_location(m_yuyv_texture->GetTex12(), 0);
CD3DX12_BOX src_box(0, 0, src_width / 2, src_height);
m_yuyv_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_COPY_DEST);
D3D::current_command_list->CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
// Convert YUYV texture to RGBA texture with pixel shader.
CD3DX12_RECT src_texture_rect(0, 0, src_width / 2, src_height);
dst_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_RENDER_TARGET);
D3D::current_command_list->OMSetRenderTargets(1, &dst_texture->GetRTV12(), FALSE, nullptr);
D3D::SetViewportAndScissor(0, 0, src_width, src_height);
D3D::DrawShadedTexQuad(m_yuyv_texture, &src_texture_rect, XFB_TEXTURE_WIDTH, XFB_TEXTURE_HEIGHT,
StaticShaderCache::GetXFBDecodePixelShader(),
StaticShaderCache::GetSimpleVertexShader(),
StaticShaderCache::GetSimpleVertexShaderInputLayout(), {}, 1.0f, 0,
DXGI_FORMAT_R8G8B8A8_UNORM, false, false);
// XFB source textures are expected to be in shader resource state.
dst_texture->TransitionToResourceState(D3D::current_command_list,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
}

View File

@ -1,40 +0,0 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <d3d12.h>
#include <memory>
#include "VideoBackends/D3D12/D3DStreamBuffer.h"
#include "VideoBackends/D3D12/D3DTexture.h"
#include "VideoCommon/VideoCommon.h"
namespace DX12
{
class D3DTexture2D;

// Converts between RGBA textures and YUYV-packed XFB data in CPU memory, using an intermediate
// half-width RGBA texture plus readback/upload buffers.
class XFBEncoder
{
public:
  XFBEncoder();
  ~XFBEncoder();

  // Encodes src_rect of src_texture to YUYV and writes dst_height rows to dst.
  void EncodeTextureToRam(u8* dst, u32 dst_pitch, u32 dst_height, D3DTexture2D* src_texture,
                          const TargetRectangle& src_rect, u32 src_width, u32 src_height,
                          float gamma);

  // Decodes src_height rows of YUYV data at src into dst_texture.
  void DecodeToTexture(D3DTexture2D* dst_texture, const u8* src, u32 src_width, u32 src_height);

private:
  // Raw owning pointers released in the destructor; initialized to nullptr so the destructor
  // never sees an indeterminate value.
  D3DTexture2D* m_yuyv_texture = nullptr;
  ID3D12Resource* m_readback_buffer = nullptr;

  std::unique_ptr<D3DStreamBuffer> m_upload_buffer;
  std::unique_ptr<D3DStreamBuffer> m_encode_params_buffer;
};

extern std::unique_ptr<XFBEncoder> g_xfb_encoder;
}

View File

@ -1,210 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <string>
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "VideoBackends/D3D12/BoundingBox.h"
#include "VideoBackends/D3D12/D3DBase.h"
#include "VideoBackends/D3D12/D3DCommandListManager.h"
#include "VideoBackends/D3D12/D3DUtil.h"
#include "VideoBackends/D3D12/PerfQuery.h"
#include "VideoBackends/D3D12/Render.h"
#include "VideoBackends/D3D12/ShaderCache.h"
#include "VideoBackends/D3D12/ShaderConstantsManager.h"
#include "VideoBackends/D3D12/StaticShaderCache.h"
#include "VideoBackends/D3D12/TextureCache.h"
#include "VideoBackends/D3D12/VertexManager.h"
#include "VideoBackends/D3D12/VideoBackend.h"
#include "VideoBackends/D3D12/XFBEncoder.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
namespace DX12
{
unsigned int VideoBackend::PeekMessages()
{
MSG msg;
while (PeekMessage(&msg, 0, 0, 0, PM_REMOVE))
{
if (msg.message == WM_QUIT)
return FALSE;
TranslateMessage(&msg);
DispatchMessage(&msg);
}
return TRUE;
}
// Returns the short identifier of this backend.
std::string VideoBackend::GetName() const
{
return "D3D12";
}
// Returns the human-readable name of this backend.
std::string VideoBackend::GetDisplayName() const
{
return "Direct3D 12 (experimental)";
}
void VideoBackend::InitBackendInfo()
{
HRESULT hr = D3D::LoadDXGI();
if (FAILED(hr))
return;
hr = D3D::LoadD3D();
if (FAILED(hr))
{
D3D::UnloadDXGI();
return;
}
g_Config.backend_info.api_type = APIType::D3D;
g_Config.backend_info.MaxTextureSize = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
g_Config.backend_info.bSupportsExclusiveFullscreen = false;
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true;
g_Config.backend_info.bSupportsClipControl = true;
g_Config.backend_info.bSupportsDepthClamp = true;
g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
g_Config.backend_info.bSupportsST3CTextures = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;
hr = create_dxgi_factory(__uuidof(IDXGIFactory), (void**)&factory);
if (FAILED(hr))
{
PanicAlert("Failed to create IDXGIFactory object");
D3D::UnloadD3D();
D3D::UnloadDXGI();
return;
}
// adapters
g_Config.backend_info.Adapters.clear();
g_Config.backend_info.AAModes.clear();
while (factory->EnumAdapters((UINT)g_Config.backend_info.Adapters.size(), &ad) !=
DXGI_ERROR_NOT_FOUND)
{
const size_t adapter_index = g_Config.backend_info.Adapters.size();
DXGI_ADAPTER_DESC desc;
ad->GetDesc(&desc);
// TODO: These don't get updated on adapter change, yet
if (adapter_index == g_Config.iAdapter)
{
ID3D12Device* temp_device;
hr = d3d12_create_device(ad, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&temp_device));
if (SUCCEEDED(hr))
{
std::string samples;
std::vector<DXGI_SAMPLE_DESC> modes = D3D::EnumAAModes(temp_device);
// First iteration will be 1. This equals no AA.
for (unsigned int i = 0; i < modes.size(); ++i)
{
g_Config.backend_info.AAModes.push_back(modes[i].Count);
}
// Requires the earlydepthstencil attribute (only available in shader model 5)
g_Config.backend_info.bSupportsEarlyZ = true;
// Requires full UAV functionality (only available in shader model 5)
g_Config.backend_info.bSupportsBBox =
g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true;
// Requires the instance attribute (only available in shader model 5)
g_Config.backend_info.bSupportsGSInstancing = true;
// Sample shading requires shader model 5
g_Config.backend_info.bSupportsSSAA = true;
temp_device->Release();
}
}
g_Config.backend_info.Adapters.push_back(UTF16ToUTF8(desc.Description));
ad->Release();
}
factory->Release();
D3D::UnloadD3D();
D3D::UnloadDXGI();
}
bool VideoBackend::Initialize(void* window_handle)
{
if (window_handle == nullptr)
return false;
InitBackendInfo();
InitializeShared();
if (FAILED(D3D::Create((HWND)window_handle)))
return false;
m_window_handle = window_handle;
return true;
}
// Creates the global renderer-side objects and initializes the backend's caches and helpers.
// The order is significant: StateCache::Init() runs after the shader caches because the PSO
// cache depends on the loaded shaders (see the comment on that line).
void VideoBackend::Video_Prepare()
{
// internal interfaces
g_renderer = std::make_unique<Renderer>();
g_texture_cache = std::make_unique<TextureCache>();
g_vertex_manager = std::make_unique<VertexManager>();
g_perf_query = std::make_unique<PerfQuery>();
g_xfb_encoder = std::make_unique<XFBEncoder>();
ShaderCache::Init();
ShaderConstantsManager::Init();
StaticShaderCache::Init();
StateCache::Init(); // PSO cache is populated here, after constituent shaders are loaded.
D3D::InitUtils();
BBox::Init();
}
// Tears down everything created by Video_Prepare() and Initialize().
// Queued GPU work is executed with ExecuteQueuedWork(true) first; then helpers and caches are
// shut down, the global objects are reset in the reverse of their creation order, and finally
// the device is closed and the shared state is shut down.
void VideoBackend::Shutdown()
{
// TODO: should be in Video_Cleanup
// Immediately stop app from submitting work to GPU, and wait for all submitted work to complete.
// D3D12TODO: Check this.
D3D::command_list_mgr->ExecuteQueuedWork(true);
// internal interfaces
D3D::ShutdownUtils();
ShaderCache::Shutdown();
ShaderConstantsManager::Shutdown();
StaticShaderCache::Shutdown();
BBox::Shutdown();
g_xfb_encoder.reset();
g_perf_query.reset();
g_vertex_manager.reset();
g_texture_cache.reset();
g_renderer.reset();
D3D::Close();
ShutdownShared();
}
// Only performs the shared cleanup; the backend-specific teardown currently lives in
// Shutdown() (see the TODO there).
void VideoBackend::Video_Cleanup()
{
CleanupShared();
}
}

View File

@ -44,7 +44,8 @@ static VertexLoaderMap s_vertex_loader_map;
u8* cached_arraybases[12];
// Used in D3D12 backend, to populate input layouts used by cached-to-disk PSOs.
// Used in the Vulkan backend
NativeVertexFormatMap* GetNativeVertexFormatMap()
{
return &s_native_vertex_map;

View File

@ -10,7 +10,6 @@
// TODO: ugly
#ifdef _WIN32
#include "VideoBackends/D3D/VideoBackend.h"
#include "VideoBackends/D3D12/VideoBackend.h"
#endif
#include "VideoBackends/Null/VideoBackend.h"
#include "VideoBackends/OGL/VideoBackend.h"
@ -38,18 +37,10 @@ __declspec(dllexport) DWORD NvOptimusEnablement = 1;
void VideoBackendBase::PopulateList()
{
// OGL > D3D11 > D3D12 > Vulkan > SW > Null
// OGL > D3D11 > Vulkan > SW > Null
g_available_video_backends.push_back(std::make_unique<OGL::VideoBackend>());
#ifdef _WIN32
g_available_video_backends.push_back(std::make_unique<DX11::VideoBackend>());
// More robust way to check for D3D12 support than (unreliable) OS version checks.
HMODULE d3d12_module = LoadLibraryA("d3d12.dll");
if (d3d12_module != nullptr)
{
FreeLibrary(d3d12_module);
g_available_video_backends.push_back(std::make_unique<DX12::VideoBackend>());
}
#endif
#ifndef __APPLE__
g_available_video_backends.push_back(std::make_unique<Vulkan::VideoBackend>());

View File

@ -84,9 +84,6 @@
<ProjectReference Include="$(CoreDir)VideoBackends\Vulkan\Vulkan.vcxproj">
<Project>{29F29A19-F141-45AD-9679-5A2923B49DA3}</Project>
</ProjectReference>
<ProjectReference Include="$(CoreDir)VideoBackends\D3D12\D3D12.vcxproj">
<Project>{570215b7-e32f-4438-95ae-c8d955f9fca3}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

View File

@ -74,8 +74,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "enet", "..\Externals\enet\e
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "curl", "..\Externals\curl\curl.vcxproj", "{BB00605C-125F-4A21-B33B-7BF418322DCB}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "D3D12", "Core\VideoBackends\D3D12\D3D12.vcxproj", "{570215B7-E32F-4438-95AE-C8D955F9FCA3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DolphinQt2", "Core\DolphinQt2\DolphinQt2.vcxproj", "{FA3FA62B-6F58-4B86-9453-4D149940A066}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "glslang", "..\Externals\glslang\glslang.vcxproj", "{D178061B-84D3-44F9-BEED-EFD18D9033F0}"
@ -220,10 +218,6 @@ Global
{BB00605C-125F-4A21-B33B-7BF418322DCB}.Debug|x64.Build.0 = Debug|x64
{BB00605C-125F-4A21-B33B-7BF418322DCB}.Release|x64.ActiveCfg = Release|x64
{BB00605C-125F-4A21-B33B-7BF418322DCB}.Release|x64.Build.0 = Release|x64
{570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x64.ActiveCfg = Debug|x64
{570215B7-E32F-4438-95AE-C8D955F9FCA3}.Debug|x64.Build.0 = Debug|x64
{570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x64.ActiveCfg = Release|x64
{570215B7-E32F-4438-95AE-C8D955F9FCA3}.Release|x64.Build.0 = Release|x64
{FA3FA62B-6F58-4B86-9453-4D149940A066}.Debug|x64.ActiveCfg = Debug|x64
{FA3FA62B-6F58-4B86-9453-4D149940A066}.Debug|x64.Build.0 = Debug|x64
{FA3FA62B-6F58-4B86-9453-4D149940A066}.Release|x64.ActiveCfg = Release|x64
@ -270,7 +264,6 @@ Global
{76563A7F-1011-4EAD-B667-7BB18D09568E} = {15670B2E-CED6-4ED5-94CE-A00B1B2B5BA6}
{CBC76802-C128-4B17-BF6C-23B08C313E5E} = {87ADDFF9-5768-4DA2-A33B-2477593D6677}
{BB00605C-125F-4A21-B33B-7BF418322DCB} = {87ADDFF9-5768-4DA2-A33B-2477593D6677}
{570215B7-E32F-4438-95AE-C8D955F9FCA3} = {AAD1BCD6-9804-44A5-A5FC-4782EA00E9D4}
{D178061B-84D3-44F9-BEED-EFD18D9033F0} = {87ADDFF9-5768-4DA2-A33B-2477593D6677}
{C636D9D1-82FE-42B5-9987-63B7D4836341} = {87ADDFF9-5768-4DA2-A33B-2477593D6677}
EndGlobalSection