From 0be0eb2b381948df605863ca16adfd7d771e2ee0 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 17:47:32 +0300 Subject: [PATCH 1/6] [GPU/D3D12] Letterboxing cropping to action-safe area --- src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 35 ++++-- src/xenia/gpu/draw_util.cc | 112 +++++++++++++++++++ src/xenia/gpu/draw_util.h | 8 ++ src/xenia/ui/d3d12/d3d12_context.cc | 6 +- src/xenia/ui/window_win.cc | 27 +---- 5 files changed, 154 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 30b322f87..835c94c07 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -10,7 +10,9 @@ #include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include "xenia/gpu/draw_util.h" #include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/xbox.h" @@ -265,22 +267,39 @@ void D3D12GraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } + uint32_t window_width, window_height; + display_context_->GetSwapChainSize(window_width, window_height); + + int32_t target_x, target_y; + uint32_t target_width, target_height; + draw_util::GetPresentArea(swap_state.width, swap_state.height, window_width, + window_height, target_x, target_y, target_width, + target_height); + // For safety. 
+ target_x = clamp(target_x, int32_t(D3D12_VIEWPORT_BOUNDS_MIN), + int32_t(D3D12_VIEWPORT_BOUNDS_MAX)); + target_y = clamp(target_y, int32_t(D3D12_VIEWPORT_BOUNDS_MIN), + int32_t(D3D12_VIEWPORT_BOUNDS_MAX)); + target_width = std::min( + target_width, uint32_t(int32_t(D3D12_VIEWPORT_BOUNDS_MAX) - target_x)); + target_height = std::min( + target_height, uint32_t(int32_t(D3D12_VIEWPORT_BOUNDS_MAX) - target_y)); + auto command_list = display_context_->GetSwapCommandList(); - uint32_t swap_width, swap_height; - display_context_->GetSwapChainSize(swap_width, swap_height); + // Assuming the window has already been cleared to the needed letterbox color. D3D12_VIEWPORT viewport; - viewport.TopLeftX = 0.0f; - viewport.TopLeftY = 0.0f; - viewport.Width = float(swap_width); - viewport.Height = float(swap_height); + viewport.TopLeftX = float(target_x); + viewport.TopLeftY = float(target_y); + viewport.Width = float(target_width); + viewport.Height = float(target_height); viewport.MinDepth = 0.0f; viewport.MaxDepth = 0.0f; command_list->RSSetViewports(1, &viewport); D3D12_RECT scissor; scissor.left = 0; scissor.top = 0; - scissor.right = swap_width; - scissor.bottom = swap_height; + scissor.right = window_width; + scissor.bottom = window_height; command_list->RSSetScissorRects(1, &scissor); command_list->SetDescriptorHeaps(1, &swap_srv_heap); StretchTextureToFrontBuffer( diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index 6c9ba1e73..d28df6d0e 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/draw_util.h" +#include #include #include @@ -31,6 +32,36 @@ DEFINE_bool( "for certain games like GTA IV to work).", "GPU"); +DEFINE_bool( + present_stretch, true, + "Whether to rescale the image, instead of maintaining the original pixel " + "size, when presenting to the window. 
When this is disabled, other " + "positioning options are ignored.", + "GPU"); +DEFINE_bool( + present_letterbox, true, + "Maintain aspect ratio when stretching by displaying bars around the image " + "when there's no more overscan area to crop out.", + "GPU"); +// https://github.com/MonoGame/MonoGame/issues/4697#issuecomment-217779403 +// Using the value from DirectXTK (5% cropped out from each side, thus 90%), +// which is not exactly the Xbox One title-safe area, but close, and within the +// action-safe area: +// https://github.com/microsoft/DirectXTK/blob/1e80a465c6960b457ef9ab6716672c1443a45024/Src/SimpleMath.cpp#L144 +// XNA TitleSafeArea is 80%, but it's very conservative, designed for CRT, and +// is the title-safe area rather than the action-safe area. +// 90% is also exactly the fraction of 16:9 height in 16:10. +DEFINE_int32( + present_safe_area_x, 90, + "Percentage of the image width that can be kept when presenting to " + "maintain aspect ratio without letterboxing or stretching.", + "GPU"); +DEFINE_int32( + present_safe_area_y, 90, + "Percentage of the image height that can be kept when presenting to " + "maintain aspect ratio without letterboxing or stretching.", + "GPU"); + namespace xe { namespace gpu { namespace draw_util { @@ -589,6 +620,87 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader( return shader; } +void GetPresentArea(uint32_t source_width, uint32_t source_height, + uint32_t window_width, uint32_t window_height, + int32_t& target_x_out, int32_t& target_y_out, + uint32_t& target_width_out, uint32_t& target_height_out) { + if (!cvars::present_stretch) { + target_x_out = (int32_t(window_width) - int32_t(source_width)) / 2; + target_y_out = (int32_t(window_height) - int32_t(source_height)) / 2; + target_width_out = source_width; + target_height_out = source_height; + return; + } + // Prevent division by zero. 
+ if (!source_width || !source_height) { + target_x_out = 0; + target_y_out = 0; + target_width_out = 0; + target_height_out = 0; + return; + } + if (uint64_t(window_width) * source_height > + uint64_t(source_width) * window_height) { + // The window is wider than the source - crop along Y, then letterbox or + // stretch along X. + uint32_t present_safe_area; + if (cvars::present_safe_area_y > 0 && cvars::present_safe_area_y < 100) { + present_safe_area = uint32_t(cvars::present_safe_area_y); + } else { + present_safe_area = 100; + } + uint32_t target_height = + uint32_t(uint64_t(window_width) * source_height / source_width); + bool letterbox = false; + if (target_height * present_safe_area > window_height * 100) { + // Don't crop out more than the safe area margin - letterbox or stretch. 
+ target_width = window_width * 100 / present_safe_area; + letterbox = true; + } + if (letterbox && cvars::present_letterbox) { + uint32_t target_height = + uint32_t(uint64_t(source_height) * window_width * 100 / + (source_width * present_safe_area)); + target_y_out = (int32_t(window_height) - int32_t(target_height)) / 2; + target_height_out = target_height; + } else { + target_y_out = 0; + target_height_out = window_height; + } + target_x_out = (int32_t(window_width) - int32_t(target_width)) / 2; + target_width_out = target_width; + } +} + } // namespace draw_util } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 76827c093..edb880ab0 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -272,6 +272,14 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, TraceWriter& trace_writer, uint32_t resolution_scale, bool edram_16_as_minus_1_to_1, ResolveInfo& info_out); +// Taking user configuration - stretching or letterboxing, overscan region to +// crop to fill while maintaining the aspect ratio - into account, returns the +// area where the frame should be presented in the host window. 
+void GetPresentArea(uint32_t source_width, uint32_t source_height, + uint32_t window_width, uint32_t window_height, + int32_t& target_x_out, int32_t& target_y_out, + uint32_t& target_width_out, uint32_t& target_height_out); + } // namespace draw_util } // namespace gpu } // namespace xe diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index 506ca6141..f897a5516 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -300,9 +300,9 @@ void D3D12Context::BeginSwap() { clear_color[1] = 1.0f; clear_color[2] = 0.0f; } else { - clear_color[0] = 238.0f / 255.0f; - clear_color[1] = 238.0f / 255.0f; - clear_color[2] = 238.0f / 255.0f; + clear_color[0] = 0.0f; + clear_color[1] = 0.0f; + clear_color[2] = 0.0f; } clear_color[3] = 1.0f; swap_command_list_->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, diff --git a/src/xenia/ui/window_win.cc b/src/xenia/ui/window_win.cc index 2e60d2e42..c86a5cec8 100644 --- a/src/xenia/ui/window_win.cc +++ b/src/xenia/ui/window_win.cc @@ -253,20 +253,6 @@ bool Win32Window::ReleaseMouse() { bool Win32Window::is_fullscreen() const { return fullscreen_; } -// https://blogs.msdn.microsoft.com/oldnewthing/20131017-00/?p=2903 -BOOL UnadjustWindowRect(LPRECT prc, DWORD dwStyle, BOOL fMenu) { - RECT rc; - SetRectEmpty(&rc); - BOOL fRc = AdjustWindowRect(&rc, dwStyle, fMenu); - if (fRc) { - prc->left -= rc.left; - prc->top -= rc.top; - prc->right -= rc.right; - prc->bottom -= rc.bottom; - } - return fRc; -} - void Win32Window::ToggleFullscreen(bool fullscreen) { if (fullscreen == is_fullscreen()) { return; @@ -288,9 +274,6 @@ void Win32Window::ToggleFullscreen(bool fullscreen) { AdjustWindowRect(&rc, GetWindowLong(hwnd_, GWL_STYLE), false); MoveWindow(hwnd_, rc.left, rc.top, rc.right - rc.left, rc.bottom - rc.top, TRUE); - - width_ = rc.right - rc.left; - height_ = rc.bottom - rc.top; } } else { // Reinstate borders, resize to 1280x720 @@ -301,15 +284,13 @@ void 
Win32Window::ToggleFullscreen(bool fullscreen) { if (main_menu) { ::SetMenu(hwnd_, main_menu->handle()); } - - auto& rc = windowed_pos_.rcNormalPosition; - bool has_menu = main_menu_ ? true : false; - UnadjustWindowRect(&rc, GetWindowLong(hwnd_, GWL_STYLE), has_menu); - width_ = rc.right - rc.left; - height_ = rc.bottom - rc.top; } fullscreen_ = fullscreen; + + // width_ and height_ will be updated by the WM_SIZE handler - + // windowed_pos_.rcNormalPosition is also not the correct source for them when + // switching from fullscreen to maximized. } bool Win32Window::is_bordered() const { From 74804ae7101c19ab29f0c1fe5f9d62d0b143503c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 19:23:45 +0300 Subject: [PATCH 2/6] [DXBC] ROV: Fix depth being overwritten if stencil failed --- src/xenia/gpu/dxbc_shader_translator_om.cc | 25 ++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index bb83200ee..24963008f 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -733,12 +733,6 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), DxbcSrc::LU(~uint32_t(1 << i))); - // temp.x? = resulting sample depth after the depth test - // temp.y = polygon offset if not writing to oDepth - // temp.z = viewport maximum depth if not writing to oDepth - // temp.w = old depth/stencil - // sample_temp.x = free - DxbcOpMov(sample_depth_stencil_dest, sample_temp_x_src); } DxbcOpEndIf(); // Create packed depth/stencil, with the stencil value unchanged at this @@ -977,6 +971,25 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Close the stencil test check. DxbcOpEndIf(); + // Check if the depth/stencil test has failed, to keep the old depth if so. 
+ // sample_temp.x = whether depth/stencil has passed for this sample + DxbcOpAnd(sample_temp_x_dest, + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(1 << i)); + // If the depth/stencil test has failed, don't change the depth. + // sample_temp.x = free + DxbcOpIf(false, sample_temp_x_src); + { + // Copy the new stencil over the old depth. + // temp.x? = resulting sample depth/stencil + // temp.y = polygon offset if not writing to oDepth + // temp.z = viewport maximum depth if not writing to oDepth + // temp.w = old depth/stencil + DxbcOpBFI(sample_depth_stencil_dest, DxbcSrc::LU(8), DxbcSrc::LU(0), + sample_depth_stencil_src, temp_w_src); + } + // Close the depth/stencil passing check. + DxbcOpEndIf(); // Check if the new depth/stencil is different, and thus needs to be // written, to temp.w. // temp.x? = resulting sample depth/stencil From b3f84f4a9c5d8cf96dcda8ae3f6e89681d8ff0d1 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 19:34:36 +0300 Subject: [PATCH 3/6] [GPU] Rename present_stretch to present_rescale --- src/xenia/gpu/draw_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index d28df6d0e..6aaa1b856 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -33,7 +33,7 @@ DEFINE_bool( "GPU"); DEFINE_bool( - present_stretch, true, + present_rescale, true, "Whether to rescale the image, instead of maintaining the original pixel " "size, when presenting to the window. 
When this is disabled, other " "positioning options are ignored.", @@ -624,7 +624,7 @@ void GetPresentArea(uint32_t source_width, uint32_t source_height, uint32_t window_width, uint32_t window_height, int32_t& target_x_out, int32_t& target_y_out, uint32_t& target_width_out, uint32_t& target_height_out) { - if (!cvars::present_stretch) { + if (!cvars::present_rescale) { target_x_out = (int32_t(window_width) - int32_t(source_width)) / 2; target_y_out = (int32_t(window_height) - int32_t(source_height)) / 2; target_width_out = source_width; From 38e0cc4941fc69b48555695eae2adaf4a86b726a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 00:52:30 +0300 Subject: [PATCH 4/6] [D3D12] Non-zeroed heaps on 2004 --- docs/building.md | 2 +- premake5.lua | 6 ++-- .../gpu/d3d12/d3d12_command_processor.cc | 28 +++++++++------- src/xenia/gpu/d3d12/primitive_converter.cc | 12 ++++--- src/xenia/gpu/d3d12/render_target_cache.cc | 33 +++++++++++-------- src/xenia/gpu/d3d12/shared_memory.cc | 19 ++++++----- src/xenia/gpu/d3d12/texture_cache.cc | 12 ++++--- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 22 +++++++------ src/xenia/ui/d3d12/d3d12_provider.cc | 26 +++++++++++---- src/xenia/ui/d3d12/d3d12_provider.h | 4 +++ src/xenia/ui/d3d12/pools.cc | 11 ++++--- src/xenia/ui/d3d12/pools.h | 6 ++-- 12 files changed, 110 insertions(+), 71 deletions(-) diff --git a/docs/building.md b/docs/building.md index d8334793f..6aafc521e 100644 --- a/docs/building.md +++ b/docs/building.md @@ -12,7 +12,7 @@ drivers. * [Visual Studio 2019 or Visual Studio 2017](https://www.visualstudio.com/downloads/) * [Python 3.6+](https://www.python.org/downloads/) * Ensure Python is in PATH. 
-* Windows 10 SDK +* Windows 10 SDK version 10.0.19041.0 (for Visual Studio 2019, this or any newer version) ``` git clone https://github.com/xenia-project/xenia.git diff --git a/premake5.lua b/premake5.lua index 4af8c50bd..22f1fa7f1 100644 --- a/premake5.lua +++ b/premake5.lua @@ -202,10 +202,10 @@ solution("xenia") platforms({"Linux"}) elseif os.istarget("windows") then platforms({"Windows"}) - -- Minimum version to support ID3D12GraphicsCommandList1 (for - -- SetSamplePositions). + -- 10.0.15063.0: ID3D12GraphicsCommandList1::SetSamplePositions. + -- 10.0.19041.0: D3D12_HEAP_FLAG_CREATE_NOT_ZEROED. filter("action:vs2017") - systemversion("10.0.15063.0") + systemversion("10.0.19041.0") filter("action:vs2019") systemversion("10.0") filter({}) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index e9831347d..23163a609 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -646,14 +646,16 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( size = xe::align(size, kScratchBufferSizeIncrement); - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc( buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, - &buffer_desc, state, nullptr, IID_PPV_ARGS(&buffer)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, state, nullptr, + IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB scratch GPU buffer", size >> 20); return nullptr; } @@ -889,7 +891,7 @@ bool D3D12CommandProcessor::SetupContext() { // Initialize resource binding. 
constant_buffer_pool_ = - std::make_unique(device, 1024 * 1024); + std::make_unique(provider, 1024 * 1024); if (bindless_resources_used_) { D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc; view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -1181,6 +1183,9 @@ bool D3D12CommandProcessor::SetupContext() { return false; } + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = + provider.GetHeapFlagCreateNotZeroed(); + // Create gamma ramp resources. The PWL gamma ramp is 16-bit, but 6 bits are // hardwired to zero, so DXGI_FORMAT_R10G10B10A2_UNORM can be used for it too. // https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf @@ -1202,7 +1207,7 @@ bool D3D12CommandProcessor::SetupContext() { // The first action will be uploading. gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &gamma_ramp_desc, gamma_ramp_texture_state_, nullptr, IID_PPV_ARGS(&gamma_ramp_texture_)))) { XELOGE("Failed to create the gamma ramp texture"); @@ -1218,7 +1223,7 @@ bool D3D12CommandProcessor::SetupContext() { ui::d3d12::util::FillBufferResourceDesc( gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed, &gamma_ramp_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&gamma_ramp_upload_)))) { XELOGE("Failed to create the gamma ramp upload buffer"); @@ -1246,7 +1251,7 @@ bool D3D12CommandProcessor::SetupContext() { swap_texture_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; // Can be sampled at any time, switch to render target when needed, then back. 
if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &swap_texture_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&swap_texture_)))) { XELOGE("Failed to create the command processor front buffer"); @@ -4286,15 +4291,16 @@ ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) { } size = xe::align(size, kReadbackBufferSizeIncrement); if (size > readback_buffer_size_) { - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, - &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&buffer)))) { + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB readback buffer", size >> 20); return nullptr; } diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 4884865d0..ab2138b47 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -47,14 +47,16 @@ PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor& command_processor, PrimitiveConverter::~PrimitiveConverter() { Shutdown(); } bool PrimitiveConverter::Initialize() { - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = + 
provider.GetHeapFlagCreateNotZeroed(); // There can be at most 65535 indices in a Xenos draw call, but they can be up // to 4 bytes large, and conversion can add more indices (almost triple the // count for triangle strips, for instance). buffer_pool_ = - std::make_unique(device, 4 * 1024 * 1024); + std::make_unique(provider, 4 * 1024 * 1024); // Create the static index buffer for non-indexed drawing. D3D12_RESOURCE_DESC static_ib_desc; @@ -62,7 +64,7 @@ bool PrimitiveConverter::Initialize() { static_ib_desc, kStaticIBTotalCount * sizeof(uint16_t), D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed, &static_ib_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&static_ib_upload_)))) { XELOGE( @@ -108,7 +110,7 @@ bool PrimitiveConverter::Initialize() { // Not uploaded yet. static_ib_upload_submission_ = UINT64_MAX; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &static_ib_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&static_ib_)))) { XELOGE("Failed to create the primitive conversion static index buffer"); diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index bd25b738a..cfc7f6f47 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -137,8 +137,6 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { uint32_t edram_buffer_size = GetEdramBufferSize(); // Create the buffer for reinterpreting EDRAM contents. - // No need to clear it in the first frame, memory is zeroed out when allocated - // on Windows. 
D3D12_RESOURCE_DESC edram_buffer_desc; ui::d3d12::util::FillBufferResourceDesc( edram_buffer_desc, edram_buffer_size, @@ -147,8 +145,15 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { edram_buffer_state_ = edram_rov_used_ ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + // Request zeroed (though no guarantee) when not using ROV so the host 32-bit + // depth buffer will be initialized to deterministic values (because it's + // involved in comparison with converted 24-bit values - whether the 32-bit + // value is up to date is determined by whether it's equal to the 24-bit + // value in the main EDRAM buffer when converted to 24-bit). if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, + edram_rov_used_ ? provider.GetHeapFlagCreateNotZeroed() + : D3D12_HEAP_FLAG_NONE, &edram_buffer_desc, edram_buffer_state_, nullptr, IID_PPV_ARGS(&edram_buffer_)))) { XELOGE("Failed to create the EDRAM buffer"); @@ -1451,10 +1456,11 @@ bool RenderTargetCache::InitializeTraceSubmitDownloads() { ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc, xenos::kEdramSizeBytes, D3D12_RESOURCE_FLAG_NONE); - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &edram_snapshot_download_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) { @@ -1493,10 +1499,9 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { return; } auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); - 
auto device = provider.GetDevice(); if (!edram_snapshot_restore_pool_) { edram_snapshot_restore_pool_ = - std::make_unique(device, + std::make_unique(provider, xenos::kEdramSizeBytes); } ID3D12Resource* upload_buffer; @@ -1603,14 +1608,15 @@ bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) { if (heaps_[heap_index] != nullptr) { return true; } - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kHeap4MBPages << 22; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB. heap_desc.Alignment = 0; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED( device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[heap_index])))) { XELOGE("Failed to create a {} MB heap for render targets", @@ -1756,8 +1762,9 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( } #else if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, - &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &resource_desc, state, nullptr, + IID_PPV_ARGS(&resource)))) { XELOGE( "Failed to create a committed resource for {}x{} {} render target with " "format {}", diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index f2d2e6296..c24336664 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -73,8 +73,9 @@ bool SharedMemory::Initialize() { "resources yet."); } if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, 
D3D12_HEAP_FLAG_NONE, - &buffer_desc, buffer_state_, nullptr, IID_PPV_ARGS(&buffer_)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, buffer_state_, + nullptr, IID_PPV_ARGS(&buffer_)))) { XELOGE("Shared memory: Failed to create the 512 MB buffer"); Shutdown(); return false; @@ -153,7 +154,7 @@ bool SharedMemory::Initialize() { system_page_flags_.resize((page_count_ + 63) / 64); upload_buffer_pool_ = std::make_unique( - device, + provider, xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_)); memory_invalidation_callback_handle_ = @@ -370,7 +371,8 @@ bool SharedMemory::EnsureTilesResident(uint32_t start, uint32_t length) { D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[i])))) { XELOGE("Shared memory: Failed to create a tile heap"); return false; @@ -890,11 +892,12 @@ bool SharedMemory::InitializeTraceSubmitDownloads() { ui::d3d12::util::FillBufferResourceDesc( gpu_written_buffer_desc, gpu_written_page_count << page_size_log2_, D3D12_RESOURCE_FLAG_NONE); - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, - &gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &gpu_written_buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&trace_gpu_written_buffer_)))) { XELOGE( "Shared memory: Failed to create a {} KB GPU-written memory download " diff 
--git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 0f096fb49..821a0136f 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -1633,7 +1633,8 @@ bool TextureCache::EnsureScaledResolveBufferResident(uint32_t start_unscaled, D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kScaledResolveHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&scaled_resolve_heaps_[i])))) { XELOGE("Texture cache: Failed to create a scaled resolve tile heap"); @@ -1953,14 +1954,15 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { // Untiling through a buffer instead of using unordered access because copying // is not done that often. desc.Flags = D3D12_RESOURCE_FLAG_NONE; - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); // Assuming untiling will be the next operation. 
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST; ID3D12Resource* resource; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &desc, - state, nullptr, IID_PPV_ARGS(&resource)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &desc, state, nullptr, + IID_PPV_ARGS(&resource)))) { LogTextureKeyAction(key, "Failed to create"); return nullptr; } diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index f91c79677..3c0ea8a3a 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -33,7 +33,7 @@ class D3D12ImmediateTexture : public ImmediateTexture { ImmediateTextureFilter filter, bool repeat); ~D3D12ImmediateTexture() override; - bool Initialize(ID3D12Device* device); + bool Initialize(D3D12Provider& provider); void Shutdown(); ID3D12Resource* GetResource() const { return resource_; } @@ -59,7 +59,7 @@ D3D12ImmediateTexture::D3D12ImmediateTexture(uint32_t width, uint32_t height, D3D12ImmediateTexture::~D3D12ImmediateTexture() { Shutdown(); } -bool D3D12ImmediateTexture::Initialize(ID3D12Device* device) { +bool D3D12ImmediateTexture::Initialize(D3D12Provider& provider) { // The first operation will likely be copying the contents. 
state_ = D3D12_RESOURCE_STATE_COPY_DEST; @@ -75,9 +75,9 @@ bool D3D12ImmediateTexture::Initialize(ID3D12Device* device) { resource_desc.SampleDesc.Quality = 0; resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; - if (FAILED(device->CreateCommittedResource( - &util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &resource_desc, - state_, nullptr, IID_PPV_ARGS(&resource_)))) { + if (FAILED(provider.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), + &resource_desc, state_, nullptr, IID_PPV_ARGS(&resource_)))) { XELOGE("Failed to create a {}x{} texture for immediate drawing", width, height); return false; @@ -288,7 +288,7 @@ bool D3D12ImmediateDrawer::Initialize() { // Create pools for draws. vertex_buffer_pool_ = - std::make_unique(device, 2 * 1024 * 1024); + std::make_unique(provider, 2 * 1024 * 1024); texture_descriptor_pool_ = std::make_unique( device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048); texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; @@ -326,7 +326,7 @@ std::unique_ptr D3D12ImmediateDrawer::CreateTexture( const uint8_t* data) { auto texture = std::make_unique(width, height, filter, repeat); - texture->Initialize(context_.GetD3D12Provider().GetDevice()); + texture->Initialize(context_.GetD3D12Provider()); if (data != nullptr) { UpdateTexture(texture.get(), data); } @@ -343,7 +343,8 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, } uint32_t width = d3d_texture->width, height = d3d_texture->height; - auto device = context_.GetD3D12Provider().GetDevice(); + auto& provider = context_.GetD3D12Provider(); + auto device = provider.GetDevice(); // Create and fill the upload buffer. 
D3D12_RESOURCE_DESC texture_desc = texture_resource->GetDesc(); @@ -356,8 +357,9 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, &buffer_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&buffer)))) { + &util::kHeapPropertiesUpload, provider.GetHeapFlagCreateNotZeroed(), + &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&buffer)))) { XELOGE( "Failed to create an upload buffer for a {}x{} texture for " "immediate drawing", diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 77e4e70aa..0231a5bec 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -409,17 +409,29 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } + // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by + // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). 
+ heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; + D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; + if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, + &options7, sizeof(options7)))) { + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } XELOGD3D( - "Direct3D 12 device features:\n" - "Max GPU virtual address bits per resource: {}\n" - "Programmable sample positions: tier {}\n" - "Rasterizer-ordered views: {}\n" - "Resource binding: tier {}\n" - "Tiled resources: tier {}\n", + "Direct3D 12 device and OS features:\n" + "* Max GPU virtual address bits per resource: {}\n" + "* Programmable sample positions: tier {}\n" + "* Rasterizer-ordered views: {}\n" + "* Resource binding: tier {}\n" + "* Tiled resources: tier {}\n" + "* Non-zeroed heap creation: {}\n", virtual_address_bits_per_resource_, uint32_t(programmable_sample_positions_tier_), rasterizer_ordered_views_supported_ ? "yes" : "no", - uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_), + (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) + ? "yes" + : "no"); // Get the graphics analysis interface, will silently fail if PIX is not // attached. diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index 122f16e2f..1c8694fd0 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -84,6 +84,9 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } + D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { + return heap_flag_create_not_zeroed_; + } // Proxies for Direct3D 12 functions since they are loaded dynamically. 
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, @@ -164,6 +167,7 @@ class D3D12Provider : public GraphicsProvider { D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; }; } // namespace d3d12 diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index deff80b12..7b892caa9 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -19,8 +19,8 @@ namespace xe { namespace ui { namespace d3d12 { -UploadBufferPool::UploadBufferPool(ID3D12Device* device, uint32_t page_size) - : device_(device), page_size_(page_size) {} +UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size) + : provider_(provider), page_size_(page_size) {} UploadBufferPool::~UploadBufferPool() { ClearCache(); } @@ -101,9 +101,10 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, util::FillBufferResourceDesc(new_buffer_desc, page_size_, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* new_buffer; - if (FAILED(device_->CreateCommittedResource( - &util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, - &new_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + if (FAILED(provider_.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesUpload, + provider_.GetHeapFlagCreateNotZeroed(), &new_buffer_desc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&new_buffer)))) { XELOGE("Failed to create a D3D upload buffer with {} bytes", page_size_); diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/pools.h index 4499bd08f..21606cc42 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/pools.h @@ -12,7 +12,7 @@ #include -#include "xenia/ui/d3d12/d3d12_api.h" +#include "xenia/ui/d3d12/d3d12_provider.h" namespace xe { namespace ui { @@ -23,7 +23,7 @@ namespace d3d12 { class UploadBufferPool { public: - 
UploadBufferPool(ID3D12Device* device, uint32_t page_size); + UploadBufferPool(D3D12Provider& provider, uint32_t page_size); ~UploadBufferPool(); void Reclaim(uint64_t completed_submission_index); @@ -41,7 +41,7 @@ class UploadBufferPool { D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); private: - ID3D12Device* device_; + D3D12Provider& provider_; uint32_t page_size_; struct Page { From acb1fc059fd950c92464af6dc1640f896cdf89c9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 00:57:00 +0300 Subject: [PATCH 5/6] [D3D12] Make the feature list in the log sorted again --- src/xenia/ui/d3d12/d3d12_provider.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 0231a5bec..cb5287e14 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -420,18 +420,17 @@ bool D3D12Provider::Initialize() { XELOGD3D( "Direct3D 12 device and OS features:\n" "* Max GPU virtual address bits per resource: {}\n" + "* Non-zeroed heap creation: {}\n" "* Programmable sample positions: tier {}\n" "* Rasterizer-ordered views: {}\n" "* Resource binding: tier {}\n" - "* Tiled resources: tier {}\n" - "* Non-zeroed heap creation: {}\n", + "* Tiled resources: tier {}\n", virtual_address_bits_per_resource_, + (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) ? "yes" + : "no", uint32_t(programmable_sample_positions_tier_), rasterizer_ordered_views_supported_ ? "yes" : "no", - uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_), - (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) - ? "yes" - : "no"); + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); // Get the graphics analysis interface, will silently fail if PIX is not // attached. 
From dfbe36a8aa7e515a411f69278f2056a4feafc990 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 14 Sep 2020 23:27:19 +0300 Subject: [PATCH 6/6] [UI/D3D12] Small refactoring, allow BeginSwap to return false if no surface --- src/xenia/ui/d3d12/d3d12_context.cc | 215 ++++++++++++-------------- src/xenia/ui/d3d12/d3d12_context.h | 7 +- src/xenia/ui/d3d12/d3d12_provider.cc | 16 +- src/xenia/ui/d3d12/d3d12_provider.h | 8 +- src/xenia/ui/graphics_context.cc | 18 +++ src/xenia/ui/graphics_context.h | 5 +- src/xenia/ui/vulkan/vulkan_context.cc | 4 +- src/xenia/ui/vulkan/vulkan_context.h | 2 +- src/xenia/ui/window.cc | 6 +- 9 files changed, 149 insertions(+), 132 deletions(-) diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index f897a5516..7764afa44 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -9,9 +9,6 @@ #include "xenia/ui/d3d12/d3d12_context.h" -#include - -#include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/ui/d3d12/d3d12_immediate_drawer.h" @@ -19,9 +16,6 @@ #include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/window.h" -DEFINE_bool(d3d12_random_clear_color, false, - "Randomize presentation back buffer clear color.", "D3D12"); - namespace xe { namespace ui { namespace d3d12 { @@ -32,110 +26,112 @@ D3D12Context::D3D12Context(D3D12Provider* provider, Window* target_window) D3D12Context::~D3D12Context() { Shutdown(); } bool D3D12Context::Initialize() { + context_lost_ = false; + + if (!target_window_) { + return true; + } + auto& provider = GetD3D12Provider(); auto dxgi_factory = provider.GetDXGIFactory(); auto device = provider.GetDevice(); auto direct_queue = provider.GetDirectQueue(); - context_lost_ = false; + swap_fence_current_value_ = 1; + swap_fence_completed_value_ = 0; + swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); + if (swap_fence_completion_event_ == nullptr) { + XELOGE("Failed to 
create the composition fence completion event"); + Shutdown(); + return false; + } + // Create a fence for transient resources of compositing. + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&swap_fence_)))) { + XELOGE("Failed to create the composition fence"); + Shutdown(); + return false; + } - if (target_window_) { - swap_fence_current_value_ = 1; - swap_fence_completed_value_ = 0; - swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); - if (swap_fence_completion_event_ == nullptr) { - XELOGE("Failed to create the composition fence completion event"); - Shutdown(); - return false; - } - // Create a fence for transient resources of compositing. - if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(&swap_fence_)))) { - XELOGE("Failed to create the composition fence"); - Shutdown(); - return false; - } - - // Create the swap chain. - swap_chain_width_ = target_window_->scaled_width(); - swap_chain_height_ = target_window_->scaled_height(); - DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; - swap_chain_desc.Width = swap_chain_width_; - swap_chain_desc.Height = swap_chain_height_; - swap_chain_desc.Format = kSwapChainFormat; - swap_chain_desc.Stereo = FALSE; - swap_chain_desc.SampleDesc.Count = 1; - swap_chain_desc.SampleDesc.Quality = 0; - swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swap_chain_desc.BufferCount = kSwapChainBufferCount; - swap_chain_desc.Scaling = DXGI_SCALING_STRETCH; - swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - swap_chain_desc.Flags = 0; - IDXGISwapChain1* swap_chain_1; - if (FAILED(dxgi_factory->CreateSwapChainForHwnd( - provider.GetDirectQueue(), - static_cast(target_window_->native_handle()), - &swap_chain_desc, nullptr, nullptr, &swap_chain_1))) { - XELOGE("Failed to create a DXGI swap chain"); - Shutdown(); - return false; - } - if 
(FAILED(swap_chain_1->QueryInterface(IID_PPV_ARGS(&swap_chain_)))) { - XELOGE("Failed to get version 3 of the DXGI swap chain interface"); - swap_chain_1->Release(); - Shutdown(); - return false; - } + // Create the swap chain. + swap_chain_width_ = target_window_->scaled_width(); + swap_chain_height_ = target_window_->scaled_height(); + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; + swap_chain_desc.Width = swap_chain_width_; + swap_chain_desc.Height = swap_chain_height_; + swap_chain_desc.Format = kSwapChainFormat; + swap_chain_desc.Stereo = FALSE; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.BufferCount = kSwapChainBufferCount; + swap_chain_desc.Scaling = DXGI_SCALING_STRETCH; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + swap_chain_desc.Flags = 0; + IDXGISwapChain1* swap_chain_1; + if (FAILED(dxgi_factory->CreateSwapChainForHwnd( + provider.GetDirectQueue(), + reinterpret_cast(target_window_->native_handle()), + &swap_chain_desc, nullptr, nullptr, &swap_chain_1))) { + XELOGE("Failed to create a DXGI swap chain"); + Shutdown(); + return false; + } + if (FAILED(swap_chain_1->QueryInterface(IID_PPV_ARGS(&swap_chain_)))) { + XELOGE("Failed to get version 3 of the DXGI swap chain interface"); swap_chain_1->Release(); + Shutdown(); + return false; + } + swap_chain_1->Release(); - // Create a heap for RTV descriptors of swap chain buffers. 
- D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc; - rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - rtv_heap_desc.NumDescriptors = kSwapChainBufferCount; - rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - rtv_heap_desc.NodeMask = 0; - if (FAILED(device->CreateDescriptorHeap( - &rtv_heap_desc, IID_PPV_ARGS(&swap_chain_rtv_heap_)))) { - XELOGE("Failed to create swap chain RTV descriptor heap"); + // Create a heap for RTV descriptors of swap chain buffers. + D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc; + rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtv_heap_desc.NumDescriptors = kSwapChainBufferCount; + rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + rtv_heap_desc.NodeMask = 0; + if (FAILED(device->CreateDescriptorHeap( + &rtv_heap_desc, IID_PPV_ARGS(&swap_chain_rtv_heap_)))) { + XELOGE("Failed to create swap chain RTV descriptor heap"); + Shutdown(); + return false; + } + swap_chain_rtv_heap_start_ = + swap_chain_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + + // Get the buffers and create their RTV descriptors. + if (!InitializeSwapChainBuffers()) { + Shutdown(); + return false; + } + + // Create the command list for compositing. + for (uint32_t i = 0; i < kSwapCommandAllocatorCount; ++i) { + if (FAILED(device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(&swap_command_allocators_[i])))) { + XELOGE("Failed to create a composition command allocator"); Shutdown(); return false; } - swap_chain_rtv_heap_start_ = - swap_chain_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + } + if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + swap_command_allocators_[0], nullptr, + IID_PPV_ARGS(&swap_command_list_)))) { + XELOGE("Failed to create the composition graphics command list"); + Shutdown(); + return false; + } + // Initially in open state, wait until BeginSwap. + swap_command_list_->Close(); - // Get the buffers and create their RTV descriptors. 
- if (!InitializeSwapChainBuffers()) { - Shutdown(); - return false; - } - - // Create the command list for compositing. - for (uint32_t i = 0; i < kSwapCommandAllocatorCount; ++i) { - if (FAILED(device->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&swap_command_allocators_[i])))) { - XELOGE("Failed to create a composition command allocator"); - Shutdown(); - return false; - } - } - if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, - swap_command_allocators_[0], nullptr, - IID_PPV_ARGS(&swap_command_list_)))) { - XELOGE("Failed to create the composition graphics command list"); - Shutdown(); - return false; - } - // Initially in open state, wait until BeginSwap. - swap_command_list_->Close(); - - // Initialize the immediate mode drawer if not offscreen. - immediate_drawer_ = std::make_unique(*this); - if (!immediate_drawer_->Initialize()) { - Shutdown(); - return false; - } + // Initialize the immediate mode drawer if not offscreen. + immediate_drawer_ = std::make_unique(*this); + if (!immediate_drawer_->Initialize()) { + Shutdown(); + return false; } return true; @@ -223,9 +219,11 @@ ImmediateDrawer* D3D12Context::immediate_drawer() { return immediate_drawer_.get(); } -void D3D12Context::BeginSwap() { +bool D3D12Context::WasLost() { return context_lost_; } + +bool D3D12Context::BeginSwap() { if (!target_window_ || context_lost_) { - return; + return false; } // Resize the swap chain if the window is resized. 
@@ -252,13 +250,13 @@ void D3D12Context::BeginSwap() { kSwapChainBufferCount, target_window_width, target_window_height, kSwapChainFormat, 0))) { context_lost_ = true; - return; + return false; } swap_chain_width_ = target_window_width; swap_chain_height_ = target_window_height; if (!InitializeSwapChainBuffers()) { context_lost_ = true; - return; + return false; } } @@ -295,18 +293,11 @@ void D3D12Context::BeginSwap() { D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV(); swap_command_list_->OMSetRenderTargets(1, &back_buffer_rtv, TRUE, nullptr); float clear_color[4]; - if (cvars::d3d12_random_clear_color) { - clear_color[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - clear_color[1] = 1.0f; - clear_color[2] = 0.0f; - } else { - clear_color[0] = 0.0f; - clear_color[1] = 0.0f; - clear_color[2] = 0.0f; - } - clear_color[3] = 1.0f; + GetClearColor(clear_color); swap_command_list_->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, nullptr); + + return true; } void D3D12Context::EndSwap() { diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index 4cf13d87e..c9f235b97 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -28,9 +28,9 @@ class D3D12Context : public GraphicsContext { ImmediateDrawer* immediate_drawer() override; - bool WasLost() override { return context_lost_; } + bool WasLost() override; - void BeginSwap() override; + bool BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; @@ -69,11 +69,10 @@ class D3D12Context : public GraphicsContext { private: friend class D3D12Provider; - explicit D3D12Context(D3D12Provider* provider, Window* target_window); + bool Initialize(); private: - bool Initialize(); bool InitializeSwapChainBuffers(); void Shutdown(); diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index cb5287e14..d1f6594ca 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc 
+++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -383,6 +383,14 @@ bool D3D12Provider::Initialize() { device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); // Check if optional features are supported. + // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by + // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; + D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; + if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, + &options7, sizeof(options7)))) { + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } rasterizer_ordered_views_supported_ = false; resource_binding_tier_ = D3D12_RESOURCE_BINDING_TIER_1; tiled_resources_tier_ = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED; @@ -409,14 +417,6 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } - // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by - // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). - heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; - D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; - if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, - &options7, sizeof(options7)))) { - heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - } XELOGD3D( "Direct3D 12 device and OS features:\n" "* Max GPU virtual address bits per resource: {}\n" diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index 1c8694fd0..c8332801c 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -68,6 +68,9 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; } // Device features. 
+ D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { + return heap_flag_create_not_zeroed_; + } D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER GetProgrammableSamplePositionsTier() const { return programmable_sample_positions_tier_; @@ -84,9 +87,6 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } - D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { - return heap_flag_create_not_zeroed_; - } // Proxies for Direct3D 12 functions since they are loaded dynamically. inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, @@ -162,12 +162,12 @@ class D3D12Provider : public GraphicsProvider { uint32_t adapter_vendor_id_; + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER programmable_sample_positions_tier_; bool rasterizer_ordered_views_supported_; D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; - D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; }; } // namespace d3d12 diff --git a/src/xenia/ui/graphics_context.cc b/src/xenia/ui/graphics_context.cc index 73980cd37..7f5ab07b6 100644 --- a/src/xenia/ui/graphics_context.cc +++ b/src/xenia/ui/graphics_context.cc @@ -9,8 +9,13 @@ #include "xenia/ui/graphics_context.h" +#include + +#include "xenia/base/cvar.h" #include "xenia/ui/graphics_provider.h" +DEFINE_bool(random_clear_color, false, "Randomize window clear color.", "UI"); + namespace xe { namespace ui { @@ -26,5 +31,18 @@ bool GraphicsContext::MakeCurrent() { return true; } void GraphicsContext::ClearCurrent() {} +void GraphicsContext::GetClearColor(float* rgba) { + if (cvars::random_clear_color) { + rgba[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + rgba[1] = 1.0f; + rgba[2] = 0.0f; + } else { + rgba[0] = 0.0f; + rgba[1] = 0.0f; + rgba[2] = 0.0f; + } + rgba[3] = 1.0f; +} + } // namespace ui } // 
namespace xe diff --git a/src/xenia/ui/graphics_context.h b/src/xenia/ui/graphics_context.h index 383338770..0ed5bd881 100644 --- a/src/xenia/ui/graphics_context.h +++ b/src/xenia/ui/graphics_context.h @@ -51,7 +51,8 @@ class GraphicsContext { // This context must be made current in order for this call to work properly. virtual bool WasLost() = 0; - virtual void BeginSwap() = 0; + // Returns true if able to draw now (the target surface is available). + virtual bool BeginSwap() = 0; virtual void EndSwap() = 0; virtual std::unique_ptr Capture() = 0; @@ -59,6 +60,8 @@ class GraphicsContext { protected: explicit GraphicsContext(GraphicsProvider* provider, Window* target_window); + static void GetClearColor(float* rgba); + GraphicsProvider* provider_ = nullptr; Window* target_window_ = nullptr; }; diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 339f099ef..50f51ad74 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -141,7 +141,7 @@ bool VulkanContext::MakeCurrent() { void VulkanContext::ClearCurrent() {} -void VulkanContext::BeginSwap() { +bool VulkanContext::BeginSwap() { SCOPE_profile_cpu_f("gpu"); auto provider = static_cast(provider_); auto device = provider->device(); @@ -170,6 +170,8 @@ void VulkanContext::BeginSwap() { // TODO(benvanik): use a fence instead? May not be possible with target image. 
std::lock_guard queue_lock(device->primary_queue_mutex()); status = vkQueueWaitIdle(device->primary_queue()); + + return true; } void VulkanContext::EndSwap() { diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 3665ffd78..f5658bdd1 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -40,7 +40,7 @@ class VulkanContext : public GraphicsContext { bool WasLost() override { return context_lost_; } - void BeginSwap() override; + bool BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; diff --git a/src/xenia/ui/window.cc b/src/xenia/ui/window.cc index 8be8900c8..1273b61f0 100644 --- a/src/xenia/ui/window.cc +++ b/src/xenia/ui/window.cc @@ -200,11 +200,15 @@ void Window::OnPaint(UIEvent* e) { io.DisplaySize = ImVec2(static_cast(scaled_width()), static_cast(scaled_height())); - context_->BeginSwap(); + bool can_swap = context_->BeginSwap(); if (context_->WasLost()) { on_context_lost(e); return; } + if (!can_swap) { + // Surface not available. + return; + } ImGui::NewFrame();