From 52efbcf741c021c20b03b282cc7c4a006172be60 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 1 Sep 2020 12:44:37 +0300 Subject: [PATCH 1/8] [Memory] Fix Protect range calculation --- src/xenia/memory.cc | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 834091a07..7e60797f5 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -1145,12 +1145,38 @@ bool BaseHeap::Release(uint32_t base_address, uint32_t* out_region_size) { bool BaseHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, uint32_t* old_protect) { - uint32_t page_count = xe::round_up(size, page_size_) / page_size_; + if (!size) { + XELOGE("BaseHeap::Protect failed due to zero size"); + return false; + } + + // From the VirtualProtect MSDN page: + // + // "The region of affected pages includes all pages containing one or more + // bytes in the range from the lpAddress parameter to (lpAddress+dwSize). + // This means that a 2-byte range straddling a page boundary causes the + // protection attributes of both pages to be changed." + // + // "The access protection value can be set only on committed pages. If the + // state of any page in the specified region is not committed, the function + // fails and returns without modifying the access protection of any pages in + // the specified region." 
+ uint32_t start_page_number = (address - heap_base_) / page_size_; - uint32_t end_page_number = start_page_number + page_count - 1; - start_page_number = - std::min(uint32_t(page_table_.size()) - 1, start_page_number); - end_page_number = std::min(uint32_t(page_table_.size()) - 1, end_page_number); + if (start_page_number >= page_table_.size()) { + XELOGE("BaseHeap::Protect failed due to out-of-bounds base address {:08X}", + address); + return false; + } + uint32_t end_page_number = + uint32_t((uint64_t(address) + size - 1 - heap_base_) / page_size_); + if (end_page_number >= page_table_.size()) { + XELOGE( + "BaseHeap::Protect failed due to out-of-bounds range ({:08X} bytes " + "from {:08x})", + size, address); + return false; + } auto global_lock = global_critical_region_.Acquire(); @@ -1173,6 +1199,7 @@ bool BaseHeap::Protect(uint32_t address, uint32_t size, uint32_t protect, // Attempt host change (hopefully won't fail). // We can only do this if our size matches system page granularity. 
+ uint32_t page_count = end_page_number - start_page_number + 1; if (page_size_ == xe::memory::page_size() || (((page_count * page_size_) % xe::memory::page_size() == 0) && ((start_page_number * page_size_) % xe::memory::page_size() == 0))) { From 0be0eb2b381948df605863ca16adfd7d771e2ee0 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 17:47:32 +0300 Subject: [PATCH 2/8] [GPU/D3D12] Letterboxing cropping to action-safe area --- src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 35 ++++-- src/xenia/gpu/draw_util.cc | 112 +++++++++++++++++++ src/xenia/gpu/draw_util.h | 8 ++ src/xenia/ui/d3d12/d3d12_context.cc | 6 +- src/xenia/ui/window_win.cc | 27 +---- 5 files changed, 154 insertions(+), 34 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 30b322f87..835c94c07 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -10,7 +10,9 @@ #include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include "xenia/gpu/draw_util.h" #include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/xbox.h" @@ -265,22 +267,39 @@ void D3D12GraphicsSystem::Swap(xe::ui::UIEvent* e) { return; } + uint32_t window_width, window_height; + display_context_->GetSwapChainSize(window_width, window_height); + + int32_t target_x, target_y; + uint32_t target_width, target_height; + draw_util::GetPresentArea(swap_state.width, swap_state.height, window_width, + window_height, target_x, target_y, target_width, + target_height); + // For safety. 
+ target_x = clamp(target_x, int32_t(D3D12_VIEWPORT_BOUNDS_MIN), + int32_t(D3D12_VIEWPORT_BOUNDS_MAX)); + target_y = clamp(target_y, int32_t(D3D12_VIEWPORT_BOUNDS_MIN), + int32_t(D3D12_VIEWPORT_BOUNDS_MAX)); + target_width = std::min( + target_width, uint32_t(int32_t(D3D12_VIEWPORT_BOUNDS_MAX) - target_x)); + target_height = std::min( + target_height, uint32_t(int32_t(D3D12_VIEWPORT_BOUNDS_MAX) - target_y)); + auto command_list = display_context_->GetSwapCommandList(); - uint32_t swap_width, swap_height; - display_context_->GetSwapChainSize(swap_width, swap_height); + // Assuming the window has already been cleared to the needed letterbox color. D3D12_VIEWPORT viewport; - viewport.TopLeftX = 0.0f; - viewport.TopLeftY = 0.0f; - viewport.Width = float(swap_width); - viewport.Height = float(swap_height); + viewport.TopLeftX = float(target_x); + viewport.TopLeftY = float(target_y); + viewport.Width = float(target_width); + viewport.Height = float(target_height); viewport.MinDepth = 0.0f; viewport.MaxDepth = 0.0f; command_list->RSSetViewports(1, &viewport); D3D12_RECT scissor; scissor.left = 0; scissor.top = 0; - scissor.right = swap_width; - scissor.bottom = swap_height; + scissor.right = window_width; + scissor.bottom = window_height; command_list->RSSetScissorRects(1, &scissor); command_list->SetDescriptorHeaps(1, &swap_srv_heap); StretchTextureToFrontBuffer( diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index 6c9ba1e73..d28df6d0e 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/draw_util.h" +#include #include #include @@ -31,6 +32,36 @@ DEFINE_bool( "for certain games like GTA IV to work).", "GPU"); +DEFINE_bool( + present_stretch, true, + "Whether to rescale the image, instead of maintaining the original pixel " + "size, when presenting to the window. 
When this is disabled, other " + "positioning options are ignored.", + "GPU"); +DEFINE_bool( + present_letterbox, true, + "Maintain aspect ratio when stretching by displaying bars around the image " + "when there's no more overscan area to crop out.", + "GPU"); +// https://github.com/MonoGame/MonoGame/issues/4697#issuecomment-217779403 +// Using the value from DirectXTK (5% cropped out from each side, thus 90%), +// which is not exactly the Xbox One title-safe area, but close, and within the +// action-safe area: +// https://github.com/microsoft/DirectXTK/blob/1e80a465c6960b457ef9ab6716672c1443a45024/Src/SimpleMath.cpp#L144 +// XNA TitleSafeArea is 80%, but it's very conservative, designed for CRT, and +// is the title-safe area rather than the action-safe area. +// 90% is also exactly the fraction of 16:9 height in 16:10. +DEFINE_int32( + present_safe_area_x, 90, + "Percentage of the image width that can be kept when presenting to " + "maintain aspect ratio without letterboxing or stretching.", + "GPU"); +DEFINE_int32( + present_safe_area_y, 90, + "Percentage of the image height that can be kept when presenting to " + "maintain aspect ratio without letterboxing or stretching.", + "GPU"); + namespace xe { namespace gpu { namespace draw_util { @@ -589,6 +620,87 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader( return shader; } +void GetPresentArea(uint32_t source_width, uint32_t source_height, + uint32_t window_width, uint32_t window_height, + int32_t& target_x_out, int32_t& target_y_out, + uint32_t& target_width_out, uint32_t& target_height_out) { + if (!cvars::present_stretch) { + target_x_out = (int32_t(window_width) - int32_t(source_width)) / 2; + target_y_out = (int32_t(window_height) - int32_t(source_height)) / 2; + target_width_out = source_width; + target_height_out = source_height; + return; + } + // Prevent division by zero. 
+ if (!source_width || !source_height) { + target_x_out = 0; + target_y_out = 0; + target_width_out = 0; + target_height_out = 0; + return; + } + if (uint64_t(window_width) * source_height > + uint64_t(source_width) * window_height) { + // The window is wider than the source - crop along Y, then letterbox or + // stretch along X. + uint32_t present_safe_area; + if (cvars::present_safe_area_y > 0 && cvars::present_safe_area_y < 100) { + present_safe_area = uint32_t(cvars::present_safe_area_y); + } else { + present_safe_area = 100; + } + uint32_t target_height = + uint32_t(uint64_t(window_width) * source_height / source_width); + bool letterbox = false; + if (target_height * present_safe_area > window_height * 100) { + // Don't crop out more than the safe area margin - letterbox or stretch. + target_height = window_height * 100 / present_safe_area; + letterbox = true; + } + if (letterbox && cvars::present_letterbox) { + uint32_t target_width = + uint32_t(uint64_t(source_width) * window_height * 100 / + (source_height * present_safe_area)); + target_x_out = (int32_t(window_width) - int32_t(target_width)) / 2; + target_width_out = target_width; + } else { + target_x_out = 0; + target_width_out = window_width; + } + target_y_out = (int32_t(window_height) - int32_t(target_height)) / 2; + target_height_out = target_height; + } else { + // The window is taller than the source - crop along X, then letterbox or + // stretch along Y. + uint32_t present_safe_area; + if (cvars::present_safe_area_x > 0 && cvars::present_safe_area_x < 100) { + present_safe_area = uint32_t(cvars::present_safe_area_x); + } else { + present_safe_area = 100; + } + uint32_t target_width = + uint32_t(uint64_t(window_height) * source_width / source_height); + bool letterbox = false; + if (target_width * present_safe_area > window_width * 100) { + // Don't crop out more than the safe area margin - letterbox or stretch. 
+ target_width = window_width * 100 / present_safe_area; + letterbox = true; + } + if (letterbox && cvars::present_letterbox) { + uint32_t target_height = + uint32_t(uint64_t(source_height) * window_width * 100 / + (source_width * present_safe_area)); + target_y_out = (int32_t(window_height) - int32_t(target_height)) / 2; + target_height_out = target_height; + } else { + target_y_out = 0; + target_height_out = window_height; + } + target_x_out = (int32_t(window_width) - int32_t(target_width)) / 2; + target_width_out = target_width; + } +} + } // namespace draw_util } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 76827c093..edb880ab0 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -272,6 +272,14 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, TraceWriter& trace_writer, uint32_t resolution_scale, bool edram_16_as_minus_1_to_1, ResolveInfo& info_out); +// Taking user configuration - stretching or letterboxing, overscan region to +// crop to fill while maintaining the aspect ratio - into account, returns the +// area where the frame should be presented in the host window. 
+void GetPresentArea(uint32_t source_width, uint32_t source_height, + uint32_t window_width, uint32_t window_height, + int32_t& target_x_out, int32_t& target_y_out, + uint32_t& target_width_out, uint32_t& target_height_out); + } // namespace draw_util } // namespace gpu } // namespace xe diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index 506ca6141..f897a5516 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -300,9 +300,9 @@ void D3D12Context::BeginSwap() { clear_color[1] = 1.0f; clear_color[2] = 0.0f; } else { - clear_color[0] = 238.0f / 255.0f; - clear_color[1] = 238.0f / 255.0f; - clear_color[2] = 238.0f / 255.0f; + clear_color[0] = 0.0f; + clear_color[1] = 0.0f; + clear_color[2] = 0.0f; } clear_color[3] = 1.0f; swap_command_list_->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, diff --git a/src/xenia/ui/window_win.cc b/src/xenia/ui/window_win.cc index 2e60d2e42..c86a5cec8 100644 --- a/src/xenia/ui/window_win.cc +++ b/src/xenia/ui/window_win.cc @@ -253,20 +253,6 @@ bool Win32Window::ReleaseMouse() { bool Win32Window::is_fullscreen() const { return fullscreen_; } -// https://blogs.msdn.microsoft.com/oldnewthing/20131017-00/?p=2903 -BOOL UnadjustWindowRect(LPRECT prc, DWORD dwStyle, BOOL fMenu) { - RECT rc; - SetRectEmpty(&rc); - BOOL fRc = AdjustWindowRect(&rc, dwStyle, fMenu); - if (fRc) { - prc->left -= rc.left; - prc->top -= rc.top; - prc->right -= rc.right; - prc->bottom -= rc.bottom; - } - return fRc; -} - void Win32Window::ToggleFullscreen(bool fullscreen) { if (fullscreen == is_fullscreen()) { return; @@ -288,9 +274,6 @@ void Win32Window::ToggleFullscreen(bool fullscreen) { AdjustWindowRect(&rc, GetWindowLong(hwnd_, GWL_STYLE), false); MoveWindow(hwnd_, rc.left, rc.top, rc.right - rc.left, rc.bottom - rc.top, TRUE); - - width_ = rc.right - rc.left; - height_ = rc.bottom - rc.top; } } else { // Reinstate borders, resize to 1280x720 @@ -301,15 +284,13 @@ void 
Win32Window::ToggleFullscreen(bool fullscreen) { if (main_menu) { ::SetMenu(hwnd_, main_menu->handle()); } - - auto& rc = windowed_pos_.rcNormalPosition; - bool has_menu = main_menu_ ? true : false; - UnadjustWindowRect(&rc, GetWindowLong(hwnd_, GWL_STYLE), has_menu); - width_ = rc.right - rc.left; - height_ = rc.bottom - rc.top; } fullscreen_ = fullscreen; + + // width_ and height_ will be updated by the WM_SIZE handler - + // windowed_pos_.rcNormalPosition is also not the correct source for them when + // switching from fullscreen to maximized. } bool Win32Window::is_bordered() const { From 74804ae7101c19ab29f0c1fe5f9d62d0b143503c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 19:23:45 +0300 Subject: [PATCH 3/8] [DXBC] ROV: Fix depth being overwritten if stencil failed --- src/xenia/gpu/dxbc_shader_translator_om.cc | 25 ++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index bb83200ee..24963008f 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -733,12 +733,6 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), DxbcSrc::LU(~uint32_t(1 << i))); - // temp.x? = resulting sample depth after the depth test - // temp.y = polygon offset if not writing to oDepth - // temp.z = viewport maximum depth if not writing to oDepth - // temp.w = old depth/stencil - // sample_temp.x = free - DxbcOpMov(sample_depth_stencil_dest, sample_temp_x_src); } DxbcOpEndIf(); // Create packed depth/stencil, with the stencil value unchanged at this @@ -977,6 +971,25 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Close the stencil test check. DxbcOpEndIf(); + // Check if the depth/stencil has failed not to modify the depth if it has. 
+ // sample_temp.x = whether depth/stencil has passed for this sample + DxbcOpAnd(sample_temp_x_dest, + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(1 << i)); + // If the depth/stencil test has failed, don't change the depth. + // sample_temp.x = free + DxbcOpIf(false, sample_temp_x_src); + { + // Copy the new stencil over the old depth. + // temp.x? = resulting sample depth/stencil + // temp.y = polygon offset if not writing to oDepth + // temp.z = viewport maximum depth if not writing to oDepth + // temp.w = old depth/stencil + DxbcOpBFI(sample_depth_stencil_dest, DxbcSrc::LU(8), DxbcSrc::LU(0), + sample_depth_stencil_src, temp_w_src); + } + // Close the depth/stencil passing check. + DxbcOpEndIf(); // Check if the new depth/stencil is different, and thus needs to be // written, to temp.w. // temp.x? = resulting sample depth/stencil From b3f84f4a9c5d8cf96dcda8ae3f6e89681d8ff0d1 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 5 Sep 2020 19:34:36 +0300 Subject: [PATCH 4/8] [GPU] Rename present_stretch to present_rescale --- src/xenia/gpu/draw_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index d28df6d0e..6aaa1b856 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -33,7 +33,7 @@ DEFINE_bool( "GPU"); DEFINE_bool( - present_stretch, true, + present_rescale, true, "Whether to rescale the image, instead of maintaining the original pixel " "size, when presenting to the window. 
When this is disabled, other " "positioning options are ignored.", @@ -624,7 +624,7 @@ void GetPresentArea(uint32_t source_width, uint32_t source_height, uint32_t window_width, uint32_t window_height, int32_t& target_x_out, int32_t& target_y_out, uint32_t& target_width_out, uint32_t& target_height_out) { - if (!cvars::present_stretch) { + if (!cvars::present_rescale) { target_x_out = (int32_t(window_width) - int32_t(source_width)) / 2; target_y_out = (int32_t(window_height) - int32_t(source_height)) / 2; target_width_out = source_width; From 38e0cc4941fc69b48555695eae2adaf4a86b726a Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 00:52:30 +0300 Subject: [PATCH 5/8] [D3D12] Non-zeroed heaps on 2004 --- docs/building.md | 2 +- premake5.lua | 6 ++-- .../gpu/d3d12/d3d12_command_processor.cc | 28 +++++++++------- src/xenia/gpu/d3d12/primitive_converter.cc | 12 ++++--- src/xenia/gpu/d3d12/render_target_cache.cc | 33 +++++++++++-------- src/xenia/gpu/d3d12/shared_memory.cc | 19 ++++++----- src/xenia/gpu/d3d12/texture_cache.cc | 12 ++++--- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 22 +++++++------ src/xenia/ui/d3d12/d3d12_provider.cc | 26 +++++++++++---- src/xenia/ui/d3d12/d3d12_provider.h | 4 +++ src/xenia/ui/d3d12/pools.cc | 11 ++++--- src/xenia/ui/d3d12/pools.h | 6 ++-- 12 files changed, 110 insertions(+), 71 deletions(-) diff --git a/docs/building.md b/docs/building.md index d8334793f..6aafc521e 100644 --- a/docs/building.md +++ b/docs/building.md @@ -12,7 +12,7 @@ drivers. * [Visual Studio 2019 or Visual Studio 2017](https://www.visualstudio.com/downloads/) * [Python 3.6+](https://www.python.org/downloads/) * Ensure Python is in PATH. 
-* Windows 10 SDK +* Windows 10 SDK version 10.0.19041.0 (for Visual Studio 2019, this or any newer version) ``` git clone https://github.com/xenia-project/xenia.git diff --git a/premake5.lua b/premake5.lua index 4af8c50bd..22f1fa7f1 100644 --- a/premake5.lua +++ b/premake5.lua @@ -202,10 +202,10 @@ solution("xenia") platforms({"Linux"}) elseif os.istarget("windows") then platforms({"Windows"}) - -- Minimum version to support ID3D12GraphicsCommandList1 (for - -- SetSamplePositions). + -- 10.0.15063.0: ID3D12GraphicsCommandList1::SetSamplePositions. + -- 10.0.19041.0: D3D12_HEAP_FLAG_CREATE_NOT_ZEROED. filter("action:vs2017") - systemversion("10.0.15063.0") + systemversion("10.0.19041.0") filter("action:vs2019") systemversion("10.0") filter({}) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index e9831347d..23163a609 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -646,14 +646,16 @@ ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( size = xe::align(size, kScratchBufferSizeIncrement); - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc( buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, - &buffer_desc, state, nullptr, IID_PPV_ARGS(&buffer)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, state, nullptr, + IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB scratch GPU buffer", size >> 20); return nullptr; } @@ -889,7 +891,7 @@ bool D3D12CommandProcessor::SetupContext() { // Initialize resource binding. 
constant_buffer_pool_ = - std::make_unique(device, 1024 * 1024); + std::make_unique(provider, 1024 * 1024); if (bindless_resources_used_) { D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc; view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -1181,6 +1183,9 @@ bool D3D12CommandProcessor::SetupContext() { return false; } + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = + provider.GetHeapFlagCreateNotZeroed(); + // Create gamma ramp resources. The PWL gamma ramp is 16-bit, but 6 bits are // hardwired to zero, so DXGI_FORMAT_R10G10B10A2_UNORM can be used for it too. // https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf @@ -1202,7 +1207,7 @@ bool D3D12CommandProcessor::SetupContext() { // The first action will be uploading. gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &gamma_ramp_desc, gamma_ramp_texture_state_, nullptr, IID_PPV_ARGS(&gamma_ramp_texture_)))) { XELOGE("Failed to create the gamma ramp texture"); @@ -1218,7 +1223,7 @@ bool D3D12CommandProcessor::SetupContext() { ui::d3d12::util::FillBufferResourceDesc( gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed, &gamma_ramp_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&gamma_ramp_upload_)))) { XELOGE("Failed to create the gamma ramp upload buffer"); @@ -1246,7 +1251,7 @@ bool D3D12CommandProcessor::SetupContext() { swap_texture_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; // Can be sampled at any time, switch to render target when needed, then back. 
if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &swap_texture_desc, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&swap_texture_)))) { XELOGE("Failed to create the command processor front buffer"); @@ -4286,15 +4291,16 @@ ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) { } size = xe::align(size, kReadbackBufferSizeIncrement); if (size > readback_buffer_size_) { - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, - &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&buffer)))) { + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB readback buffer", size >> 20); return nullptr; } diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 4884865d0..ab2138b47 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -47,14 +47,16 @@ PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor& command_processor, PrimitiveConverter::~PrimitiveConverter() { Shutdown(); } bool PrimitiveConverter::Initialize() { - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = + 
provider.GetHeapFlagCreateNotZeroed(); // There can be at most 65535 indices in a Xenos draw call, but they can be up // to 4 bytes large, and conversion can add more indices (almost triple the // count for triangle strips, for instance). buffer_pool_ = - std::make_unique(device, 4 * 1024 * 1024); + std::make_unique(provider, 4 * 1024 * 1024); // Create the static index buffer for non-indexed drawing. D3D12_RESOURCE_DESC static_ib_desc; @@ -62,7 +64,7 @@ bool PrimitiveConverter::Initialize() { static_ib_desc, kStaticIBTotalCount * sizeof(uint16_t), D3D12_RESOURCE_FLAG_NONE); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesUpload, heap_flag_create_not_zeroed, &static_ib_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&static_ib_upload_)))) { XELOGE( @@ -108,7 +110,7 @@ bool PrimitiveConverter::Initialize() { // Not uploaded yet. static_ib_upload_submission_ = UINT64_MAX; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, heap_flag_create_not_zeroed, &static_ib_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&static_ib_)))) { XELOGE("Failed to create the primitive conversion static index buffer"); diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index bd25b738a..cfc7f6f47 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -137,8 +137,6 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { uint32_t edram_buffer_size = GetEdramBufferSize(); // Create the buffer for reinterpreting EDRAM contents. - // No need to clear it in the first frame, memory is zeroed out when allocated - // on Windows. 
D3D12_RESOURCE_DESC edram_buffer_desc; ui::d3d12::util::FillBufferResourceDesc( edram_buffer_desc, edram_buffer_size, @@ -147,8 +145,15 @@ bool RenderTargetCache::Initialize(const TextureCache& texture_cache) { edram_buffer_state_ = edram_rov_used_ ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + // Request zeroed (though no guarantee) when not using ROV so the host 32-bit + // depth buffer will be initialized to deterministic values (because it's + // involved in comparison with converted 24-bit values - whether the 32-bit + // value is up to date is determined by whether it's equal to the 24-bit + // value in the main EDRAM buffer when converted to 24-bit). if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesDefault, + edram_rov_used_ ? provider.GetHeapFlagCreateNotZeroed() + : D3D12_HEAP_FLAG_NONE, &edram_buffer_desc, edram_buffer_state_, nullptr, IID_PPV_ARGS(&edram_buffer_)))) { XELOGE("Failed to create the EDRAM buffer"); @@ -1451,10 +1456,11 @@ bool RenderTargetCache::InitializeTraceSubmitDownloads() { ui::d3d12::util::FillBufferResourceDesc(edram_snapshot_download_buffer_desc, xenos::kEdramSizeBytes, D3D12_RESOURCE_FLAG_NONE); - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &edram_snapshot_download_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&edram_snapshot_download_buffer_)))) { @@ -1493,10 +1499,9 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { return; } auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); - 
auto device = provider.GetDevice(); if (!edram_snapshot_restore_pool_) { edram_snapshot_restore_pool_ = - std::make_unique(device, + std::make_unique(provider, xenos::kEdramSizeBytes); } ID3D12Resource* upload_buffer; @@ -1603,14 +1608,15 @@ bool RenderTargetCache::MakeHeapResident(uint32_t heap_index) { if (heaps_[heap_index] != nullptr) { return true; } - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kHeap4MBPages << 22; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; // TODO(Triang3l): If real MSAA is added, alignment must be 4 MB. heap_desc.Alignment = 0; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED( device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[heap_index])))) { XELOGE("Failed to create a {} MB heap for render targets", @@ -1756,8 +1762,9 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget( } #else if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, - &resource_desc, state, nullptr, IID_PPV_ARGS(&resource)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &resource_desc, state, nullptr, + IID_PPV_ARGS(&resource)))) { XELOGE( "Failed to create a committed resource for {}x{} {} render target with " "format {}", diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index f2d2e6296..c24336664 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -73,8 +73,9 @@ bool SharedMemory::Initialize() { "resources yet."); } if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, 
D3D12_HEAP_FLAG_NONE, - &buffer_desc, buffer_state_, nullptr, IID_PPV_ARGS(&buffer_)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, buffer_state_, + nullptr, IID_PPV_ARGS(&buffer_)))) { XELOGE("Shared memory: Failed to create the 512 MB buffer"); Shutdown(); return false; @@ -153,7 +154,7 @@ bool SharedMemory::Initialize() { system_page_flags_.resize((page_count_ + 63) / 64); upload_buffer_pool_ = std::make_unique( - device, + provider, xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_)); memory_invalidation_callback_handle_ = @@ -370,7 +371,8 @@ bool SharedMemory::EnsureTilesResident(uint32_t start, uint32_t length) { D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[i])))) { XELOGE("Shared memory: Failed to create a tile heap"); return false; @@ -890,11 +892,12 @@ bool SharedMemory::InitializeTraceSubmitDownloads() { ui::d3d12::util::FillBufferResourceDesc( gpu_written_buffer_desc, gpu_written_page_count << page_size_log2_, D3D12_RESOURCE_FLAG_NONE); - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesReadback, D3D12_HEAP_FLAG_NONE, - &gpu_written_buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + &ui::d3d12::util::kHeapPropertiesReadback, + provider.GetHeapFlagCreateNotZeroed(), &gpu_written_buffer_desc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&trace_gpu_written_buffer_)))) { XELOGE( "Shared memory: Failed to create a {} KB GPU-written memory download " diff 
--git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 0f096fb49..821a0136f 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -1633,7 +1633,8 @@ bool TextureCache::EnsureScaledResolveBufferResident(uint32_t start_unscaled, D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = kScaledResolveHeapSize; heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS | + provider.GetHeapFlagCreateNotZeroed(); if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&scaled_resolve_heaps_[i])))) { XELOGE("Texture cache: Failed to create a scaled resolve tile heap"); @@ -1953,14 +1954,15 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) { // Untiling through a buffer instead of using unordered access because copying // is not done that often. desc.Flags = D3D12_RESOURCE_FLAG_NONE; - auto device = - command_processor_.GetD3D12Context().GetD3D12Provider().GetDevice(); + auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); + auto device = provider.GetDevice(); // Assuming untiling will be the next operation. 
D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COPY_DEST; ID3D12Resource* resource; if (FAILED(device->CreateCommittedResource( - &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &desc, - state, nullptr, IID_PPV_ARGS(&resource)))) { + &ui::d3d12::util::kHeapPropertiesDefault, + provider.GetHeapFlagCreateNotZeroed(), &desc, state, nullptr, + IID_PPV_ARGS(&resource)))) { LogTextureKeyAction(key, "Failed to create"); return nullptr; } diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index f91c79677..3c0ea8a3a 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -33,7 +33,7 @@ class D3D12ImmediateTexture : public ImmediateTexture { ImmediateTextureFilter filter, bool repeat); ~D3D12ImmediateTexture() override; - bool Initialize(ID3D12Device* device); + bool Initialize(D3D12Provider& provider); void Shutdown(); ID3D12Resource* GetResource() const { return resource_; } @@ -59,7 +59,7 @@ D3D12ImmediateTexture::D3D12ImmediateTexture(uint32_t width, uint32_t height, D3D12ImmediateTexture::~D3D12ImmediateTexture() { Shutdown(); } -bool D3D12ImmediateTexture::Initialize(ID3D12Device* device) { +bool D3D12ImmediateTexture::Initialize(D3D12Provider& provider) { // The first operation will likely be copying the contents. 
state_ = D3D12_RESOURCE_STATE_COPY_DEST; @@ -75,9 +75,9 @@ bool D3D12ImmediateTexture::Initialize(ID3D12Device* device) { resource_desc.SampleDesc.Quality = 0; resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE; - if (FAILED(device->CreateCommittedResource( - &util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, &resource_desc, - state_, nullptr, IID_PPV_ARGS(&resource_)))) { + if (FAILED(provider.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), + &resource_desc, state_, nullptr, IID_PPV_ARGS(&resource_)))) { XELOGE("Failed to create a {}x{} texture for immediate drawing", width, height); return false; @@ -288,7 +288,7 @@ bool D3D12ImmediateDrawer::Initialize() { // Create pools for draws. vertex_buffer_pool_ = - std::make_unique(device, 2 * 1024 * 1024); + std::make_unique(provider, 2 * 1024 * 1024); texture_descriptor_pool_ = std::make_unique( device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048); texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; @@ -326,7 +326,7 @@ std::unique_ptr D3D12ImmediateDrawer::CreateTexture( const uint8_t* data) { auto texture = std::make_unique(width, height, filter, repeat); - texture->Initialize(context_.GetD3D12Provider().GetDevice()); + texture->Initialize(context_.GetD3D12Provider()); if (data != nullptr) { UpdateTexture(texture.get(), data); } @@ -343,7 +343,8 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, } uint32_t width = d3d_texture->width, height = d3d_texture->height; - auto device = context_.GetD3D12Provider().GetDevice(); + auto& provider = context_.GetD3D12Provider(); + auto device = provider.GetDevice(); // Create and fill the upload buffer. 
D3D12_RESOURCE_DESC texture_desc = texture_resource->GetDesc(); @@ -356,8 +357,9 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( - &util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, &buffer_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&buffer)))) { + &util::kHeapPropertiesUpload, provider.GetHeapFlagCreateNotZeroed(), + &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&buffer)))) { XELOGE( "Failed to create an upload buffer for a {}x{} texture for " "immediate drawing", diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 77e4e70aa..0231a5bec 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -409,17 +409,29 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } + // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by + // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). 
+ heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; + D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; + if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, + &options7, sizeof(options7)))) { + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } XELOGD3D( - "Direct3D 12 device features:\n" - "Max GPU virtual address bits per resource: {}\n" - "Programmable sample positions: tier {}\n" - "Rasterizer-ordered views: {}\n" - "Resource binding: tier {}\n" - "Tiled resources: tier {}\n", + "Direct3D 12 device and OS features:\n" + "* Max GPU virtual address bits per resource: {}\n" + "* Programmable sample positions: tier {}\n" + "* Rasterizer-ordered views: {}\n" + "* Resource binding: tier {}\n" + "* Tiled resources: tier {}\n" + "* Non-zeroed heap creation: {}\n", virtual_address_bits_per_resource_, uint32_t(programmable_sample_positions_tier_), rasterizer_ordered_views_supported_ ? "yes" : "no", - uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_), + (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) + ? "yes" + : "no"); // Get the graphics analysis interface, will silently fail if PIX is not // attached. diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index 122f16e2f..1c8694fd0 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -84,6 +84,9 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } + D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { + return heap_flag_create_not_zeroed_; + } // Proxies for Direct3D 12 functions since they are loaded dynamically. 
inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, @@ -164,6 +167,7 @@ class D3D12Provider : public GraphicsProvider { D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; }; } // namespace d3d12 diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index deff80b12..7b892caa9 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -19,8 +19,8 @@ namespace xe { namespace ui { namespace d3d12 { -UploadBufferPool::UploadBufferPool(ID3D12Device* device, uint32_t page_size) - : device_(device), page_size_(page_size) {} +UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size) + : provider_(provider), page_size_(page_size) {} UploadBufferPool::~UploadBufferPool() { ClearCache(); } @@ -101,9 +101,10 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, util::FillBufferResourceDesc(new_buffer_desc, page_size_, D3D12_RESOURCE_FLAG_NONE); ID3D12Resource* new_buffer; - if (FAILED(device_->CreateCommittedResource( - &util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, - &new_buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + if (FAILED(provider_.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesUpload, + provider_.GetHeapFlagCreateNotZeroed(), &new_buffer_desc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&new_buffer)))) { XELOGE("Failed to create a D3D upload buffer with {} bytes", page_size_); diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/pools.h index 4499bd08f..21606cc42 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/pools.h @@ -12,7 +12,7 @@ #include -#include "xenia/ui/d3d12/d3d12_api.h" +#include "xenia/ui/d3d12/d3d12_provider.h" namespace xe { namespace ui { @@ -23,7 +23,7 @@ namespace d3d12 { class UploadBufferPool { public: - 
UploadBufferPool(ID3D12Device* device, uint32_t page_size); + UploadBufferPool(D3D12Provider& provider, uint32_t page_size); ~UploadBufferPool(); void Reclaim(uint64_t completed_submission_index); @@ -41,7 +41,7 @@ class UploadBufferPool { D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); private: - ID3D12Device* device_; + D3D12Provider& provider_; uint32_t page_size_; struct Page { From acb1fc059fd950c92464af6dc1640f896cdf89c9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 6 Sep 2020 00:57:00 +0300 Subject: [PATCH 6/8] [D3D12] Make the feature list in the log sorted again --- src/xenia/ui/d3d12/d3d12_provider.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index 0231a5bec..cb5287e14 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -420,18 +420,17 @@ bool D3D12Provider::Initialize() { XELOGD3D( "Direct3D 12 device and OS features:\n" "* Max GPU virtual address bits per resource: {}\n" + "* Non-zeroed heap creation: {}\n" "* Programmable sample positions: tier {}\n" "* Rasterizer-ordered views: {}\n" "* Resource binding: tier {}\n" - "* Tiled resources: tier {}\n" - "* Non-zeroed heap creation: {}\n", + "* Tiled resources: tier {}\n", virtual_address_bits_per_resource_, + (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) ? "yes" + : "no", uint32_t(programmable_sample_positions_tier_), rasterizer_ordered_views_supported_ ? "yes" : "no", - uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_), - (heap_flag_create_not_zeroed_ & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) - ? "yes" - : "no"); + uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_)); // Get the graphics analysis interface, will silently fail if PIX is not // attached. 
From dfbe36a8aa7e515a411f69278f2056a4feafc990 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 14 Sep 2020 23:27:19 +0300 Subject: [PATCH 7/8] [UI/D3D12] Small refactoring, allow BeginSwap to return false if no surface --- src/xenia/ui/d3d12/d3d12_context.cc | 215 ++++++++++++-------------- src/xenia/ui/d3d12/d3d12_context.h | 7 +- src/xenia/ui/d3d12/d3d12_provider.cc | 16 +- src/xenia/ui/d3d12/d3d12_provider.h | 8 +- src/xenia/ui/graphics_context.cc | 18 +++ src/xenia/ui/graphics_context.h | 5 +- src/xenia/ui/vulkan/vulkan_context.cc | 4 +- src/xenia/ui/vulkan/vulkan_context.h | 2 +- src/xenia/ui/window.cc | 6 +- 9 files changed, 149 insertions(+), 132 deletions(-) diff --git a/src/xenia/ui/d3d12/d3d12_context.cc b/src/xenia/ui/d3d12/d3d12_context.cc index f897a5516..7764afa44 100644 --- a/src/xenia/ui/d3d12/d3d12_context.cc +++ b/src/xenia/ui/d3d12/d3d12_context.cc @@ -9,9 +9,6 @@ #include "xenia/ui/d3d12/d3d12_context.h" -#include - -#include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/ui/d3d12/d3d12_immediate_drawer.h" @@ -19,9 +16,6 @@ #include "xenia/ui/d3d12/d3d12_util.h" #include "xenia/ui/window.h" -DEFINE_bool(d3d12_random_clear_color, false, - "Randomize presentation back buffer clear color.", "D3D12"); - namespace xe { namespace ui { namespace d3d12 { @@ -32,110 +26,112 @@ D3D12Context::D3D12Context(D3D12Provider* provider, Window* target_window) D3D12Context::~D3D12Context() { Shutdown(); } bool D3D12Context::Initialize() { + context_lost_ = false; + + if (!target_window_) { + return true; + } + auto& provider = GetD3D12Provider(); auto dxgi_factory = provider.GetDXGIFactory(); auto device = provider.GetDevice(); auto direct_queue = provider.GetDirectQueue(); - context_lost_ = false; + swap_fence_current_value_ = 1; + swap_fence_completed_value_ = 0; + swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); + if (swap_fence_completion_event_ == nullptr) { + XELOGE("Failed to 
create the composition fence completion event"); + Shutdown(); + return false; + } + // Create a fence for transient resources of compositing. + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&swap_fence_)))) { + XELOGE("Failed to create the composition fence"); + Shutdown(); + return false; + } - if (target_window_) { - swap_fence_current_value_ = 1; - swap_fence_completed_value_ = 0; - swap_fence_completion_event_ = CreateEvent(nullptr, false, false, nullptr); - if (swap_fence_completion_event_ == nullptr) { - XELOGE("Failed to create the composition fence completion event"); - Shutdown(); - return false; - } - // Create a fence for transient resources of compositing. - if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(&swap_fence_)))) { - XELOGE("Failed to create the composition fence"); - Shutdown(); - return false; - } - - // Create the swap chain. - swap_chain_width_ = target_window_->scaled_width(); - swap_chain_height_ = target_window_->scaled_height(); - DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; - swap_chain_desc.Width = swap_chain_width_; - swap_chain_desc.Height = swap_chain_height_; - swap_chain_desc.Format = kSwapChainFormat; - swap_chain_desc.Stereo = FALSE; - swap_chain_desc.SampleDesc.Count = 1; - swap_chain_desc.SampleDesc.Quality = 0; - swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swap_chain_desc.BufferCount = kSwapChainBufferCount; - swap_chain_desc.Scaling = DXGI_SCALING_STRETCH; - swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - swap_chain_desc.Flags = 0; - IDXGISwapChain1* swap_chain_1; - if (FAILED(dxgi_factory->CreateSwapChainForHwnd( - provider.GetDirectQueue(), - static_cast(target_window_->native_handle()), - &swap_chain_desc, nullptr, nullptr, &swap_chain_1))) { - XELOGE("Failed to create a DXGI swap chain"); - Shutdown(); - return false; - } - if 
(FAILED(swap_chain_1->QueryInterface(IID_PPV_ARGS(&swap_chain_)))) { - XELOGE("Failed to get version 3 of the DXGI swap chain interface"); - swap_chain_1->Release(); - Shutdown(); - return false; - } + // Create the swap chain. + swap_chain_width_ = target_window_->scaled_width(); + swap_chain_height_ = target_window_->scaled_height(); + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; + swap_chain_desc.Width = swap_chain_width_; + swap_chain_desc.Height = swap_chain_height_; + swap_chain_desc.Format = kSwapChainFormat; + swap_chain_desc.Stereo = FALSE; + swap_chain_desc.SampleDesc.Count = 1; + swap_chain_desc.SampleDesc.Quality = 0; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.BufferCount = kSwapChainBufferCount; + swap_chain_desc.Scaling = DXGI_SCALING_STRETCH; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swap_chain_desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + swap_chain_desc.Flags = 0; + IDXGISwapChain1* swap_chain_1; + if (FAILED(dxgi_factory->CreateSwapChainForHwnd( + provider.GetDirectQueue(), + reinterpret_cast(target_window_->native_handle()), + &swap_chain_desc, nullptr, nullptr, &swap_chain_1))) { + XELOGE("Failed to create a DXGI swap chain"); + Shutdown(); + return false; + } + if (FAILED(swap_chain_1->QueryInterface(IID_PPV_ARGS(&swap_chain_)))) { + XELOGE("Failed to get version 3 of the DXGI swap chain interface"); swap_chain_1->Release(); + Shutdown(); + return false; + } + swap_chain_1->Release(); - // Create a heap for RTV descriptors of swap chain buffers. 
- D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc; - rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - rtv_heap_desc.NumDescriptors = kSwapChainBufferCount; - rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - rtv_heap_desc.NodeMask = 0; - if (FAILED(device->CreateDescriptorHeap( - &rtv_heap_desc, IID_PPV_ARGS(&swap_chain_rtv_heap_)))) { - XELOGE("Failed to create swap chain RTV descriptor heap"); + // Create a heap for RTV descriptors of swap chain buffers. + D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc; + rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtv_heap_desc.NumDescriptors = kSwapChainBufferCount; + rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + rtv_heap_desc.NodeMask = 0; + if (FAILED(device->CreateDescriptorHeap( + &rtv_heap_desc, IID_PPV_ARGS(&swap_chain_rtv_heap_)))) { + XELOGE("Failed to create swap chain RTV descriptor heap"); + Shutdown(); + return false; + } + swap_chain_rtv_heap_start_ = + swap_chain_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + + // Get the buffers and create their RTV descriptors. + if (!InitializeSwapChainBuffers()) { + Shutdown(); + return false; + } + + // Create the command list for compositing. + for (uint32_t i = 0; i < kSwapCommandAllocatorCount; ++i) { + if (FAILED(device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(&swap_command_allocators_[i])))) { + XELOGE("Failed to create a composition command allocator"); Shutdown(); return false; } - swap_chain_rtv_heap_start_ = - swap_chain_rtv_heap_->GetCPUDescriptorHandleForHeapStart(); + } + if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + swap_command_allocators_[0], nullptr, + IID_PPV_ARGS(&swap_command_list_)))) { + XELOGE("Failed to create the composition graphics command list"); + Shutdown(); + return false; + } + // Initially in open state, wait until BeginSwap. + swap_command_list_->Close(); - // Get the buffers and create their RTV descriptors. 
- if (!InitializeSwapChainBuffers()) { - Shutdown(); - return false; - } - - // Create the command list for compositing. - for (uint32_t i = 0; i < kSwapCommandAllocatorCount; ++i) { - if (FAILED(device->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&swap_command_allocators_[i])))) { - XELOGE("Failed to create a composition command allocator"); - Shutdown(); - return false; - } - } - if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, - swap_command_allocators_[0], nullptr, - IID_PPV_ARGS(&swap_command_list_)))) { - XELOGE("Failed to create the composition graphics command list"); - Shutdown(); - return false; - } - // Initially in open state, wait until BeginSwap. - swap_command_list_->Close(); - - // Initialize the immediate mode drawer if not offscreen. - immediate_drawer_ = std::make_unique(*this); - if (!immediate_drawer_->Initialize()) { - Shutdown(); - return false; - } + // Initialize the immediate mode drawer if not offscreen. + immediate_drawer_ = std::make_unique(*this); + if (!immediate_drawer_->Initialize()) { + Shutdown(); + return false; } return true; @@ -223,9 +219,11 @@ ImmediateDrawer* D3D12Context::immediate_drawer() { return immediate_drawer_.get(); } -void D3D12Context::BeginSwap() { +bool D3D12Context::WasLost() { return context_lost_; } + +bool D3D12Context::BeginSwap() { if (!target_window_ || context_lost_) { - return; + return false; } // Resize the swap chain if the window is resized. 
@@ -252,13 +250,13 @@ void D3D12Context::BeginSwap() { kSwapChainBufferCount, target_window_width, target_window_height, kSwapChainFormat, 0))) { context_lost_ = true; - return; + return false; } swap_chain_width_ = target_window_width; swap_chain_height_ = target_window_height; if (!InitializeSwapChainBuffers()) { context_lost_ = true; - return; + return false; } } @@ -295,18 +293,11 @@ void D3D12Context::BeginSwap() { D3D12_CPU_DESCRIPTOR_HANDLE back_buffer_rtv = GetSwapChainBackBufferRTV(); swap_command_list_->OMSetRenderTargets(1, &back_buffer_rtv, TRUE, nullptr); float clear_color[4]; - if (cvars::d3d12_random_clear_color) { - clear_color[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) - clear_color[1] = 1.0f; - clear_color[2] = 0.0f; - } else { - clear_color[0] = 0.0f; - clear_color[1] = 0.0f; - clear_color[2] = 0.0f; - } - clear_color[3] = 1.0f; + GetClearColor(clear_color); swap_command_list_->ClearRenderTargetView(back_buffer_rtv, clear_color, 0, nullptr); + + return true; } void D3D12Context::EndSwap() { diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index 4cf13d87e..c9f235b97 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -28,9 +28,9 @@ class D3D12Context : public GraphicsContext { ImmediateDrawer* immediate_drawer() override; - bool WasLost() override { return context_lost_; } + bool WasLost() override; - void BeginSwap() override; + bool BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; @@ -69,11 +69,10 @@ class D3D12Context : public GraphicsContext { private: friend class D3D12Provider; - explicit D3D12Context(D3D12Provider* provider, Window* target_window); + bool Initialize(); private: - bool Initialize(); bool InitializeSwapChainBuffers(); void Shutdown(); diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index cb5287e14..d1f6594ca 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc 
+++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -383,6 +383,14 @@ bool D3D12Provider::Initialize() { device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); // Check if optional features are supported. + // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by + // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; + D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; + if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, + &options7, sizeof(options7)))) { + heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + } rasterizer_ordered_views_supported_ = false; resource_binding_tier_ = D3D12_RESOURCE_BINDING_TIER_1; tiled_resources_tier_ = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED; @@ -409,14 +417,6 @@ bool D3D12Provider::Initialize() { virtual_address_bits_per_resource_ = virtual_address_support.MaxGPUVirtualAddressBitsPerResource; } - // D3D12_HEAP_FLAG_CREATE_NOT_ZEROED requires Windows 10 2004 (indicated by - // the availability of ID3D12Device8 or D3D12_FEATURE_D3D12_OPTIONS7). - heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_NONE; - D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7; - if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, - &options7, sizeof(options7)))) { - heap_flag_create_not_zeroed_ = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - } XELOGD3D( "Direct3D 12 device and OS features:\n" "* Max GPU virtual address bits per resource: {}\n" diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index 1c8694fd0..c8332801c 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -68,6 +68,9 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; } // Device features. 
+ D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { + return heap_flag_create_not_zeroed_; + } D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER GetProgrammableSamplePositionsTier() const { return programmable_sample_positions_tier_; @@ -84,9 +87,6 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetVirtualAddressBitsPerResource() const { return virtual_address_bits_per_resource_; } - D3D12_HEAP_FLAGS GetHeapFlagCreateNotZeroed() const { - return heap_flag_create_not_zeroed_; - } // Proxies for Direct3D 12 functions since they are loaded dynamically. inline HRESULT SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, @@ -162,12 +162,12 @@ class D3D12Provider : public GraphicsProvider { uint32_t adapter_vendor_id_; + D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER programmable_sample_positions_tier_; bool rasterizer_ordered_views_supported_; D3D12_RESOURCE_BINDING_TIER resource_binding_tier_; D3D12_TILED_RESOURCES_TIER tiled_resources_tier_; uint32_t virtual_address_bits_per_resource_; - D3D12_HEAP_FLAGS heap_flag_create_not_zeroed_; }; } // namespace d3d12 diff --git a/src/xenia/ui/graphics_context.cc b/src/xenia/ui/graphics_context.cc index 73980cd37..7f5ab07b6 100644 --- a/src/xenia/ui/graphics_context.cc +++ b/src/xenia/ui/graphics_context.cc @@ -9,8 +9,13 @@ #include "xenia/ui/graphics_context.h" +#include + +#include "xenia/base/cvar.h" #include "xenia/ui/graphics_provider.h" +DEFINE_bool(random_clear_color, false, "Randomize window clear color.", "UI"); + namespace xe { namespace ui { @@ -26,5 +31,18 @@ bool GraphicsContext::MakeCurrent() { return true; } void GraphicsContext::ClearCurrent() {} +void GraphicsContext::GetClearColor(float* rgba) { + if (cvars::random_clear_color) { + rgba[0] = rand() / float(RAND_MAX); // NOLINT(runtime/threadsafe_fn) + rgba[1] = 1.0f; + rgba[2] = 0.0f; + } else { + rgba[0] = 0.0f; + rgba[1] = 0.0f; + rgba[2] = 0.0f; + } + rgba[3] = 1.0f; +} + } // namespace ui } // 
namespace xe diff --git a/src/xenia/ui/graphics_context.h b/src/xenia/ui/graphics_context.h index 383338770..0ed5bd881 100644 --- a/src/xenia/ui/graphics_context.h +++ b/src/xenia/ui/graphics_context.h @@ -51,7 +51,8 @@ class GraphicsContext { // This context must be made current in order for this call to work properly. virtual bool WasLost() = 0; - virtual void BeginSwap() = 0; + // Returns true if able to draw now (the target surface is available). + virtual bool BeginSwap() = 0; virtual void EndSwap() = 0; virtual std::unique_ptr Capture() = 0; @@ -59,6 +60,8 @@ class GraphicsContext { protected: explicit GraphicsContext(GraphicsProvider* provider, Window* target_window); + static void GetClearColor(float* rgba); + GraphicsProvider* provider_ = nullptr; Window* target_window_ = nullptr; }; diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index 339f099ef..50f51ad74 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -141,7 +141,7 @@ bool VulkanContext::MakeCurrent() { void VulkanContext::ClearCurrent() {} -void VulkanContext::BeginSwap() { +bool VulkanContext::BeginSwap() { SCOPE_profile_cpu_f("gpu"); auto provider = static_cast(provider_); auto device = provider->device(); @@ -170,6 +170,8 @@ void VulkanContext::BeginSwap() { // TODO(benvanik): use a fence instead? May not be possible with target image. 
std::lock_guard queue_lock(device->primary_queue_mutex()); status = vkQueueWaitIdle(device->primary_queue()); + + return true; } void VulkanContext::EndSwap() { diff --git a/src/xenia/ui/vulkan/vulkan_context.h b/src/xenia/ui/vulkan/vulkan_context.h index 3665ffd78..f5658bdd1 100644 --- a/src/xenia/ui/vulkan/vulkan_context.h +++ b/src/xenia/ui/vulkan/vulkan_context.h @@ -40,7 +40,7 @@ class VulkanContext : public GraphicsContext { bool WasLost() override { return context_lost_; } - void BeginSwap() override; + bool BeginSwap() override; void EndSwap() override; std::unique_ptr Capture() override; diff --git a/src/xenia/ui/window.cc b/src/xenia/ui/window.cc index 8be8900c8..1273b61f0 100644 --- a/src/xenia/ui/window.cc +++ b/src/xenia/ui/window.cc @@ -200,11 +200,15 @@ void Window::OnPaint(UIEvent* e) { io.DisplaySize = ImVec2(static_cast(scaled_width()), static_cast(scaled_height())); - context_->BeginSwap(); + bool can_swap = context_->BeginSwap(); if (context_->WasLost()) { on_context_lost(e); return; } + if (!can_swap) { + // Surface not available. 
+ return; + } ImGui::NewFrame(); From 2cebd3cabecfeedc643b4116debcb3fa6c5cc33e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 15 Sep 2020 22:13:53 +0300 Subject: [PATCH 8/8] [D3D12] Unify UploadBufferPool page size (2 MB), add alignment parameter --- src/xenia/base/math.h | 19 +++-- .../gpu/d3d12/d3d12_command_processor.cc | 72 ++++++++++--------- src/xenia/gpu/d3d12/primitive_converter.cc | 16 +++-- src/xenia/gpu/d3d12/render_target_cache.cc | 2 +- src/xenia/gpu/d3d12/shared_memory.cc | 9 +-- src/xenia/gpu/d3d12/texture_cache.cc | 6 +- src/xenia/ui/d3d12/d3d12_immediate_drawer.cc | 10 ++- src/xenia/ui/d3d12/pools.cc | 43 +++++++---- src/xenia/ui/d3d12/pools.h | 13 ++-- 9 files changed, 111 insertions(+), 79 deletions(-) diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index c33e27019..e2d321702 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -26,23 +26,28 @@ namespace xe { template -size_t countof(T (&arr)[N]) { +constexpr size_t countof(T (&arr)[N]) { return std::extent::value; } +template +constexpr bool is_pow2(T value) { + return (value & (value - 1)) == 0; +} + // Rounds up the given value to the given alignment. template -T align(T value, T alignment) { +constexpr T align(T value, T alignment) { return (value + alignment - 1) & ~(alignment - 1); } // Rounds the given number up to the next highest multiple. template -T round_up(T value, V multiple) { +constexpr T round_up(T value, V multiple) { return value ? 
(((value + multiple - 1) / multiple) * multiple) : multiple; } -inline float saturate(float value) { +constexpr float saturate(float value) { return std::max(std::min(1.0f, value), -1.0f); } @@ -62,7 +67,7 @@ T next_pow2(T value) { #if __cpp_lib_gcd_lcm template -inline constexpr T greatest_common_divisor(T a, T b) { +constexpr T greatest_common_divisor(T a, T b) { return std::gcd(a, b); } #else @@ -77,14 +82,14 @@ constexpr T greatest_common_divisor(T a, T b) { #endif template -inline constexpr void reduce_fraction(T& numerator, T& denominator) { +constexpr void reduce_fraction(T& numerator, T& denominator) { auto gcd = greatest_common_divisor(numerator, denominator); numerator /= gcd; denominator /= gcd; } template -inline constexpr void reduce_fraction(std::pair& fraction) { +constexpr void reduce_fraction(std::pair& fraction) { reduce_fraction(fraction.first, fraction.second); } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 23163a609..0ce75ea77 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -890,8 +890,10 @@ bool D3D12CommandProcessor::SetupContext() { cvars::d3d12_edram_rov && provider.AreRasterizerOrderedViewsSupported(); // Initialize resource binding. 
- constant_buffer_pool_ = - std::make_unique(provider, 1024 * 1024); + constant_buffer_pool_ = std::make_unique( + provider, std::max(ui::d3d12::UploadBufferPool::kDefaultPageSize, + uint32_t(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4 * + sizeof(float)))); if (bindless_resources_used_) { D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc; view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -3519,13 +3521,6 @@ bool D3D12CommandProcessor::UpdateBindings( const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count; - // Even if the shader doesn't need any float constants, a valid binding must - // still be provided, so if the first draw in the frame with the current root - // signature doesn't have float constants at all, still allocate an empty - // buffer. - uint32_t float_constant_size_vertex = xe::align( - uint32_t(std::max(float_constant_count_vertex, 1u) * 4 * sizeof(float)), - 256u); for (uint32_t i = 0; i < 4; ++i) { if (current_float_constant_map_vertex_[i] != float_constant_map_vertex.float_bitmap[i]) { @@ -3557,15 +3552,13 @@ bool D3D12CommandProcessor::UpdateBindings( std::memset(current_float_constant_map_pixel_, 0, sizeof(current_float_constant_map_pixel_)); } - uint32_t float_constant_size_pixel = xe::align( - uint32_t(std::max(float_constant_count_pixel, 1u) * 4 * sizeof(float)), - 256u); // Write the constant buffer data. 
if (!cbuffer_binding_system_.up_to_date) { uint8_t* system_constants = constant_buffer_pool_->Request( - frame_current_, xe::align(uint32_t(sizeof(system_constants_)), 256u), - nullptr, nullptr, &cbuffer_binding_system_.address); + frame_current_, sizeof(system_constants_), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_binding_system_.address); if (system_constants == nullptr) { return false; } @@ -3576,8 +3569,15 @@ bool D3D12CommandProcessor::UpdateBindings( ~(1u << root_parameter_system_constants); } if (!cbuffer_binding_float_vertex_.up_to_date) { + // Even if the shader doesn't need any float constants, a valid binding must + // still be provided, so if the first draw in the frame with the current + // root signature doesn't have float constants at all, still allocate an + // empty buffer. uint8_t* float_constants = constant_buffer_pool_->Request( - frame_current_, float_constant_size_vertex, nullptr, nullptr, + frame_current_, + uint32_t(std::max(float_constant_count_vertex, uint32_t(1)) * 4 * + sizeof(float)), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_float_vertex_.address); if (float_constants == nullptr) { return false; @@ -3603,7 +3603,10 @@ bool D3D12CommandProcessor::UpdateBindings( } if (!cbuffer_binding_float_pixel_.up_to_date) { uint8_t* float_constants = constant_buffer_pool_->Request( - frame_current_, float_constant_size_pixel, nullptr, nullptr, + frame_current_, + uint32_t(std::max(float_constant_count_pixel, uint32_t(1)) * 4 * + sizeof(float)), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_float_pixel_.address); if (float_constants == nullptr) { return false; @@ -3632,28 +3635,33 @@ bool D3D12CommandProcessor::UpdateBindings( ~(1u << root_parameter_float_constants_pixel); } if (!cbuffer_binding_bool_loop_.up_to_date) { - uint8_t* bool_loop_constants = - constant_buffer_pool_->Request(frame_current_, 256, nullptr, nullptr, - 
&cbuffer_binding_bool_loop_.address); + constexpr uint32_t kBoolLoopConstantsSize = (8 + 32) * sizeof(uint32_t); + uint8_t* bool_loop_constants = constant_buffer_pool_->Request( + frame_current_, kBoolLoopConstantsSize, + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_binding_bool_loop_.address); if (bool_loop_constants == nullptr) { return false; } std::memcpy(bool_loop_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - (8 + 32) * sizeof(uint32_t)); + kBoolLoopConstantsSize); cbuffer_binding_bool_loop_.up_to_date = true; current_graphics_root_up_to_date_ &= ~(1u << root_parameter_bool_loop_constants); } if (!cbuffer_binding_fetch_.up_to_date) { + constexpr uint32_t kFetchConstantsSize = 32 * 6 * sizeof(uint32_t); uint8_t* fetch_constants = constant_buffer_pool_->Request( - frame_current_, 768, nullptr, nullptr, &cbuffer_binding_fetch_.address); + frame_current_, kFetchConstantsSize, + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_binding_fetch_.address); if (fetch_constants == nullptr) { return false; } std::memcpy(fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, - 32 * 6 * sizeof(uint32_t)); + kFetchConstantsSize); cbuffer_binding_fetch_.up_to_date = true; current_graphics_root_up_to_date_ &= ~(1u << root_parameter_fetch_constants); @@ -3885,12 +3893,10 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t* descriptor_indices = reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request( frame_current_, - xe::align( - uint32_t(std::max(texture_count_vertex + sampler_count_vertex, - uint32_t(1)) * - sizeof(uint32_t)), - uint32_t(256)), - nullptr, nullptr, + uint32_t(std::max(texture_count_vertex + sampler_count_vertex, + uint32_t(1)) * + sizeof(uint32_t)), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_descriptor_indices_vertex_.address)); if (!descriptor_indices) { return false; @@ -3923,12 +3929,10 @@ bool D3D12CommandProcessor::UpdateBindings( 
uint32_t* descriptor_indices = reinterpret_cast(constant_buffer_pool_->Request( frame_current_, - xe::align( - uint32_t(std::max(texture_count_pixel + sampler_count_pixel, - uint32_t(1)) * - sizeof(uint32_t)), - uint32_t(256)), - nullptr, nullptr, + uint32_t(std::max(texture_count_pixel + sampler_count_pixel, + uint32_t(1)) * + sizeof(uint32_t)), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_descriptor_indices_pixel_.address)); if (!descriptor_indices) { return false; diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index ab2138b47..947d0666f 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -52,11 +52,13 @@ bool PrimitiveConverter::Initialize() { D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = provider.GetHeapFlagCreateNotZeroed(); - // There can be at most 65535 indices in a Xenos draw call, but they can be up - // to 4 bytes large, and conversion can add more indices (almost triple the - // count for triangle strips, for instance). - buffer_pool_ = - std::make_unique(provider, 4 * 1024 * 1024); + // There can be at most 65535 indices in a Xenos draw call (16 bit index + // count), but they can be up to 4 bytes large, and conversion can add more + // indices (almost triple the count for triangle strips or fans, for + // instance). + buffer_pool_ = std::make_unique( + provider, std::max(uint32_t(65535 * 3 * sizeof(uint32_t)), + ui::d3d12::UploadBufferPool::kDefaultPageSize)); // Create the static index buffer for non-indexed drawing. 
D3D12_RESOURCE_DESC static_ib_desc; @@ -697,8 +699,8 @@ void* PrimitiveConverter::AllocateIndices( } D3D12_GPU_VIRTUAL_ADDRESS gpu_address; uint8_t* mapping = - buffer_pool_->Request(command_processor_.GetCurrentFrame(), size, nullptr, - nullptr, &gpu_address); + buffer_pool_->Request(command_processor_.GetCurrentFrame(), size, 16, + nullptr, nullptr, &gpu_address); if (mapping == nullptr) { XELOGE("Failed to allocate space for {} converted {}-bit vertex indices", count, format == xenos::IndexFormat::kInt32 ? 32 : 16); diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index cfc7f6f47..2cb4cfc61 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -1507,7 +1507,7 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { ID3D12Resource* upload_buffer; uint32_t upload_buffer_offset; void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request( - command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes, + command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes, 1, &upload_buffer, &upload_buffer_offset, nullptr); if (!upload_buffer_mapping) { XELOGE("Failed to get a buffer for restoring a EDRAM snapshot"); diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index c24336664..6c9c735b1 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -154,8 +154,8 @@ bool SharedMemory::Initialize() { system_page_flags_.resize((page_count_ + 63) / 64); upload_buffer_pool_ = std::make_unique( - provider, - xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_)); + provider, xe::align(ui::d3d12::UploadBufferPool::kDefaultPageSize, + uint32_t(1) << page_size_log2_)); memory_invalidation_callback_handle_ = memory_.RegisterPhysicalMemoryInvalidationCallback( @@ -442,8 +442,9 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { uint32_t 
upload_buffer_offset, upload_buffer_size; uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial( command_processor_.GetCurrentSubmission(), - upload_range_length << page_size_log2_, &upload_buffer, - &upload_buffer_offset, &upload_buffer_size, nullptr); + upload_range_length << page_size_log2_, + uint32_t(1) << page_size_log2_, &upload_buffer, &upload_buffer_offset, + &upload_buffer_size, nullptr); if (upload_buffer_mapping == nullptr) { XELOGE("Shared memory: Failed to get an upload buffer"); return false; diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 821a0136f..5556c2d8d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -2396,9 +2396,9 @@ bool TextureCache::LoadTextureData(Texture* texture) { } D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address; uint8_t* cbuffer_mapping = cbuffer_pool.Request( - command_processor_.GetCurrentFrame(), - xe::align(uint32_t(sizeof(load_constants)), uint32_t(256)), nullptr, - nullptr, &cbuffer_gpu_address); + command_processor_.GetCurrentFrame(), sizeof(load_constants), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_gpu_address); if (cbuffer_mapping == nullptr) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index 3c0ea8a3a..625b3ce69 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -287,8 +287,7 @@ bool D3D12ImmediateDrawer::Initialize() { device->CreateSampler(&sampler_desc, sampler_handle); // Create pools for draws. 
- vertex_buffer_pool_ = - std::make_unique(provider, 2 * 1024 * 1024); + vertex_buffer_pool_ = std::make_unique(provider); texture_descriptor_pool_ = std::make_unique( device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048); texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; @@ -506,8 +505,8 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { vertex_buffer_view.SizeInBytes = batch.vertex_count * uint32_t(sizeof(ImmediateVertex)); void* vertex_buffer_mapping = vertex_buffer_pool_->Request( - current_fence_value, vertex_buffer_view.SizeInBytes, nullptr, nullptr, - &vertex_buffer_view.BufferLocation); + current_fence_value, vertex_buffer_view.SizeInBytes, sizeof(uint32_t), + nullptr, nullptr, &vertex_buffer_view.BufferLocation); if (vertex_buffer_mapping == nullptr) { XELOGE("Failed to get a buffer for {} vertices in the immediate drawer", batch.vertex_count); @@ -524,8 +523,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t); index_buffer_view.Format = DXGI_FORMAT_R16_UINT; void* index_buffer_mapping = vertex_buffer_pool_->Request( - current_fence_value, - xe::align(index_buffer_view.SizeInBytes, UINT(sizeof(uint32_t))), + current_fence_value, index_buffer_view.SizeInBytes, sizeof(uint16_t), nullptr, nullptr, &index_buffer_view.BufferLocation); if (index_buffer_mapping == nullptr) { XELOGE("Failed to get a buffer for {} indices in the immediate drawer", diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index 7b892caa9..b646b1c6c 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -13,14 +13,20 @@ #include "xenia/base/assert.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" #include "xenia/ui/d3d12/d3d12_util.h" namespace xe { namespace ui { namespace d3d12 { +// Align to D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT not to waste any space if +// it's smaller 
(the size of the heap backing the buffer will be aligned to +// D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT anyway). UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size) - : provider_(provider), page_size_(page_size) {} + : provider_(provider), + page_size_(xe::align( + page_size, uint32_t(D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT))) {} UploadBufferPool::~UploadBufferPool() { ClearCache(); } @@ -68,9 +74,13 @@ void UploadBufferPool::ClearCache() { } uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, + uint32_t alignment, ID3D12Resource** buffer_out, uint32_t* offset_out, D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { + assert_not_zero(alignment); + assert_true(xe::is_pow2(alignment)); + size = xe::align(size, alignment); assert_true(size <= page_size_); if (size > page_size_) { return nullptr; @@ -79,7 +89,8 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, submission_index >= writable_first_->last_submission_index); assert_true(!submitted_last_ || submission_index >= submitted_last_->last_submission_index); - if (page_size_ - current_page_used_ < size || !writable_first_) { + uint32_t current_page_used_aligned = xe::align(current_page_used_, alignment); + if (current_page_used_aligned + size > page_size_ || !writable_first_) { // Start a new page if can't fit all the bytes or don't have an open page. if (writable_first_) { // Close the page that was current. 
@@ -128,33 +139,39 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, writable_last_ = writable_first_; } current_page_used_ = 0; + current_page_used_aligned = 0; } writable_first_->last_submission_index = submission_index; if (buffer_out) { *buffer_out = writable_first_->buffer; } if (offset_out) { - *offset_out = current_page_used_; + *offset_out = current_page_used_aligned; } if (gpu_address_out) { - *gpu_address_out = writable_first_->gpu_address + current_page_used_; + *gpu_address_out = writable_first_->gpu_address + current_page_used_aligned; } - uint8_t* mapping = - reinterpret_cast(writable_first_->mapping) + current_page_used_; - current_page_used_ += size; + uint8_t* mapping = reinterpret_cast(writable_first_->mapping) + + current_page_used_aligned; + current_page_used_ = current_page_used_aligned + size; return mapping; } uint8_t* UploadBufferPool::RequestPartial( - uint64_t submission_index, uint32_t size, ID3D12Resource** buffer_out, - uint32_t* offset_out, uint32_t* size_out, + uint64_t submission_index, uint32_t size, uint32_t alignment, + ID3D12Resource** buffer_out, uint32_t* offset_out, uint32_t* size_out, D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { + assert_not_zero(alignment); + assert_true(xe::is_pow2(alignment)); + size = xe::align(size, alignment); size = std::min(size, page_size_); - if (current_page_used_ < page_size_) { - size = std::min(size, page_size_ - current_page_used_); + uint32_t current_page_used_aligned = xe::align(current_page_used_, alignment); + if (current_page_used_aligned + alignment <= page_size_) { + size = std::min( + size, (page_size_ - current_page_used_aligned) & ~(alignment - 1)); } - uint8_t* mapping = - Request(submission_index, size, buffer_out, offset_out, gpu_address_out); + uint8_t* mapping = Request(submission_index, size, alignment, buffer_out, + offset_out, gpu_address_out); if (!mapping) { return nullptr; } diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/pools.h 
index 21606cc42..e0bb39928 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/pools.h @@ -23,7 +23,12 @@ namespace d3d12 { class UploadBufferPool { public: - UploadBufferPool(D3D12Provider& provider, uint32_t page_size); + // Taken from the Direct3D 12 MiniEngine sample (LinearAllocator + // kCpuAllocatorPageSize). Large enough for most cases. + static constexpr uint32_t kDefaultPageSize = 2 * 1024 * 1024; + + UploadBufferPool(D3D12Provider& provider, + uint32_t page_size = kDefaultPageSize); ~UploadBufferPool(); void Reclaim(uint64_t completed_submission_index); @@ -31,13 +36,13 @@ class UploadBufferPool { // Request to write data in a single piece, creating a new page if the current // one doesn't have enough free space. - uint8_t* Request(uint64_t submission_index, uint32_t size, + uint8_t* Request(uint64_t submission_index, uint32_t size, uint32_t alignment, ID3D12Resource** buffer_out, uint32_t* offset_out, D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); // Request to write data in multiple parts, filling the buffer entirely. uint8_t* RequestPartial(uint64_t submission_index, uint32_t size, - ID3D12Resource** buffer_out, uint32_t* offset_out, - uint32_t* size_out, + uint32_t alignment, ID3D12Resource** buffer_out, + uint32_t* offset_out, uint32_t* size_out, D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); private: