From a11b070fee56af2871b6ea4b38b28bb50d83d560 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 29 Jun 2022 23:38:06 +0300 Subject: [PATCH 1/4] [GPU] Align texture extents in loading to host buffer texel size accessed by the shader --- src/xenia/gpu/d3d12/d3d12_texture_cache.cc | 23 ++++++++++++++++------ src/xenia/gpu/d3d12/d3d12_texture_cache.h | 8 +++++--- src/xenia/gpu/texture_cache.cc | 17 ++++++++++++---- src/xenia/gpu/texture_cache.h | 5 +++-- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index c34343670..47c47e032 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -1147,7 +1147,8 @@ bool D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported( } bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted( - uint32_t start_unscaled, uint32_t length_unscaled) { + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2) { assert_true(IsDrawResolutionScaled()); if (length_unscaled == 0) { @@ -1162,8 +1163,12 @@ bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted( uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y(); uint64_t first_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; - uint64_t last_scaled = uint64_t(start_unscaled + (length_unscaled - 1)) * - draw_resolution_scale_area; + uint64_t length_scaled_alignment_bits = + (UINT64_C(1) << length_scaled_alignment_log2) - 1; + uint64_t last_scaled = (uint64_t(start_unscaled + (length_unscaled - 1)) * + draw_resolution_scale_area + + length_scaled_alignment_bits) & + ~length_scaled_alignment_bits; const ui::d3d12::D3D12Provider& provider = command_processor_.GetD3D12Provider(); @@ -1273,7 +1278,8 @@ bool D3D12TextureCache::EnsureScaledResolveMemoryCommitted( } bool D3D12TextureCache::MakeScaledResolveRangeCurrent( - uint32_t start_unscaled, uint32_t length_unscaled) { + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2) { assert_true(IsDrawResolutionScaled()); if (!length_unscaled || start_unscaled >= SharedMemory::kBufferSize || @@ -1286,8 +1292,12 @@ bool D3D12TextureCache::MakeScaledResolveRangeCurrent( uint32_t draw_resolution_scale_area = draw_resolution_scale_x() * draw_resolution_scale_y(); uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area; + uint64_t length_scaled_alignment_bits = + (UINT64_C(1) << length_scaled_alignment_log2) - 1; uint64_t length_scaled = - uint64_t(length_unscaled) * draw_resolution_scale_area; + (uint64_t(length_unscaled) * draw_resolution_scale_area + + length_scaled_alignment_bits) & + ~length_scaled_alignment_bits; uint64_t last_scaled = start_scaled + (length_scaled - 1); // Get one or two buffers that can hold the whole range. @@ -1855,7 +1865,8 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, if (texture_resolution_scaled && (is_base || !scaled_mips_source_set_up)) { uint32_t guest_size_unscaled = is_base ? d3d12_texture.GetGuestBaseSize() : d3d12_texture.GetGuestMipsSize(); - if (!MakeScaledResolveRangeCurrent(guest_address, guest_size_unscaled)) { + if (!MakeScaledResolveRangeCurrent(guest_address, guest_size_unscaled, + load_shader_info.source_bpe_log2)) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); return false; diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index 9b22b1e9b..6a14948fe 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -130,14 +130,16 @@ class D3D12TextureCache final : public TextureCache { uint32_t& scale_x, uint32_t& scale_y, const ui::d3d12::D3D12Provider& provider); // Ensures the tiles backing the range in the buffers are allocated. - bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled, - uint32_t length_unscaled) override; + bool EnsureScaledResolveMemoryCommitted( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0) override; // Makes the specified range of up to 1-2 GB currently accessible on the GPU. // One draw call can access only at most one range - the same memory is // accessible through different buffers based on the range needed, so aliasing // barriers are required. bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled, - uint32_t length_unscaled); + uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0); // These functions create a view of the range specified in the last successful // MakeScaledResolveRangeCurrent call because that function must be called // before this. diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index 030ae2be7..18fac01d9 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -656,6 +656,13 @@ bool TextureCache::LoadTextureData(Texture& texture) { TextureKey texture_key = texture.key(); + // Implementation may load multiple blocks at once via accesses of up to 128 + // bits (R32G32B32A32_UINT), so aligning the size to this value to make sure + // if the texture is small (especially if it's linear), the last blocks won't + // be cut off (hosts may return 0, 0, 0, 0 for the whole R32G32B32A32_UINT + // access for the non-16-aligned tail even if 1...15 bytes are actually + // provided for it). + // Request uploading of the texture data to the shared memory. // This is also necessary when resolution scaling is used - the texture cache // relies on shared memory for invalidation of both unscaled and scaled @@ -666,7 +673,8 @@ bool TextureCache::LoadTextureData(Texture& texture) { bool base_resolved = texture.GetBaseResolved(); if (base_outdated) { if (!shared_memory().RequestRange( - texture_key.base_page << 12, texture.GetGuestBaseSize(), + texture_key.base_page << 12, + xe::align(texture.GetGuestBaseSize(), UINT32_C(16)), texture_key.scaled_resolve ? nullptr : &base_resolved)) { return false; } @@ -674,7 +682,8 @@ bool TextureCache::LoadTextureData(Texture& texture) { bool mips_resolved = texture.GetMipsResolved(); if (mips_outdated) { if (!shared_memory().RequestRange( - texture_key.mip_page << 12, texture.GetGuestMipsSize(), + texture_key.mip_page << 12, + xe::align(texture.GetGuestMipsSize(), UINT32_C(16)), texture_key.scaled_resolve ? nullptr : &mips_resolved)) { return false; } @@ -685,11 +694,11 @@ bool TextureCache::LoadTextureData(Texture& texture) { // by an actual resolve, but is still included in the texture size, so the // GPU won't be trying to access unmapped memory. if (!EnsureScaledResolveMemoryCommitted(texture_key.base_page << 12, - texture.GetGuestBaseSize())) { + texture.GetGuestBaseSize(), 4)) { return false; } if (!EnsureScaledResolveMemoryCommitted(texture_key.mip_page << 12, - texture.GetGuestMipsSize())) { + texture.GetGuestMipsSize(), 4)) { return false; } } diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h index 510fa2d86..a8392071e 100644 --- a/src/xenia/gpu/texture_cache.h +++ b/src/xenia/gpu/texture_cache.h @@ -82,8 +82,9 @@ class TextureCache { void MarkRangeAsResolved(uint32_t start_unscaled, uint32_t length_unscaled); // Ensures the memory backing the range in the scaled resolve address space is // allocated and returns whether it is. - virtual bool EnsureScaledResolveMemoryCommitted(uint32_t start_unscaled, - uint32_t length_unscaled) { + virtual bool EnsureScaledResolveMemoryCommitted( + uint32_t start_unscaled, uint32_t length_unscaled, + uint32_t length_scaled_alignment_log2 = 0) { return false; } From 7e691d5ef16b72bb425211531dfe592deea2f852 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 30 Jun 2022 22:15:01 +0300 Subject: [PATCH 2/4] [DXBC] Handle NaN in not equal alpha test as passed --- src/xenia/gpu/dxbc_shader_translator_om.cc | 46 ++++++++++++++-------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index eb3e0438d..3b6c1d6bd 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -3012,31 +3012,43 @@ void DxbcShaderTranslator::CompletePixelShader() { // checked, but let's assume this means "always", not "less, equal or // greater". // TODO(Triang3l): Check how alpha test works with NaN on Direct3D 9. - a_.OpINE(alpha_test_op_dest, alpha_test_mask_src, dxbc::Src::LU(0b111)); + a_.OpINE(alpha_test_op_dest, alpha_test_mask_src, + dxbc::Src::LU(uint32_t(xenos::CompareFunction::kAlways))); // Don't do the test if the mode is "always". a_.OpIf(true, alpha_test_op_src); { - // Do the test. Can't use subtraction and sign because of float specials. + // Do the test. dxbc::Src alpha_src( dxbc::Src::R(system_temps_color_[0], dxbc::Src::kWWWW)); dxbc::Src alpha_test_reference_src(LoadSystemConstant( SystemConstants::Index::kAlphaTestReference, offsetof(SystemConstants, alpha_test_reference), dxbc::Src::kXXXX)); - // Less than. - a_.OpLT(alpha_test_op_dest, alpha_src, alpha_test_reference_src); - a_.OpOr(alpha_test_op_dest, alpha_test_op_src, - dxbc::Src::LU(~uint32_t(1 << 0))); - a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); - // Equals to. - a_.OpEq(alpha_test_op_dest, alpha_src, alpha_test_reference_src); - a_.OpOr(alpha_test_op_dest, alpha_test_op_src, - dxbc::Src::LU(~uint32_t(1 << 1))); - a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); - // Greater than. - a_.OpLT(alpha_test_op_dest, alpha_test_reference_src, alpha_src); - a_.OpOr(alpha_test_op_dest, alpha_test_op_src, - dxbc::Src::LU(~uint32_t(1 << 2))); - a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + // Handle "not equal" specially (specifically as "not equal" so it's true + // for NaN, not "less or greater" which is false for NaN). + a_.OpIEq(alpha_test_op_dest, alpha_test_mask_src, + dxbc::Src::LU(uint32_t(xenos::CompareFunction::kNotEqual))); + a_.OpIf(true, alpha_test_op_src); + { a_.OpNE(alpha_test_mask_dest, alpha_src, alpha_test_reference_src); } + a_.OpElse(); + { + // Less than. + a_.OpLT(alpha_test_op_dest, alpha_src, alpha_test_reference_src); + a_.OpOr(alpha_test_op_dest, alpha_test_op_src, + dxbc::Src::LU(~uint32_t(1 << 0))); + a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + // Equals to. + a_.OpEq(alpha_test_op_dest, alpha_src, alpha_test_reference_src); + a_.OpOr(alpha_test_op_dest, alpha_test_op_src, + dxbc::Src::LU(~uint32_t(1 << 1))); + a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + // Greater than. + a_.OpLT(alpha_test_op_dest, alpha_test_reference_src, alpha_src); + a_.OpOr(alpha_test_op_dest, alpha_test_op_src, + dxbc::Src::LU(~uint32_t(1 << 2))); + a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + } + // Close the "not equal" check. + a_.OpEndIf(); // Discard the pixel if it has failed the test. if (edram_rov_used_) { a_.OpRetC(false, alpha_test_mask_src); From 28670d8ec2559c5b596626a0fe642d5f2b316396 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 1 Jul 2022 12:50:45 +0300 Subject: [PATCH 3/4] [UI] Presenter: Rename display size to aspect ratio --- .../gpu/d3d12/d3d12_command_processor.cc | 5 +-- src/xenia/ui/presenter.cc | 34 +++++++++++-------- src/xenia/ui/presenter.h | 22 ++++++------ 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 284f6c110..79c0be085 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1745,12 +1745,9 @@ void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, } D3D12_RESOURCE_DESC swap_texture_desc = swap_texture_resource->GetDesc(); - uint32_t draw_resolution_scale_max = - std::max(texture_cache_->draw_resolution_scale_x(), - texture_cache_->draw_resolution_scale_y()); presenter->RefreshGuestOutput( uint32_t(swap_texture_desc.Width), uint32_t(swap_texture_desc.Height), - 1280 * draw_resolution_scale_max, 720 * draw_resolution_scale_max, + 1280, 720, [this, &swap_texture_srv_desc, frontbuffer_format, swap_texture_resource, &swap_texture_desc]( ui::Presenter::GuestOutputRefreshContext& context) -> bool { diff --git a/src/xenia/ui/presenter.cc b/src/xenia/ui/presenter.cc index 661a37a8a..766a632cf 100644 --- a/src/xenia/ui/presenter.cc +++ b/src/xenia/ui/presenter.cc @@ -354,14 +354,14 @@ void Presenter::PaintFromUIThread(bool force_paint) { bool Presenter::RefreshGuestOutput( uint32_t frontbuffer_width, uint32_t frontbuffer_height, - uint32_t screen_width, uint32_t screen_height, + uint32_t display_aspect_ratio_x, uint32_t display_aspect_ratio_y, std::function refresher) { GuestOutputProperties& writable_properties = guest_output_properties_[guest_output_mailbox_writable_]; writable_properties.frontbuffer_width = frontbuffer_width; writable_properties.frontbuffer_height = frontbuffer_height; - writable_properties.screen_width = screen_width; - writable_properties.screen_height = screen_height; + writable_properties.display_aspect_ratio_x = display_aspect_ratio_x; + writable_properties.display_aspect_ratio_y = display_aspect_ratio_y; writable_properties.is_8bpc = false; bool is_active = writable_properties.IsActive(); if (is_active) { @@ -706,8 +706,10 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( // All host location calculations are DPI-independent, conceptually depending // only on the aspect ratios, not the absolute values. uint32_t output_width, output_height; - if (uint64_t(surface_width_in_paint_connection_) * properties.screen_height > - uint64_t(properties.screen_width) * surface_height_in_paint_connection_) { + if (uint64_t(surface_width_in_paint_connection_) * + properties.display_aspect_ratio_y > + uint64_t(surface_height_in_paint_connection_) * + properties.display_aspect_ratio_x) { // The window is wider that the source - crop along Y to preserve the aspect // ratio while stretching throughout the entire surface's width, then limit // the Y cropping via letterboxing or stretching along X. @@ -719,9 +721,9 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( } // Scale the desired width by the H:W aspect ratio (inverse of W:H) to get // the height. - output_height = - rescale_unsigned(surface_width_in_paint_connection_, - properties.screen_height, properties.screen_width); + output_height = rescale_unsigned(surface_width_in_paint_connection_, + properties.display_aspect_ratio_y, + properties.display_aspect_ratio_x); bool letterbox = false; if (output_height * present_safe_area > surface_height_in_paint_connection_ * 100) { @@ -732,8 +734,9 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( } if (letterbox && cvars::present_letterbox) { output_width = rescale_unsigned( - properties.screen_width, surface_height_in_paint_connection_ * 100, - properties.screen_height * present_safe_area); + surface_height_in_paint_connection_ * 100, + properties.display_aspect_ratio_x, + properties.display_aspect_ratio_y * present_safe_area); // output_width might have been rounded up already by rescale_unsigned, so // rounding down in this division. flow.output_x = (int32_t(surface_width_in_paint_connection_) - @@ -759,9 +762,9 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( present_safe_area = 100; } // Scale the desired height by the W:H aspect ratio to get the width. - output_width = - rescale_unsigned(surface_height_in_paint_connection_, - properties.screen_width, properties.screen_height); + output_width = rescale_unsigned(surface_height_in_paint_connection_, + properties.display_aspect_ratio_x, + properties.display_aspect_ratio_y); bool letterbox = false; if (output_width * present_safe_area > surface_width_in_paint_connection_ * 100) { @@ -772,8 +775,9 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( } if (letterbox && cvars::present_letterbox) { output_height = rescale_unsigned( - properties.screen_height, surface_width_in_paint_connection_ * 100, - properties.screen_width * present_safe_area); + surface_width_in_paint_connection_ * 100, + properties.display_aspect_ratio_y, + properties.display_aspect_ratio_x * present_safe_area); // output_height might have been rounded up already by rescale_unsigned, // so rounding down in this division. flow.output_y = (int32_t(surface_height_in_paint_connection_) - diff --git a/src/xenia/ui/presenter.h b/src/xenia/ui/presenter.h index 5ead8cb90..bb2fc7740 100644 --- a/src/xenia/ui/presenter.h +++ b/src/xenia/ui/presenter.h @@ -299,7 +299,9 @@ class Presenter { void PaintFromUIThread(bool force_paint = false); // Pass 0 as width or height to disable guest output until the next refresh - // with an actual size. The callback will receive a backend-specific context, + // with an actual size. The display aspect ratio may be specified like 16:9 or + // like 1280:720, both are accepted, for simplicity, the guest display size + // may just be passed. The callback will receive a backend-specific context, // and will not be called in case of an error such as the wrong size, or if // guest output is disabled. Returns whether the callback was called and it // returned true. The callback must submit all updating work to the host GPU @@ -307,7 +309,7 @@ class Presenter { // primitives required by the GuestOutputRefreshContext implementation. bool RefreshGuestOutput( uint32_t frontbuffer_width, uint32_t frontbuffer_height, - uint32_t screen_width, uint32_t screen_height, + uint32_t display_aspect_ratio_x, uint32_t display_aspect_ratio_y, std::function refresher); // The implementation must be callable from any thread, including from // multiple at the same time, and it should acquire the latest guest output @@ -354,24 +356,24 @@ class Presenter { // this frame. uint32_t frontbuffer_width; uint32_t frontbuffer_height; - // Guest screen size (primarily for the target aspect ratio, which may be - // different than that of the frontbuffer). - uint32_t screen_width; - uint32_t screen_height; + // Guest display aspect ratio numerator and denominator (both 16:9 and + // 1280:720 kinds of values are accepted). + uint32_t display_aspect_ratio_x; + uint32_t display_aspect_ratio_y; bool is_8bpc; GuestOutputProperties() { SetToInactive(); } bool IsActive() const { - return frontbuffer_width && frontbuffer_height && screen_width && - screen_height; + return frontbuffer_width && frontbuffer_height && + display_aspect_ratio_x && display_aspect_ratio_y; } void SetToInactive() { frontbuffer_width = 0; frontbuffer_height = 0; - screen_width = 0; - screen_height = 0; + display_aspect_ratio_x = 0; + display_aspect_ratio_y = 0; is_8bpc = false; } }; From e37e3ef3821ab3a1458f9dec27aaa6ac6b6eeecf Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 1 Jul 2022 19:50:19 +0300 Subject: [PATCH 4/4] [GPU] Display swap output in the trace viewer Resolve output is unreliable because resolving may be done to a subregion of a texture and even to 3D textures, and to any color format --- src/xenia/app/emulator_window.cc | 1 + src/xenia/gpu/command_processor.cc | 4 +--- src/xenia/gpu/command_processor.h | 3 --- src/xenia/gpu/d3d12/d3d12_command_processor.cc | 4 ---- src/xenia/gpu/trace_player.cc | 17 ++--------------- src/xenia/gpu/trace_player.h | 9 +-------- src/xenia/gpu/trace_viewer.cc | 15 +++++++-------- src/xenia/gpu/trace_viewer.h | 2 -- .../gpu/vulkan/vulkan_command_processor.cc | 8 -------- src/xenia/gpu/vulkan/vulkan_command_processor.h | 3 --- src/xenia/ui/presenter.cc | 6 ++++-- src/xenia/ui/presenter.h | 11 +++++++++-- 12 files changed, 25 insertions(+), 58 deletions(-) diff --git a/src/xenia/app/emulator_window.cc b/src/xenia/app/emulator_window.cc index 7e6d9e6b7..cab817a10 100644 --- a/src/xenia/app/emulator_window.cc +++ b/src/xenia/app/emulator_window.cc @@ -683,6 +683,7 @@ EmulatorWindow::GetGuestOutputPaintEffectForCvarValue( ui::Presenter::GuestOutputPaintConfig EmulatorWindow::GetGuestOutputPaintConfigForCvars() { ui::Presenter::GuestOutputPaintConfig paint_config; + paint_config.SetAllowOverscanCutoff(true); paint_config.SetEffect(GetGuestOutputPaintEffectForCvarValue( cvars::postprocess_scaling_and_sharpening)); paint_config.SetCasAdditionalSharpness( diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index bb452c13d..8e9bc6067 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -916,9 +916,7 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingBuffer* reader, uint32_t frontbuffer_height = reader->ReadAndSwap(); reader->AdvanceRead((count - 4) * sizeof(uint32_t)); - if (!ignore_swap_) { - IssueSwap(frontbuffer_ptr, frontbuffer_width, frontbuffer_height); - } + IssueSwap(frontbuffer_ptr, frontbuffer_width, frontbuffer_height); ++counter_; return true; diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 367ed9ee2..ffc8eeffa 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -90,7 +90,6 @@ class CommandProcessor { virtual void ClearCaches(); - void SetIgnoreSwap(bool ignore_swap) { ignore_swap_ = ignore_swap; } // "Desired" is for the external thread managing the post-processing effect. SwapPostEffect GetDesiredSwapPostEffect() const { return swap_post_effect_desired_; @@ -265,8 +264,6 @@ class CommandProcessor { std::atomic worker_running_; kernel::object_ref worker_thread_; - bool ignore_swap_ = false; - std::queue> pending_fns_; // MicroEngine binary from PM4_ME_INIT diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 79c0be085..955c2667b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1718,10 +1718,6 @@ void D3D12CommandProcessor::OnGammaRampPWLValueWritten() { void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { - // FIXME(Triang3l): frontbuffer_ptr is currently unreliable, in the trace - // player it's set to 0, but it's not needed anyway since the fetch constant - // contains the address. - SCOPE_profile_cpu_f("gpu"); ui::Presenter* presenter = graphics_system_->presenter(); diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc index b1aa8f615..c127d4c56 100644 --- a/src/xenia/gpu/trace_player.cc +++ b/src/xenia/gpu/trace_player.cc @@ -89,19 +89,15 @@ void TracePlayer::PlayTrace(const uint8_t* trace_data, size_t trace_size, TracePlaybackMode playback_mode, bool clear_caches) { playing_trace_ = true; - // Pass a copy of present_last_copy_ to the thread so it's not accessible by - // multiple threads at once. - bool present_last_copy = present_last_copy_; graphics_system_->command_processor()->CallInThread([=]() { - PlayTraceOnThread(trace_data, trace_size, playback_mode, clear_caches, - present_last_copy); + PlayTraceOnThread(trace_data, trace_size, playback_mode, clear_caches); }); } void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size, TracePlaybackMode playback_mode, - bool clear_caches, bool present_last_copy) { + bool clear_caches) { auto memory = graphics_system_->memory(); auto command_processor = graphics_system_->command_processor(); @@ -109,10 +105,6 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, command_processor->ClearCaches(); } - if (present_last_copy) { - command_processor->SetIgnoreSwap(true); - } - playback_percent_ = 0; auto trace_end = trace_data + trace_size; @@ -252,11 +244,6 @@ void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, playing_trace_ = false; - if (present_last_copy) { - command_processor->SetIgnoreSwap(false); - command_processor->IssueSwap(0, 1280, 720); - } - playback_event_->Set(); } diff --git a/src/xenia/gpu/trace_player.h b/src/xenia/gpu/trace_player.h index 4bb5fdd2c..9117e88a4 100644 --- a/src/xenia/gpu/trace_player.h +++ b/src/xenia/gpu/trace_player.h @@ -32,9 +32,6 @@ class TracePlayer : public TraceReader { TracePlayer(GraphicsSystem* graphics_system); GraphicsSystem* graphics_system() const { return graphics_system_; } - void SetPresentLastCopy(bool present_last_copy) { - present_last_copy_ = present_last_copy; - } int current_frame_index() const { return current_frame_index_; } int current_command_index() const { return current_command_index_; } bool is_playing_trace() const { return playing_trace_; } @@ -53,13 +50,9 @@ class TracePlayer : public TraceReader { void PlayTrace(const uint8_t* trace_data, size_t trace_size, TracePlaybackMode playback_mode, bool clear_caches); void PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode, bool clear_caches, - bool present_last_copy); + TracePlaybackMode playback_mode, bool clear_caches); GraphicsSystem* graphics_system_; - // Whether to present the results of the latest resolve instead of displaying - // the front buffer from the trace. - bool present_last_copy_ = false; int current_frame_index_; int current_command_index_; bool playing_trace_ = false; diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index a4733ecbf..70f68c184 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -114,7 +114,7 @@ bool TraceViewer::Setup() { // Main display window. assert_true(app_context().IsInUIThread()); window_ = xe::ui::Window::Create(app_context(), "xenia-gpu-trace-viewer", - 1920, 1200); + 1920, 1080); window_->AddListener(&window_listener_); window_->AddInputListener(&window_listener_, kZOrderTraceViewerInput); if (!window_->Open()) { @@ -135,28 +135,27 @@ bool TraceViewer::Setup() { graphics_system_ = emulator_->graphics_system(); player_ = std::make_unique(graphics_system_); - player_->SetPresentLastCopy(true); // Setup drawing to the window. - xe::ui::GraphicsProvider& graphics_provider = *graphics_system_->provider(); - presenter_ = graphics_provider.CreatePresenter(); - if (!presenter_) { + ui::Presenter* presenter = graphics_system_->presenter(); + if (!presenter) { XELOGE("Failed to initialize the presenter"); return false; } + xe::ui::GraphicsProvider& graphics_provider = *graphics_system_->provider(); immediate_drawer_ = graphics_provider.CreateImmediateDrawer(); if (!immediate_drawer_) { XELOGE("Failed to initialize the immediate drawer"); return false; } - immediate_drawer_->SetPresenter(presenter_.get()); + immediate_drawer_->SetPresenter(presenter); imgui_drawer_ = std::make_unique(window_.get(), kZOrderImGui); - imgui_drawer_->SetPresenterAndImmediateDrawer(presenter_.get(), + imgui_drawer_->SetPresenterAndImmediateDrawer(presenter, immediate_drawer_.get()); trace_viewer_dialog_ = std::unique_ptr( new TraceViewerDialog(imgui_drawer_.get(), *this)); - window_->SetPresenter(presenter_.get()); + window_->SetPresenter(presenter); return true; } diff --git a/src/xenia/gpu/trace_viewer.h b/src/xenia/gpu/trace_viewer.h index ecca12792..8ef0e9b86 100644 --- a/src/xenia/gpu/trace_viewer.h +++ b/src/xenia/gpu/trace_viewer.h @@ -21,7 +21,6 @@ #include "xenia/ui/imgui_dialog.h" #include "xenia/ui/imgui_drawer.h" #include "xenia/ui/immediate_drawer.h" -#include "xenia/ui/presenter.h" #include "xenia/ui/window.h" #include "xenia/ui/window_listener.h" #include "xenia/ui/windowed_app.h" @@ -129,7 +128,6 @@ class TraceViewer : public xe::ui::WindowedApp { GraphicsSystem* graphics_system_ = nullptr; std::unique_ptr player_; - std::unique_ptr presenter_; std::unique_ptr immediate_drawer_; std::unique_ptr imgui_drawer_; std::unique_ptr trace_viewer_dialog_; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 43a7a2ab7..2dcf09f0e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -269,11 +269,6 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, return; } - if (!frontbuffer_ptr) { - // Trace viewer does this. - frontbuffer_ptr = last_copy_base_; - } - std::vector submit_buffers; if (frame_open_) { // TODO(DrChat): If the setup buffer is empty, don't bother queueing it up. @@ -1108,9 +1103,6 @@ bool VulkanCommandProcessor::IssueCopy() { texture->in_flight_fence = current_batch_fence_; - // For debugging purposes only (trace viewer) - last_copy_base_ = texture->texture_info.memory.base_address; - if (!frame_open_) { BeginFrame(); } else if (current_render_state_) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 4359a3c90..062ef0f61 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -106,9 +106,6 @@ class VulkanCommandProcessor : public CommandProcessor { uint32_t coher_base_vc_ = 0; uint32_t coher_size_vc_ = 0; - // Last copy base address, for debugging only. - uint32_t last_copy_base_ = 0; - bool capturing_ = false; bool trace_requested_ = false; bool cache_clear_requested_ = false; diff --git a/src/xenia/ui/presenter.cc b/src/xenia/ui/presenter.cc index 766a632cf..ab28ca4b4 100644 --- a/src/xenia/ui/presenter.cc +++ b/src/xenia/ui/presenter.cc @@ -714,7 +714,8 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( // ratio while stretching throughout the entire surface's width, then limit // the Y cropping via letterboxing or stretching along X. uint32_t present_safe_area; - if (cvars::present_safe_area_y > 0 && cvars::present_safe_area_y < 100) { + if (config.GetAllowOverscanCutoff() && cvars::present_safe_area_y > 0 && + cvars::present_safe_area_y < 100) { present_safe_area = uint32_t(cvars::present_safe_area_y); } else { present_safe_area = 100; @@ -756,7 +757,8 @@ Presenter::GuestOutputPaintFlow Presenter::GetGuestOutputPaintFlow( // aspect ratio while stretching throughout the entire surface's height, // then limit the X cropping via letterboxing or stretching along Y. uint32_t present_safe_area; - if (cvars::present_safe_area_x > 0 && cvars::present_safe_area_x < 100) { + if (config.GetAllowOverscanCutoff() && cvars::present_safe_area_x > 0 && + cvars::present_safe_area_x < 100) { present_safe_area = uint32_t(cvars::present_safe_area_x); } else { present_safe_area = 100; diff --git a/src/xenia/ui/presenter.h b/src/xenia/ui/presenter.h index bb2fc7740..9862546ab 100644 --- a/src/xenia/ui/presenter.h +++ b/src/xenia/ui/presenter.h @@ -228,6 +228,11 @@ class Presenter { // In the sharpness setters, min / max with a constant as the first argument // also drops NaNs. + bool GetAllowOverscanCutoff() const { return allow_overscan_cutoff_; } + void SetAllowOverscanCutoff(bool new_allow_overscan_cutoff) { + allow_overscan_cutoff_ = new_allow_overscan_cutoff; + } + Effect GetEffect() const { return effect_; } void SetEffect(Effect new_effect) { effect_ = new_effect; } @@ -265,8 +270,10 @@ class Presenter { void SetDither(bool new_dither) { dither_ = new_dither; } private: - // Tools, rather than the emulator itself, must use kBilinear as the image - // must be as close to the original front buffer as possible. + // Tools, rather than the emulator itself, must not allow overscan cutoff + // and must use the kBilinear effect as the image must be as close to the + // original front buffer as possible. + bool allow_overscan_cutoff_ = false; Effect effect_ = Effect::kBilinear; float cas_additional_sharpness_ = kCasAdditionalSharpnessDefault; uint32_t fsr_max_upsampling_passes_ = kFsrMaxUpscalingPassesMax;