diff --git a/premake5.lua b/premake5.lua index a7809dc82..8e80c090d 100644 --- a/premake5.lua +++ b/premake5.lua @@ -223,7 +223,9 @@ solution("xenia") platforms({"Linux"}) elseif os.is("windows") then platforms({"Windows"}) - systemversion("10.0.10240.0") + -- Minimum SDK version to support ID3D12GraphicsCommandList1 (for + -- SetSamplePositions). + systemversion("10.0.15063.0") end configurations({"Checked", "Debug", "Release"}) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 31f46a1fa..f6e3e0a96 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -9,6 +9,8 @@ #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include + #include #include @@ -20,6 +22,10 @@ #include "xenia/gpu/d3d12/d3d12_shader.h" #include "xenia/gpu/xenos.h" +// Off by default: the current positions look worse than sampling at centers. +DEFINE_bool(d3d12_programmable_sample_positions, false, + "Enable custom SSAA sample positions where available"); + namespace xe { namespace gpu { namespace d3d12 { @@ -43,6 +49,15 @@ ID3D12GraphicsCommandList* D3D12CommandProcessor::GetCurrentCommandList() return command_lists_[current_queue_frame_]->GetCommandList(); } +ID3D12GraphicsCommandList1* D3D12CommandProcessor::GetCurrentCommandList1() + const { + assert_true(current_queue_frame_ != UINT_MAX); + if (current_queue_frame_ == UINT_MAX) { + return nullptr; + } + return command_lists_[current_queue_frame_]->GetCommandList1(); +} + void D3D12CommandProcessor::PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, D3D12_RESOURCE_STATES new_state, UINT subresource) { @@ -469,6 +484,61 @@ void D3D12CommandProcessor::ReleaseScratchGPUBuffer( } } +void D3D12CommandProcessor::SetSamplePositions(MsaaSamples sample_positions) { + if (current_sample_positions_ == sample_positions) { + return; + } + if (FLAGS_d3d12_programmable_sample_positions) { + auto provider 
= GetD3D12Context()->GetD3D12Provider(); + auto tier = provider->GetProgrammableSamplePositionsTier(); + auto command_list = GetCurrentCommandList1(); + if (tier >= 2 && command_list != nullptr) { + // Depth buffer transitions are affected by sample positions. + SubmitBarriers(); + // Standard sample positions in Direct3D 10.1, but adjusted to account + // for the fact that SSAA samples are already shifted by 1/4 of a pixel. + // TODO(Triang3l): Find what sample positions are used by Xenos, though + // they are not necessarily better. The purpose is just to make 2x SSAA + // work a little bit better for tall stairs. + // FIXME(Triang3l): This is currently even uglier than without custom + // sample positions. + if (sample_positions >= MsaaSamples::k2X) { + // Sample 1 is lower-left on Xenos, but upper-right in Direct3D 12. + D3D12_SAMPLE_POSITION d3d_sample_positions[4]; + if (sample_positions >= MsaaSamples::k4X) { + // Upper-left. + d3d_sample_positions[0].X = -2 + 4; + d3d_sample_positions[0].Y = -6 + 4; + // Upper-right. + d3d_sample_positions[1].X = 6 - 4; + d3d_sample_positions[1].Y = -2 + 4; + // Lower-left. + d3d_sample_positions[2].X = -6 + 4; + d3d_sample_positions[2].Y = 2 - 4; + // Lower-right. + d3d_sample_positions[3].X = 2 - 4; + d3d_sample_positions[3].Y = 6 - 4; + } else { + // Upper. + d3d_sample_positions[0].X = -4; + d3d_sample_positions[0].Y = -4 + 4; + d3d_sample_positions[1].X = -4; + d3d_sample_positions[1].Y = -4 + 4; + // Lower. 
+ d3d_sample_positions[2].X = 4; + d3d_sample_positions[2].Y = 4 - 4; + d3d_sample_positions[3].X = 4; + d3d_sample_positions[3].Y = 4 - 4; + } + command_list->SetSamplePositions(1, 4, d3d_sample_positions); + } else { + command_list->SetSamplePositions(0, 0, nullptr); + } + } + } + current_sample_positions_ = sample_positions; +} + void D3D12CommandProcessor::SetComputePipeline(ID3D12PipelineState* pipeline) { if (current_pipeline_ != pipeline) { GetCurrentCommandList()->SetPipelineState(pipeline); @@ -1028,6 +1098,10 @@ bool D3D12CommandProcessor::BeginFrame() { ff_blend_factor_update_needed_ = true; ff_stencil_ref_update_needed_ = true; + // Since a new command list is being started, sample positions are reset to + // centers. + current_sample_positions_ = MsaaSamples::k1X; + // Reset bindings, particularly because the buffers backing them are recycled. current_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index f04c3166a..0784c48c5 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -46,6 +46,7 @@ class D3D12CommandProcessor : public CommandProcessor { // Returns the drawing command list for the currently open frame. ID3D12GraphicsCommandList* GetCurrentCommandList() const; + ID3D12GraphicsCommandList1* GetCurrentCommandList1() const; void PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, @@ -86,6 +87,10 @@ class D3D12CommandProcessor : public CommandProcessor { void ReleaseScratchGPUBuffer(ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state); + // Sets the current SSAA sample positions. Must be done before setting + // render targets or copying to depth render targets. + void SetSamplePositions(MsaaSamples sample_positions); + // Sets the current pipeline state to a compute pipeline. This is for cache // invalidation primarily. 
A frame must be open. void SetComputePipeline(ID3D12PipelineState* pipeline); @@ -233,6 +238,9 @@ class D3D12CommandProcessor : public CommandProcessor { bool ff_blend_factor_update_needed_; bool ff_stencil_ref_update_needed_; + // Current SSAA sample positions (to be updated by the render target cache). + MsaaSamples current_sample_positions_; + // Currently bound graphics or compute pipeline. ID3D12PipelineState* current_pipeline_; // Currently bound graphics root signature. diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 32e103fb1..2f8683b2e 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -730,6 +730,10 @@ bool RenderTargetCache::UpdateRenderTargets() { binding.render_target->resource); } + // Sample positions when loading depth must match sample positions when + // drawing. + command_processor_->SetSamplePositions(msaa_samples); + // Load the contents of the new render targets from the EDRAM buffer (will // change the state of the render targets to copy destination). 
RenderTarget* load_render_targets[5]; @@ -1394,8 +1398,9 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, descriptor_cpu_start); command_list->SetGraphicsRootDescriptorTable(1, descriptor_gpu_start); - command_processor_->SetExternalGraphicsPipeline(resolve_pipeline); command_processor_->SubmitBarriers(); + command_processor_->SetSamplePositions(MsaaSamples::k1X); + command_processor_->SetExternalGraphicsPipeline(resolve_pipeline); command_list->OMSetRenderTargets(1, &resolve_target->rtv_handle, TRUE, nullptr); D3D12_VIEWPORT viewport; diff --git a/src/xenia/ui/d3d12/command_list.cc b/src/xenia/ui/d3d12/command_list.cc index d438fb777..315416b80 100644 --- a/src/xenia/ui/d3d12/command_list.cc +++ b/src/xenia/ui/d3d12/command_list.cc @@ -31,6 +31,9 @@ CommandList::CommandList(ID3D12Device* device, ID3D12CommandQueue* queue, : device_(device), queue_(queue), type_(type) {} CommandList::~CommandList() { + if (command_list_1_ != nullptr) { + command_list_1_->Release(); + } if (command_list_ != nullptr) { command_list_->Release(); } @@ -52,6 +55,8 @@ bool CommandList::Initialize() { command_allocator_ = nullptr; return false; } + // Optional - added in Creators Update (SDK 10.0.15063.0). + command_list_->QueryInterface(IID_PPV_ARGS(&command_list_1_)); // A command list is initially open, need to close it before resetting. 
command_list_->Close(); return true; diff --git a/src/xenia/ui/d3d12/command_list.h b/src/xenia/ui/d3d12/command_list.h index 10ac0d24b..6fc209794 100644 --- a/src/xenia/ui/d3d12/command_list.h +++ b/src/xenia/ui/d3d12/command_list.h @@ -27,6 +27,9 @@ class CommandList { D3D12_COMMAND_LIST_TYPE type); ID3D12GraphicsCommandList* GetCommandList() const { return command_list_; } + ID3D12GraphicsCommandList1* GetCommandList1() const { + return command_list_1_; + } ID3D12GraphicsCommandList* BeginRecording(); void AbortRecording(); @@ -43,6 +46,7 @@ class CommandList { ID3D12CommandAllocator* command_allocator_ = nullptr; ID3D12GraphicsCommandList* command_list_ = nullptr; + ID3D12GraphicsCommandList1* command_list_1_ = nullptr; }; } // namespace d3d12 diff --git a/src/xenia/ui/d3d12/d3d12_provider.cc b/src/xenia/ui/d3d12/d3d12_provider.cc index fbed99826..a840118f5 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.cc +++ b/src/xenia/ui/d3d12/d3d12_provider.cc @@ -146,6 +146,18 @@ bool D3D12Provider::Initialize() { descriptor_size_dsv_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + // Check if programmable sample positions are supported (added in Creators + // Update). 
+ programmable_sample_positions_tier_ = 0; + D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2; + if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, + &options2, sizeof(options2)))) { + programmable_sample_positions_tier_ = + uint32_t(options2.ProgrammableSamplePositionsTier); + } + XELOGD3D("Direct3D 12 device supports programmable sample positions tier %u", + programmable_sample_positions_tier_); + return true; } diff --git a/src/xenia/ui/d3d12/d3d12_provider.h b/src/xenia/ui/d3d12/d3d12_provider.h index c98bb2d2a..37d683e1e 100644 --- a/src/xenia/ui/d3d12/d3d12_provider.h +++ b/src/xenia/ui/d3d12/d3d12_provider.h @@ -38,6 +38,10 @@ class D3D12Provider : public GraphicsProvider { uint32_t GetDescriptorSizeRTV() const { return descriptor_size_rtv_; } uint32_t GetDescriptorSizeDSV() const { return descriptor_size_dsv_; } + uint32_t GetProgrammableSamplePositionsTier() const { + return programmable_sample_positions_tier_; + } + private: explicit D3D12Provider(Window* main_window); @@ -52,6 +56,8 @@ class D3D12Provider : public GraphicsProvider { uint32_t descriptor_size_sampler_; uint32_t descriptor_size_rtv_; uint32_t descriptor_size_dsv_; + + uint32_t programmable_sample_positions_tier_; }; } // namespace d3d12