From b77ffe3df6b78f680bfcb31a4706b40e290712cd Mon Sep 17 00:00:00 2001 From: Triang3l Date: Wed, 3 Oct 2018 14:36:17 +0300 Subject: [PATCH] [D3D12] Output gamma ramp --- src/xenia/gpu/command_processor.cc | 28 ++- src/xenia/gpu/command_processor.h | 5 +- .../gpu/d3d12/d3d12_command_processor.cc | 162 +++++++++++++++++- src/xenia/gpu/d3d12/d3d12_command_processor.h | 11 ++ src/xenia/gpu/d3d12/d3d12_graphics_system.cc | 86 ++++++++-- src/xenia/gpu/d3d12/d3d12_graphics_system.h | 11 +- .../d3d12/shaders/dxbc/stretch_gamma_ps.cso | Bin 0 -> 1296 bytes .../gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h | 112 ++++++++++++ .../d3d12/shaders/dxbc/stretch_gamma_ps.txt | 57 ++++++ .../gpu/d3d12/shaders/dxbc/stretch_ps.cso | Bin 676 -> 688 bytes src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h | 25 +-- .../gpu/d3d12/shaders/dxbc/stretch_ps.txt | 2 +- src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl | 5 +- .../gpu/d3d12/shaders/stretch_gamma.ps.hlsl | 19 ++ src/xenia/gpu/d3d12/texture_cache.cc | 4 +- src/xenia/gpu/d3d12/texture_cache.h | 3 +- src/xenia/gpu/dxbc_shader_translator.cc | 4 - 17 files changed, 483 insertions(+), 51 deletions(-) create mode 100644 src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso create mode 100644 src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h create mode 100644 src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt create mode 100644 src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index bdb66deb5..944170dfd 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -49,6 +49,24 @@ bool CommandProcessor::Initialize( std::unique_ptr context) { context_ = std::move(context); + // Initialize the gamma ramps to their default (linear) values - taken from + // what games set when starting. 
+ for (uint32_t i = 0; i < 256; ++i) { + uint32_t value = i * 1023 / 255; + gamma_ramp_.normal[i].value = value | (value << 10) | (value << 20); + } + for (uint32_t i = 0; i < 128; ++i) { + uint32_t value = (i * 65535 / 127) & ~63; + if (i < 127) { + value |= 0x200 << 16; + } + for (uint32_t j = 0; j < 3; ++j) { + gamma_ramp_.pwl[i].values[j].value = value; + } + } + dirty_gamma_ramp_normal_ = true; + dirty_gamma_ramp_pwl_ = true; + worker_running_ = true; worker_thread_ = kernel::object_ref( new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { @@ -301,25 +319,23 @@ void CommandProcessor::UpdateGammaRampValue(GammaRampType type, assert_true(mask_lo == 0 || mask_lo == 7); assert_true(mask_hi == 0); - auto subindex = gamma_ramp_rw_subindex_; - if (mask_lo) { switch (type) { case GammaRampType::kNormal: assert_true(regs->values[XE_GPU_REG_DC_LUT_RW_MODE].u32 == 0); gamma_ramp_.normal[index].value = value; + dirty_gamma_ramp_normal_ = true; break; case GammaRampType::kPWL: assert_true(regs->values[XE_GPU_REG_DC_LUT_RW_MODE].u32 == 1); - gamma_ramp_.pwl[index].values[subindex].value = value; + gamma_ramp_.pwl[index].values[gamma_ramp_rw_subindex_].value = value; + gamma_ramp_rw_subindex_ = (gamma_ramp_rw_subindex_ + 1) % 3; + dirty_gamma_ramp_pwl_ = true; break; default: assert_unhandled_case(type); } } - - gamma_ramp_rw_subindex_ = (subindex + 1) % 3; - dirty_gamma_ramp_ = true; } void CommandProcessor::MakeCoherent() { diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 1236e70a8..759c52e05 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -99,7 +99,7 @@ struct GammaRamp { }; NormalEntry normal[256]; - PWLEntry pwl[256]; + PWLEntry pwl[128]; }; class CommandProcessor { @@ -286,7 +286,8 @@ class CommandProcessor { GammaRamp gamma_ramp_ = {}; int gamma_ramp_rw_subindex_ = 0; - bool dirty_gamma_ramp_ = true; + bool dirty_gamma_ramp_normal_ = true; + bool dirty_gamma_ramp_pwl_ = true; 
}; } // namespace gpu diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 4749c3a19..c8a8edf30 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -639,6 +639,55 @@ bool D3D12CommandProcessor::SetupContext() { return false; } + // Create gamma ramp resources. The PWL gamma ramp is 16-bit, but 6 bits are + // hardwired to zero, so DXGI_FORMAT_R10G10B10A2_UNORM can be used for it too. + // https://www.x.org/docs/AMD/old/42590_m76_rrg_1.01o.pdf + dirty_gamma_ramp_normal_ = true; + dirty_gamma_ramp_pwl_ = true; + D3D12_RESOURCE_DESC gamma_ramp_desc; + gamma_ramp_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D; + gamma_ramp_desc.Alignment = 0; + gamma_ramp_desc.Width = 256; + gamma_ramp_desc.Height = 1; + gamma_ramp_desc.DepthOrArraySize = 1; + // Normal gamma is 256x1, PWL gamma is 128x1. + gamma_ramp_desc.MipLevels = 2; + gamma_ramp_desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; + gamma_ramp_desc.SampleDesc.Count = 1; + gamma_ramp_desc.SampleDesc.Quality = 0; + gamma_ramp_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + gamma_ramp_desc.Flags = D3D12_RESOURCE_FLAG_NONE; + // The first action will be uploading. + gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesDefault, D3D12_HEAP_FLAG_NONE, + &gamma_ramp_desc, gamma_ramp_texture_state_, nullptr, + IID_PPV_ARGS(&gamma_ramp_texture_)))) { + XELOGE("Failed to create the gamma ramp texture"); + return false; + } + // Get the layout for the upload buffer. + gamma_ramp_desc.DepthOrArraySize = ui::d3d12::D3D12Context::kQueuedFrames; + UINT64 gamma_ramp_upload_size; + device->GetCopyableFootprints( + &gamma_ramp_desc, 0, ui::d3d12::D3D12Context::kQueuedFrames * 2, 0, + gamma_ramp_footprints_, nullptr, nullptr, &gamma_ramp_upload_size); + // Create the upload buffer for the gamma ramp. 
+ ui::d3d12::util::FillBufferResourceDesc( + gamma_ramp_desc, gamma_ramp_upload_size, D3D12_RESOURCE_FLAG_NONE); + if (FAILED(device->CreateCommittedResource( + &ui::d3d12::util::kHeapPropertiesUpload, D3D12_HEAP_FLAG_NONE, + &gamma_ramp_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&gamma_ramp_upload_)))) { + XELOGE("Failed to create the gamma ramp upload buffer"); + return false; + } + if (FAILED(gamma_ramp_upload_->Map( + 0, nullptr, reinterpret_cast(&gamma_ramp_upload_mapping_)))) { + XELOGE("Failed to map the gamma ramp upload buffer"); + return false; + } + D3D12_RESOURCE_DESC swap_texture_desc; swap_texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; swap_texture_desc.Alignment = 0; @@ -729,6 +778,17 @@ void D3D12CommandProcessor::ShutdownContext() { ui::d3d12::util::ReleaseAndNull(swap_texture_rtv_descriptor_heap_); ui::d3d12::util::ReleaseAndNull(swap_texture_); + // Don't need the data anymore, so zero range. + if (gamma_ramp_upload_mapping_ != nullptr) { + D3D12_RANGE gamma_ramp_written_range; + gamma_ramp_written_range.Begin = 0; + gamma_ramp_written_range.End = 0; + gamma_ramp_upload_->Unmap(0, &gamma_ramp_written_range); + gamma_ramp_upload_mapping_ = nullptr; + } + ui::d3d12::util::ReleaseAndNull(gamma_ramp_upload_); + ui::d3d12::util::ReleaseAndNull(gamma_ramp_texture_); + sampler_heap_pool_.reset(); view_heap_pool_.reset(); constant_buffer_pool_.reset(); @@ -787,6 +847,12 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { texture_cache_->TextureFetchConstantWritten( (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); } + } else if (index == XE_GPU_REG_DC_LUT_PWL_DATA) { + UpdateGammaRampValue(GammaRampType::kPWL, value); + } else if (index == XE_GPU_REG_DC_LUT_30_COLOR) { + UpdateGammaRampValue(GammaRampType::kNormal, value); + } else if (index == XE_GPU_REG_DC_LUT_RW_MODE) { + gamma_ramp_rw_subindex_ = 0; } } @@ -798,19 +864,95 @@ void D3D12CommandProcessor::PerformSwap(uint32_t 
frontbuffer_ptr, // In case the swap command is the only one in the frame. BeginFrame(); - D3D12_CPU_DESCRIPTOR_HANDLE frontbuffer_cpu_handle; - D3D12_GPU_DESCRIPTOR_HANDLE frontbuffer_gpu_handle; - if (RequestViewDescriptors(0, 1, 1, frontbuffer_cpu_handle, - frontbuffer_gpu_handle) != 0) { - if (texture_cache_->RequestSwapTexture(frontbuffer_cpu_handle)) { - auto command_list = GetCurrentCommandList(); + auto provider = GetD3D12Context()->GetD3D12Provider(); + auto device = provider->GetDevice(); + auto command_list = GetCurrentCommandList(); + + // Upload the new gamma ramps. + if (dirty_gamma_ramp_normal_) { + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint = + gamma_ramp_footprints_[current_queue_frame_ * 2]; + std::memcpy(gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset, + gamma_ramp_.normal, 256 * sizeof(uint32_t)); + PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_, + D3D12_RESOURCE_STATE_COPY_DEST); + gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; + D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; + location_source.pResource = gamma_ramp_upload_; + location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + location_source.PlacedFootprint = gamma_ramp_footprint; + location_dest.pResource = gamma_ramp_texture_; + location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + location_dest.SubresourceIndex = 0; + command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, + nullptr); + dirty_gamma_ramp_normal_ = false; + } + if (dirty_gamma_ramp_pwl_) { + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& gamma_ramp_footprint = + gamma_ramp_footprints_[current_queue_frame_ * 2 + 1]; + volatile uint32_t* mapping = reinterpret_cast( + gamma_ramp_upload_mapping_ + gamma_ramp_footprint.Offset); + for (uint32_t i = 0; i < 128; ++i) { + mapping[i] = (gamma_ramp_.pwl[i].values[0].base >> 6) | + (uint32_t(gamma_ramp_.pwl[i].values[1].base >> 6) << 10) | + 
(uint32_t(gamma_ramp_.pwl[i].values[2].base >> 6) << 20); + } + PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_, + D3D12_RESOURCE_STATE_COPY_DEST); + gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_COPY_DEST; + D3D12_TEXTURE_COPY_LOCATION location_source, location_dest; + location_source.pResource = gamma_ramp_upload_; + location_source.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + location_source.PlacedFootprint = gamma_ramp_footprint; + location_dest.pResource = gamma_ramp_texture_; + location_dest.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + location_dest.SubresourceIndex = 1; + command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, + nullptr); + dirty_gamma_ramp_pwl_ = false; + } + + D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start; + D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start; + if (RequestViewDescriptors(0, 2, 2, descriptor_cpu_start, + descriptor_gpu_start) != 0) { + TextureFormat frontbuffer_format; + if (texture_cache_->RequestSwapTexture(descriptor_cpu_start, + frontbuffer_format)) { render_target_cache_->UnbindRenderTargets(); + + // Create the gamma ramp texture descriptor. + // This is according to D3D::InitializePresentationParameters from a game + // executable, which initializes the normal gamma ramp for 8_8_8_8 output + // and the PWL gamma ramp for 2_10_10_10. + bool use_pwl_gamma_ramp = + frontbuffer_format == TextureFormat::k_2_10_10_10 || + frontbuffer_format == TextureFormat::k_2_10_10_10_AS_16_16_16_16; + D3D12_SHADER_RESOURCE_VIEW_DESC gamma_ramp_srv_desc; + gamma_ramp_srv_desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; + gamma_ramp_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + gamma_ramp_srv_desc.Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + gamma_ramp_srv_desc.Texture1D.MostDetailedMip = + use_pwl_gamma_ramp ? 
1 : 0; + gamma_ramp_srv_desc.Texture1D.MipLevels = 1; + gamma_ramp_srv_desc.Texture1D.ResourceMinLODClamp = 0.0f; + device->CreateShaderResourceView( + gamma_ramp_texture_, &gamma_ramp_srv_desc, + provider->OffsetViewDescriptor(descriptor_cpu_start, 1)); + // The swap texture is kept as an SRV because the graphics system may draw // with it at any time. It's switched to RTV and back when needed. PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + PushTransitionBarrier(gamma_ramp_texture_, gamma_ramp_texture_state_, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + gamma_ramp_texture_state_ = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; SubmitBarriers(); + + // Draw the stretching rectangle. command_list->OMSetRenderTargets(1, &swap_texture_rtv_, TRUE, nullptr); D3D12_VIEWPORT viewport; viewport.TopLeftX = 0.0f; @@ -828,8 +970,12 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, command_list->RSSetScissorRects(1, &scissor); D3D12GraphicsSystem* graphics_system = static_cast(graphics_system_); - graphics_system->StretchTextureToFrontBuffer(frontbuffer_gpu_handle, - command_list); + D3D12_GPU_DESCRIPTOR_HANDLE gamma_ramp_gpu_handle = + provider->OffsetViewDescriptor(descriptor_gpu_start, 1); + graphics_system->StretchTextureToFrontBuffer( + descriptor_gpu_start, &gamma_ramp_gpu_handle, + use_pwl_gamma_ramp ? (1.0f / 128.0f) : (1.0f / 256.0f), command_list); + PushTransitionBarrier(swap_texture_, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); // Don't care about graphics state because the frame is ending anyway. 
diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 376e54e33..45b24e8e0 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -217,6 +217,17 @@ class D3D12CommandProcessor : public CommandProcessor { std::unique_ptr view_heap_pool_ = nullptr; std::unique_ptr sampler_heap_pool_ = nullptr; + // Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL + // ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D. + ID3D12Resource* gamma_ramp_texture_ = nullptr; + D3D12_RESOURCE_STATES gamma_ramp_texture_state_; + // Upload buffer for an image that is the same as gamma_ramp_, but with + // ui::d3d12::D3D12Context::kQueuedFrames array layers. + ID3D12Resource* gamma_ramp_upload_ = nullptr; + uint8_t* gamma_ramp_upload_mapping_ = nullptr; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT + gamma_ramp_footprints_[ui::d3d12::D3D12Context::kQueuedFrames * 2]; + static constexpr uint32_t kSwapTextureWidth = 1280; static constexpr uint32_t kSwapTextureHeight = 720; ID3D12Resource* swap_texture_ = nullptr; diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index d312e345c..7a96befa6 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -20,6 +20,7 @@ namespace d3d12 { // Generated with `xb buildhlsl`. #include "xenia/gpu/d3d12/shaders/dxbc/fullscreen_vs.h" +#include "xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h" #include "xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h" D3D12GraphicsSystem::D3D12GraphicsSystem() {} @@ -43,20 +44,23 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, target_window->context()); } - // Create the stretch pipeline root signature. 
- D3D12_ROOT_PARAMETER stretch_root_parameter; - stretch_root_parameter.ParameterType = + // Create the stretch pipeline root signature, with 1 parameter (source + // texture) for raw stretch and 3 parameters (source texture, gamma ramp LUT, + // inverse of the size of the gamma ramp LUT) for gamma-correcting stretch. + // Raw. + D3D12_ROOT_PARAMETER stretch_root_parameters[3]; + stretch_root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - stretch_root_parameter.DescriptorTable.NumDescriptorRanges = 1; + stretch_root_parameters[0].DescriptorTable.NumDescriptorRanges = 1; D3D12_DESCRIPTOR_RANGE stretch_root_texture_range; stretch_root_texture_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; stretch_root_texture_range.NumDescriptors = 1; stretch_root_texture_range.BaseShaderRegister = 0; stretch_root_texture_range.RegisterSpace = 0; stretch_root_texture_range.OffsetInDescriptorsFromTableStart = 0; - stretch_root_parameter.DescriptorTable.pDescriptorRanges = + stretch_root_parameters[0].DescriptorTable.pDescriptorRanges = &stretch_root_texture_range; - stretch_root_parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + stretch_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; D3D12_STATIC_SAMPLER_DESC stretch_sampler_desc; stretch_sampler_desc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; stretch_sampler_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; @@ -73,7 +77,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, stretch_sampler_desc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; D3D12_ROOT_SIGNATURE_DESC stretch_root_desc; stretch_root_desc.NumParameters = 1; - stretch_root_desc.pParameters = &stretch_root_parameter; + stretch_root_desc.pParameters = stretch_root_parameters; stretch_root_desc.NumStaticSamplers = 1; stretch_root_desc.pStaticSamplers = &stretch_sampler_desc; stretch_root_desc.Flags = @@ -84,8 +88,39 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, XELOGE("Failed 
to create the front buffer stretch root signature"); return X_STATUS_UNSUCCESSFUL; } + // Gamma. + stretch_root_parameters[1].ParameterType = + D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + stretch_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1; + D3D12_DESCRIPTOR_RANGE stretch_root_gamma_ramp_range; + stretch_root_gamma_ramp_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + stretch_root_gamma_ramp_range.NumDescriptors = 1; + stretch_root_gamma_ramp_range.BaseShaderRegister = 1; + stretch_root_gamma_ramp_range.RegisterSpace = 0; + stretch_root_gamma_ramp_range.OffsetInDescriptorsFromTableStart = 0; + stretch_root_parameters[1].DescriptorTable.pDescriptorRanges = + &stretch_root_gamma_ramp_range; + stretch_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + stretch_root_parameters[2].ParameterType = + D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + stretch_root_parameters[2].Constants.ShaderRegister = 0; + stretch_root_parameters[2].Constants.RegisterSpace = 0; + stretch_root_parameters[2].Constants.Num32BitValues = 1; + stretch_root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + stretch_root_desc.NumParameters = 3; + stretch_root_desc.pParameters = stretch_root_parameters; + stretch_gamma_root_signature_ = + ui::d3d12::util::CreateRootSignature(device, stretch_root_desc); + if (stretch_gamma_root_signature_ == nullptr) { + XELOGE( + "Failed to create the gamma-correcting front buffer stretch root " + "signature"); + stretch_root_signature_->Release(); + stretch_root_signature_ = nullptr; + return X_STATUS_UNSUCCESSFUL; + } - // Create the stretch pipeline. + // Create the stretch pipelines. 
D3D12_GRAPHICS_PIPELINE_STATE_DESC stretch_pipeline_desc = {}; stretch_pipeline_desc.pRootSignature = stretch_root_signature_; stretch_pipeline_desc.VS.pShaderBytecode = fullscreen_vs; @@ -107,6 +142,24 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, if (FAILED(device->CreateGraphicsPipelineState( &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_pipeline_)))) { XELOGE("Failed to create the front buffer stretch pipeline state"); + stretch_gamma_root_signature_->Release(); + stretch_gamma_root_signature_ = nullptr; + stretch_root_signature_->Release(); + stretch_root_signature_ = nullptr; + return X_STATUS_UNSUCCESSFUL; + } + stretch_pipeline_desc.pRootSignature = stretch_gamma_root_signature_; + stretch_pipeline_desc.PS.pShaderBytecode = stretch_gamma_ps; + stretch_pipeline_desc.PS.BytecodeLength = sizeof(stretch_gamma_ps); + if (FAILED(device->CreateGraphicsPipelineState( + &stretch_pipeline_desc, IID_PPV_ARGS(&stretch_gamma_pipeline_)))) { + XELOGE( + "Failed to create the gamma-correcting front buffer stretch " + "pipeline state"); + stretch_pipeline_->Release(); + stretch_pipeline_ = nullptr; + stretch_gamma_root_signature_->Release(); + stretch_gamma_root_signature_ = nullptr; stretch_root_signature_->Release(); stretch_root_signature_ = nullptr; return X_STATUS_UNSUCCESSFUL; @@ -116,7 +169,9 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, } void D3D12GraphicsSystem::Shutdown() { + ui::d3d12::util::ReleaseAndNull(stretch_gamma_pipeline_); ui::d3d12::util::ReleaseAndNull(stretch_pipeline_); + ui::d3d12::util::ReleaseAndNull(stretch_gamma_root_signature_); ui::d3d12::util::ReleaseAndNull(stretch_root_signature_); GraphicsSystem::Shutdown(); @@ -130,9 +185,17 @@ void D3D12GraphicsSystem::AwaitFrontBufferUnused() { void D3D12GraphicsSystem::StretchTextureToFrontBuffer( D3D12_GPU_DESCRIPTOR_HANDLE handle, + D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size, ID3D12GraphicsCommandList* command_list) { - 
command_list->SetPipelineState(stretch_pipeline_); - command_list->SetGraphicsRootSignature(stretch_root_signature_); + if (gamma_ramp_handle != nullptr) { + command_list->SetPipelineState(stretch_gamma_pipeline_); + command_list->SetGraphicsRootSignature(stretch_gamma_root_signature_); + command_list->SetGraphicsRootDescriptorTable(1, *gamma_ramp_handle); + command_list->SetGraphicsRoot32BitConstants(2, 1, &gamma_ramp_inv_size, 0); + } else { + command_list->SetPipelineState(stretch_pipeline_); + command_list->SetGraphicsRootSignature(stretch_root_signature_); + } command_list->SetGraphicsRootDescriptorTable(0, handle); command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); command_list->DrawInstanced(3, 1, 0, 0); @@ -186,7 +249,8 @@ void D3D12GraphicsSystem::Swap(xe::ui::UIEvent* e) { command_list->RSSetScissorRects(1, &scissor); command_list->SetDescriptorHeaps(1, &swap_srv_heap); StretchTextureToFrontBuffer( - swap_srv_heap->GetGPUDescriptorHandleForHeapStart(), command_list); + swap_srv_heap->GetGPUDescriptorHandleForHeapStart(), nullptr, 0.0f, + command_list); } } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.h b/src/xenia/gpu/d3d12/d3d12_graphics_system.h index ec87082e5..19f268305 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.h +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.h @@ -36,9 +36,12 @@ class D3D12GraphicsSystem : public GraphicsSystem { // Draws a texture covering the entire viewport to the render target currently // bound on the specified command list (in D3D12Context::kSwapChainFormat). // This changes the current pipeline, graphics root signature and primitive - // topology. - void StretchTextureToFrontBuffer(D3D12_GPU_DESCRIPTOR_HANDLE handle, - ID3D12GraphicsCommandList* command_list); + // topology. The gamma ramp texture must be 1D if present at all, for linear + // space, pass nullptr as the gamma ramp. 
+ void StretchTextureToFrontBuffer( + D3D12_GPU_DESCRIPTOR_HANDLE handle, + D3D12_GPU_DESCRIPTOR_HANDLE* gamma_ramp_handle, float gamma_ramp_inv_size, + ID3D12GraphicsCommandList* command_list); private: std::unique_ptr CreateCommandProcessor() override; @@ -48,7 +51,9 @@ class D3D12GraphicsSystem : public GraphicsSystem { ui::d3d12::D3D12Context* display_context_ = nullptr; ID3D12RootSignature* stretch_root_signature_ = nullptr; + ID3D12RootSignature* stretch_gamma_root_signature_ = nullptr; ID3D12PipelineState* stretch_pipeline_ = nullptr; + ID3D12PipelineState* stretch_gamma_pipeline_ = nullptr; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso new file mode 100644 index 0000000000000000000000000000000000000000..6a1fe672401491ce67313d8cb86163e9eaf6fa2e GIT binary patch literal 1296 zcmb7DO=}ZT6uoIoZG#`7by0LPh(eVjX({4DtZgRKh}B>wEz!+5#+N3NWXfceqNM~} zxbb5n{s#%DKfr(B!i5_b{s<}V<2jjmF@vSnL+*R`-Fx0W_q{hUD=WEIXFinPy?FNK z%eT)rzkaN}Pl!k+C4!$U!cL8e6kt2B;D|`c%v;|N^a-))Y4bc| zGqBmn&S5?VBYwb=11~)ae7vlc#HO&7kISCXXE21dO16oWnlQT%Foh zPRDI+HI?r)8*SzKPPK`(ARegg;IXghZn~|O>-bDmlpXjgsMc2*D|ud!^V*%jZ3i9c z^?KB$hKV7gMKCuR1b8;~{W4nXbLu%J<}}(*oKEAZl3LSq1J>tlCI$+u_BWoR?yW}E z_c~rJFlI{^jKZ3|X4rN2q4JHK*V<~}tj2uy(!AWTS8vau1Y63a+xs48rpR3K}M1bnea6fE^yBX{W_k4Z&ASKvKPIU74fWH1DOc-4ed$d z`p`{JVGp&^-E4Px7!I9~*Il|IGI{n7FUE>$EiHiKYyGjXxAADtMk>@`kJKa5nu{Lw zS!JIdxrf@s>HpZX5^*mca?fS_ZS|d6Cqlhzcm}(?Tz(7fNf_^$;EYdd2=;#v_5OV;< ngIrwQ&P=qFW?VEe)SGeo#D#_v@0&8M_MGg=sJz*Nv4Rl*`SljT diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h index c5dbe0ba5..5aa4fdb1e 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h +++ b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h @@ -1,28 +1,29 @@ // generated from `xb buildhlsl` // source: stretch.ps.hlsl const uint8_t stretch_ps[] = { - 0x44, 0x58, 0x42, 0x43, 0x2E, 0x71, 0x8B, 0x1B, 0xF0, 0x06, 0x39, 0x2A, - 0x24, 
0xF3, 0x17, 0x82, 0x9A, 0x83, 0xE2, 0x79, 0x01, 0x00, 0x00, 0x00, - 0xA4, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, - 0x08, 0x01, 0x00, 0x00, 0x3C, 0x01, 0x00, 0x00, 0x70, 0x01, 0x00, 0x00, - 0x08, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xCC, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x84, 0x8B, 0x62, 0x99, 0x4D, 0x9D, 0x1A, 0x13, + 0x49, 0x13, 0xF7, 0x6B, 0x9F, 0x69, 0x25, 0x8A, 0x01, 0x00, 0x00, 0x00, + 0xB0, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x14, 0x01, 0x00, 0x00, 0x48, 0x01, 0x00, 0x00, 0x7C, 0x01, 0x00, 0x00, + 0x14, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00, - 0xA2, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, + 0xAF, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x73, - 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x72, 0x00, 0x78, 0x65, 0x5F, 0x74, 0x65, - 0x78, 0x74, 0x75, 0x72, 0x65, 0x00, 0x4D, 0x69, 0x63, 0x72, 0x6F, 0x73, - 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C, 0x53, 0x4C, - 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F, 0x6D, 0x70, - 0x69, 0x6C, 0x65, 0x72, 
0x20, 0x31, 0x30, 0x2E, 0x31, 0x00, 0xAB, 0xAB, + 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x72, 0x5F, 0x6C, 0x69, 0x6E, 0x65, 0x61, + 0x72, 0x5F, 0x63, 0x6C, 0x61, 0x6D, 0x70, 0x00, 0x78, 0x65, 0x5F, 0x74, + 0x65, 0x78, 0x74, 0x75, 0x72, 0x65, 0x00, 0x4D, 0x69, 0x63, 0x72, 0x6F, + 0x73, 0x6F, 0x66, 0x74, 0x20, 0x28, 0x52, 0x29, 0x20, 0x48, 0x4C, 0x53, + 0x4C, 0x20, 0x53, 0x68, 0x61, 0x64, 0x65, 0x72, 0x20, 0x43, 0x6F, 0x6D, + 0x70, 0x69, 0x6C, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2E, 0x31, 0x00, 0xAB, 0x49, 0x53, 0x47, 0x4E, 0x2C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt index 20ab2820c..ff7c711a5 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt +++ b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt @@ -6,7 +6,7 @@ // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ -// xe_sampler sampler NA NA S0 s0 1 +// xe_sampler_linear_clamp sampler NA NA S0 s0 1 // xe_texture texture float4 2d T0 t0 1 // // diff --git a/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl b/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl index e8a0647fe..681552587 100644 --- a/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl +++ b/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl @@ -1,5 +1,6 @@ Texture2D xe_texture : register(t0); -SamplerState xe_sampler : register(s0); +SamplerState xe_sampler_linear_clamp : register(s0); + float4 main(float2 xe_texcoord : TEXCOORD) : SV_Target { - return xe_texture.SampleLevel(xe_sampler, xe_texcoord, 0.0f); + return xe_texture.SampleLevel(xe_sampler_linear_clamp, xe_texcoord, 0.0f); } diff --git a/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl b/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl new file mode 100644 index 
000000000..cfa545aa1 --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl @@ -0,0 +1,19 @@ +Texture2D xe_texture : register(t0); +Texture1D xe_gamma_ramp : register(t1); +SamplerState xe_sampler_linear_clamp : register(s0); +cbuffer XeStretchGammaRootConstants : register(b0) { + float xe_gamma_ramp_inv_size; +}; + +float4 main(float2 xe_texcoord : TEXCOORD) : SV_Target { + float4 color = + xe_texture.SampleLevel(xe_sampler_linear_clamp, xe_texcoord, 0.0f); + // The center of the first texel of the LUT contains the value for 0, and the + // center of the last texel contains the value for 1. + color.rgb = color.rgb * (1.0f - xe_gamma_ramp_inv_size) + + (0.5 * xe_gamma_ramp_inv_size); + color.r = xe_gamma_ramp.SampleLevel(xe_sampler_linear_clamp, color.r, 0.0f).r; + color.g = xe_gamma_ramp.SampleLevel(xe_sampler_linear_clamp, color.g, 0.0f).g; + color.b = xe_gamma_ramp.SampleLevel(xe_sampler_linear_clamp, color.b, 0.0f).b; + return color; +} diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index ce7720363..32284cd6d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -750,7 +750,8 @@ bool TextureCache::TileResolvedTexture( return true; } -bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) { +bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle, + TextureFormat& format_out) { auto group = reinterpret_cast( ®ister_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); auto& fetch = group->texture_fetch; @@ -781,6 +782,7 @@ bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle) { auto device = command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); device->CreateShaderResourceView(texture->resource, &srv_desc, handle); + format_out = key.format; return true; } diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index c7cf35949..4b2ab3edf 100644 --- 
a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -93,7 +93,8 @@ class TextureCache { uint32_t buffer_size, const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint); - bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle); + bool RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle, + TextureFormat& format_out); private: enum class LoadMode { diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index e57b50399..ca1785d21 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1031,8 +1031,6 @@ void DxbcShaderTranslator::CompletePixelShader() { // Convert to gamma space (likely needs to be done after the exponent bias // since gamma is a property of the storage format). - // TODO(Triang3l): Check how SetPWLGamma effects this - currently using the - // default curve. // Get which render targets need the conversion. uint32_t gamma_toggle_temp = PushSystemTemp(); uint32_t gamma_pieces_temp = PushSystemTemp(); @@ -4674,8 +4672,6 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ++stat_.movc_instruction_count; // Linearize the texture if it's stored in a gamma format. - // TODO(Triang3l): Check how SetPWLGamma effects this - currently using - // the default curve. for (uint32_t i = 0; i < 4; ++i) { // Calculate how far we are on each piece of the curve. Multiply by // 1/width of each piece, subtract start/width of it and saturate.