From 920704c71a1b662c77759989459b08ebdaec8cc9 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 22 Mar 2022 21:39:06 +0300 Subject: [PATCH 1/4] [D3D12] RT transfer: Same front/back stencil ops --- .../gpu/d3d12/d3d12_render_target_cache.cc | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index 61b8bb83f..8ce62e9a9 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -4388,14 +4388,8 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { D3D12_STENCIL_OP_REPLACE; pipeline_desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; - pipeline_desc.DepthStencilState.BackFace.StencilFailOp = - D3D12_STENCIL_OP_KEEP; - pipeline_desc.DepthStencilState.BackFace.StencilDepthFailOp = - D3D12_STENCIL_OP_KEEP; - pipeline_desc.DepthStencilState.BackFace.StencilPassOp = - D3D12_STENCIL_OP_REPLACE; - pipeline_desc.DepthStencilState.BackFace.StencilFunc = - D3D12_COMPARISON_FUNC_ALWAYS; + pipeline_desc.DepthStencilState.BackFace = + pipeline_desc.DepthStencilState.FrontFace; pipeline_desc.DSVFormat = GetDepthDSVDXGIFormat(dest_depth_format); // Even if creation fails, still store the null pointers not to try to // create again. @@ -4448,14 +4442,8 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { // stencil being different. 
pipeline_desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; - pipeline_desc.DepthStencilState.BackFace.StencilFailOp = - D3D12_STENCIL_OP_KEEP; - pipeline_desc.DepthStencilState.BackFace.StencilDepthFailOp = - D3D12_STENCIL_OP_REPLACE; - pipeline_desc.DepthStencilState.BackFace.StencilPassOp = - D3D12_STENCIL_OP_REPLACE; - pipeline_desc.DepthStencilState.BackFace.StencilFunc = - D3D12_COMPARISON_FUNC_ALWAYS; + pipeline_desc.DepthStencilState.BackFace = + pipeline_desc.DepthStencilState.FrontFace; } pipeline_desc.DSVFormat = GetDepthDSVDXGIFormat(dest_depth_format); } From ee8e71cea847849bcde1ed0832e722bb2755f5ed Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 22 Mar 2022 21:41:44 +0300 Subject: [PATCH 2/4] [D3D12] RT dump: Fix r# allocation --- src/xenia/gpu/d3d12/d3d12_render_target_cache.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index 8ce62e9a9..d623012c7 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -5970,7 +5970,8 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline( a.OpDclInput(dxbc::Dest::VThreadID(0b0011)); // r0 - addressing before the load, then addressing and conversion scratch // r1 - addressing scratch before the load, then data - a.OpDclTemps(2); + stat.temp_register_count = 2; + a.OpDclTemps(stat.temp_register_count); // There's no strict dependency on the group size here, for simplicity of // calculations especially with resolution scaling, dividing manually (as the // group size is not unlimited). The only restriction is that an integer @@ -6355,8 +6356,6 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline( case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: // Float16 has a wider range for both color and alpha, also NaNs. // Color - clamp and convert. 
- stat.temp_register_count = - std::max(stat.temp_register_count, uint32_t(3)); // Convert red in r1.x to the result register r1.x - the same, but // UnclampedFloat32To7e3 allows that - using r0.x as a temporary. DxbcShaderTranslator::UnclampedFloat32To7e3(a, 1, 0, 1, 0, 0, 0); From 32ab1a2df13bea79a05df96c5349845a197ef1eb Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 22 Mar 2022 21:48:26 +0300 Subject: [PATCH 3/4] [D3D12] Minor RT code style/comments cleanup --- .../gpu/d3d12/d3d12_render_target_cache.cc | 113 +++++------ .../gpu/d3d12/d3d12_render_target_cache.h | 180 +++++++++--------- src/xenia/gpu/dxbc_shader_translator.h | 2 +- src/xenia/gpu/dxbc_shader_translator_om.cc | 2 +- 4 files changed, 151 insertions(+), 146 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index d623012c7..45f2c3284 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -3516,7 +3516,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { if (source_is_color) { switch (source_color_format) { case xenos::ColorRenderTargetFormat::k_8_8_8_8: - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { color_packed_in_r0x_and_r1x = true; for (uint32_t i = 0; i < 2; ++i) { a.OpMAd(dxbc::Dest::R(i), dxbc::Src::R(i), dxbc::Src::LF(255.0f), @@ -3528,9 +3528,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(i, dxbc::Src::kXXXX)); } } - break; + } break; case xenos::ColorRenderTargetFormat::k_2_10_10_10: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { color_packed_in_r0x_and_r1x = true; for (uint32_t i = 0; i < 2; ++i) { a.OpMAd(dxbc::Dest::R(i), dxbc::Src::R(i), @@ -3543,9 +3543,10 @@ 
D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(i, dxbc::Src::kXXXX)); } } - break; + } break; case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { color_packed_in_r0x_and_r1x = true; for (uint32_t i = 0; i < 2; ++i) { // Float16 has a wider range for both color and alpha, also NaNs - @@ -3569,12 +3570,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::LU(30), dxbc::Src::R(i, dxbc::Src::kWWWW), dxbc::Src::R(i, dxbc::Src::kXXXX)); } - break; + } break; // All 64bpp formats, and all 16 bits per component formats, are // represented as integers in ownership transfer for safe handling of // NaNs and -32768 / -32767. case xenos::ColorRenderTargetFormat::k_16_16: - case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { if (dest_color_format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { for (uint32_t i = 0; i < 2; ++i) { @@ -3586,9 +3587,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { a.OpMov(dxbc::Dest::O(0, 0b0011), dxbc::Src::R(0)); a.OpMov(dxbc::Dest::O(0, 0b1100), dxbc::Src::R(1, 0b0100 << 4)); } - break; + } break; case xenos::ColorRenderTargetFormat::k_16_16_16_16: - case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { if (dest_color_format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { a.OpBFI(dxbc::Dest::O(0, 0b0011), dxbc::Src::LU(16), @@ -3597,11 +3598,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { } else { a.OpMov(dxbc::Dest::O(0), dxbc::Src::R(1)); } - break; - case xenos::ColorRenderTargetFormat::k_32_FLOAT: + } break; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { color_packed_in_r0x_and_r1x = true; - break; - 
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + } break; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { if (dest_color_format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) { a.OpMov(dxbc::Dest::O(0, 0b0011), dxbc::Src::R(1)); @@ -3609,14 +3610,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { a.OpUBFE(dxbc::Dest::O(0), dxbc::Src::LU(16), dxbc::Src::LU(0, 16, 0, 16), dxbc::Src::R(1, 0b01010000)); } - break; + } break; } } else { assert_not_zero(rs & kTransferUsedRootParameterDepthSRVBit); color_packed_in_r0x_and_r1x = true; for (uint32_t i = 0; i < 2; ++i) { switch (source_depth_format) { - case xenos::DepthRenderTargetFormat::kD24S8: + case xenos::DepthRenderTargetFormat::kD24S8: { // Round to the nearest even integer. This seems to be the correct, // adding +0.5 and rounding towards zero results in red instead of // black in the 4D5307E6 clear shader. @@ -3626,12 +3627,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(i, dxbc::Src::kWWWW)); a.OpFToU(dxbc::Dest::R(i, 0b1000), dxbc::Src::R(i, dxbc::Src::kWWWW)); - break; - case xenos::DepthRenderTargetFormat::kD24FS8: + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { // Convert using r1.y as temporary. DxbcShaderTranslator::PreClampedDepthTo20e4(a, i, 3, i, 3, 1, 1, true); - break; + } break; } // Merge depth and stencil into r0/r1.x. a.OpBFI(dxbc::Dest::R(i, 0b0001), dxbc::Src::LU(24), dxbc::Src::LU(8), @@ -3652,14 +3653,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { } } } else { - // Handle 32bpp color. If color_packed_in_r1x is true, a raw 32bpp color - // value was written, and common handling will be done. + // Handle a 32bpp destination (32bpp color, or depth / stencil). If + // color_packed_in_r1x is true, a raw 32bpp color value was written, and + // common handling will be done. 
bool color_packed_in_r1x = false; bool depth_loaded_in_guest_format = false; if (source_is_color) { switch (source_color_format) { case xenos::ColorRenderTargetFormat::k_8_8_8_8: - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { if (dest_is_stencil_bit) { a.OpMAd(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX), dxbc::Src::LF(255.0f), dxbc::Src::LF(0.5f)); @@ -3706,9 +3708,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(1, dxbc::Src::kXXXX)); } } - break; + } break; case xenos::ColorRenderTargetFormat::k_2_10_10_10: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { if (dest_is_stencil_bit) { a.OpMAd(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX), dxbc::Src::LF(1023.0f), dxbc::Src::LF(0.5f)); @@ -3732,9 +3734,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(1, dxbc::Src::kXXXX)); } } - break; + } break; case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: + case xenos::ColorRenderTargetFormat:: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { if (dest_is_stencil_bit) { DxbcShaderTranslator::UnclampedFloat32To7e3(a, 1, 0, 1, 0, 2, 0); } else if (dest_is_color && @@ -3767,11 +3770,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::LU(30), dxbc::Src::R(1, dxbc::Src::kWWWW), dxbc::Src::R(1, dxbc::Src::kXXXX)); } - break; + } break; case xenos::ColorRenderTargetFormat::k_16_16: case xenos::ColorRenderTargetFormat::k_16_16_16_16: case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: - case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: { // All 16 bits per component formats are represented as integers in // ownership transfer for 
safe handling of NaNs and -32768 / -32767. if (dest_is_stencil_bit) { @@ -3789,11 +3792,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::LU(16), dxbc::Src::R(1, dxbc::Src::kYYYY), dxbc::Src::R(1, dxbc::Src::kXXXX)); } - break; + } break; case xenos::ColorRenderTargetFormat::k_32_FLOAT: - case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: { color_packed_in_r1x = true; - break; + } break; } } else if (rs & kTransferUsedRootParameterDepthSRVBit) { if (dest_is_color || dest_depth_format != source_depth_format) { @@ -3801,7 +3804,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { // format. Convert the depth within r1.w. depth_loaded_in_guest_format = true; switch (source_depth_format) { - case xenos::DepthRenderTargetFormat::kD24S8: + case xenos::DepthRenderTargetFormat::kD24S8: { // Round to the nearest even integer. This seems to be the correct, // adding +0.5 and rounding towards zero results in red instead of // black in the 4D5307E6 clear shader. @@ -3811,12 +3814,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(1, dxbc::Src::kWWWW)); a.OpFToU(dxbc::Dest::R(1, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW)); - break; - case xenos::DepthRenderTargetFormat::kD24FS8: + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { // Convert using r1.y as temporary. DxbcShaderTranslator::PreClampedDepthTo20e4(a, 1, 3, 1, 3, 1, 1, true); - break; + } break; } if (dest_is_color) { // Merge depth and stencil into r1.x for reinterpretation as color. 
@@ -3835,16 +3838,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { if (color_packed_in_r1x) { switch (dest_color_format) { case xenos::ColorRenderTargetFormat::k_8_8_8_8: - case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: { a.OpUBFE(dxbc::Dest::R(1), dxbc::Src::LU(8), dxbc::Src::LU(0, 8, 16, 24), dxbc::Src::R(1, dxbc::Src::kXXXX)); a.OpUToF(dxbc::Dest::R(1), dxbc::Src::R(1)); a.OpMul(dxbc::Dest::O(0), dxbc::Src::R(1), dxbc::Src::LF(1.0f / 255.0f)); - break; + } break; case xenos::ColorRenderTargetFormat::k_2_10_10_10: - case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: { a.OpUBFE(dxbc::Dest::R(1), dxbc::Src::LU(10, 10, 10, 2), dxbc::Src::LU(0, 10, 20, 30), dxbc::Src::R(1, dxbc::Src::kXXXX)); @@ -3852,10 +3855,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { a.OpMul(dxbc::Dest::O(0), dxbc::Src::R(1), dxbc::Src::LF(1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f)); - break; + } break; case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: case xenos::ColorRenderTargetFormat:: - k_2_10_10_10_FLOAT_AS_16_16_16_16: + k_2_10_10_10_FLOAT_AS_16_16_16_16: { // Color using r1.yz as temporary. for (uint32_t i = 0; i < 3; ++i) { DxbcShaderTranslator::Float7e3To32(a, dxbc::Dest::O(0, 1 << i), @@ -3869,21 +3872,21 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { a.OpMul(dxbc::Dest::O(0, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW), dxbc::Src::LF(1.0f / 3.0f)); - break; + } break; case xenos::ColorRenderTargetFormat::k_16_16: - case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: { // All 16 bits per component formats are represented as integers // in ownership transfer for safe handling of NaNs and // -32768 / -32767. 
a.OpUBFE(dxbc::Dest::O(0, 0b0011), dxbc::Src::LU(16), dxbc::Src::LU(0, 16, 0, 0), dxbc::Src::R(1, dxbc::Src::kXXXX)); - break; - case xenos::ColorRenderTargetFormat::k_32_FLOAT: + } break; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: { // Already as a 32-bit value. a.OpMov(dxbc::Dest::O(0, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX)); - break; + } break; default: // A 64bpp format (handled separately) or an invalid one. assert_unhandled_case(dest_color_format); @@ -4178,7 +4181,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { // using r0.z as a temporary and check if it matches the value in // the currently owning guest render target. switch (dest_depth_format) { - case xenos::DepthRenderTargetFormat::kD24S8: + case xenos::DepthRenderTargetFormat::kD24S8: { // Round to the nearest even integer. This seems to be the // correct, adding +0.5 and rounding towards zero results in red // instead of black in the 4D5307E6 clear shader. @@ -4189,11 +4192,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { dxbc::Src::R(0, dxbc::Src::kYYYY)); a.OpFToU(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kYYYY)); - break; - case xenos::DepthRenderTargetFormat::kD24FS8: + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { DxbcShaderTranslator::PreClampedDepthTo20e4(a, 0, 1, 0, 0, 0, 2, true); - break; + } break; } a.OpIEq(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::R(1, dxbc::Src::kWWWW)); @@ -4206,7 +4209,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { } // Convert using r0.x as a temporary. 
switch (dest_depth_format) { - case xenos::DepthRenderTargetFormat::kD24S8: + case xenos::DepthRenderTargetFormat::kD24S8: { // Multiplying by 1.0 / 0xFFFFFF produces an incorrect result (for // 0xC00000, for instance - which is 2_10_10_10 clear to 0001) - // rescale from 0...0xFFFFFF to 0...0x1000000 doing what true @@ -4223,11 +4226,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { a.OpMul(dxbc::Dest::R(1, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW), dxbc::Src::LF(1.0f / float(1 << 24))); - break; - case xenos::DepthRenderTargetFormat::kD24FS8: + } break; + case xenos::DepthRenderTargetFormat::kD24FS8: { DxbcShaderTranslator::Depth20e4To32(a, dxbc::Dest::R(1, 0b1000), 1, 3, 0, 1, 3, 0, 0, true); - break; + } break; } // Host depth is different, or not available - convert the guest depth // to the destination format. @@ -5273,13 +5276,15 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears( if (transfer_root_parameters_used & kTransferUsedRootParameterHostDepthAddressConstantBit) { assert_not_null(host_depth_source_d3d12_rt); + RenderTargetKey host_depth_source_rt_key = + host_depth_source_d3d12_rt->key(); TransferAddressConstant host_depth_address_constant; host_depth_address_constant.dest_pitch = dest_pitch_tiles; host_depth_address_constant.source_pitch = - host_depth_source_d3d12_rt->key().GetPitchTiles(); + host_depth_source_rt_key.GetPitchTiles(); host_depth_address_constant.source_to_dest = int32_t(dest_rt_key.base_tiles) - - int32_t(host_depth_source_d3d12_rt->key().base_tiles); + int32_t(host_depth_source_rt_key.base_tiles); if (last_host_depth_address_constant != host_depth_address_constant) { last_host_depth_address_constant = host_depth_address_constant; transfer_root_parameters_set &= diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h index 15a87bc8c..8b5b17310 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h +++ 
b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h @@ -126,95 +126,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache { xenos::DepthRenderTargetFormat format); protected: - class D3D12RenderTarget final : public RenderTarget { - public: - // descriptor_draw_srgb is only used for k_8_8_8_8 render targets when host - // sRGB (gamma_render_target_as_srgb) is used. descriptor_load is present - // when the DXGI formats are different for drawing and bit-exact loading - // (for NaN pattern preservation across EDRAM tile ownership transfers in - // floating-point formats, and to distinguish between two -1 representations - // in snorm formats). - D3D12RenderTarget( - RenderTargetKey key, ID3D12Resource* resource, - ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw, - ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw_srgb, - ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& - descriptor_load_separate, - ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv, - ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv_stencil, - D3D12_RESOURCE_STATES resource_state) - : RenderTarget(key), - resource_(resource), - descriptor_draw_(std::move(descriptor_draw)), - descriptor_draw_srgb_(std::move(descriptor_draw_srgb)), - descriptor_load_separate_(std::move(descriptor_load_separate)), - descriptor_srv_(std::move(descriptor_srv)), - descriptor_srv_stencil_(std::move(descriptor_srv_stencil)), - resource_state_(resource_state) {} - - ID3D12Resource* resource() const { return resource_.Get(); } - const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw() - const { - return descriptor_draw_; - } - const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw_srgb() - const { - return descriptor_draw_srgb_; - } - const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_srv() - const { - return descriptor_srv_; - } - const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& - descriptor_srv_stencil() const { - 
return descriptor_srv_stencil_; - } - const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& - descriptor_load_separate() const { - return descriptor_load_separate_; - } - - D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) { - D3D12_RESOURCE_STATES old_state = resource_state_; - resource_state_ = new_state; - return old_state; - } - - uint32_t temporary_srv_descriptor_index() const { - return temporary_srv_descriptor_index_; - } - void SetTemporarySRVDescriptorIndex(uint32_t index) { - temporary_srv_descriptor_index_ = index; - } - uint32_t temporary_srv_descriptor_index_stencil() const { - return temporary_srv_descriptor_index_stencil_; - } - void SetTemporarySRVDescriptorIndexStencil(uint32_t index) { - temporary_srv_descriptor_index_stencil_ = index; - } - uint32_t temporary_sort_index() const { return temporary_sort_index_; } - void SetTemporarySortIndex(uint32_t index) { - temporary_sort_index_ = index; - } - - private: - Microsoft::WRL::ComPtr<ID3D12Resource> resource_; - ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_; - ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_srgb_; - ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_load_separate_; - // Texture SRV non-shader-visible descriptors, to prepare shader-visible - // descriptors faster, by copying rather than by creating every time. - // TODO(Triang3l): With bindless resources, persistently store them in the - // heap. - ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_; - ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_stencil_; - D3D12_RESOURCE_STATES resource_state_; - // Temporary storage for indices in operations like transfers and dumps. 
- uint32_t temporary_srv_descriptor_index_ = UINT32_MAX; - uint32_t temporary_srv_descriptor_index_stencil_ = UINT32_MAX; - uint32_t temporary_sort_index_ = 0; - }; - uint32_t GetMaxRenderTargetWidth() const override { return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; } @@ -310,6 +221,95 @@ class D3D12RenderTargetCache final : public RenderTargetCache { // For host render targets. + class D3D12RenderTarget final : public RenderTarget { + public: + // descriptor_draw_srgb is only used for k_8_8_8_8 render targets when host + // sRGB (gamma_render_target_as_srgb) is used. descriptor_load is present + // when the DXGI formats are different for drawing and bit-exact loading + // (for NaN pattern preservation across EDRAM tile ownership transfers in + // floating-point formats, and to distinguish between two -1 representations + // in snorm formats). + D3D12RenderTarget( + RenderTargetKey key, ID3D12Resource* resource, + ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw, + ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw_srgb, + ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& + descriptor_load_separate, + ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv, + ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv_stencil, + D3D12_RESOURCE_STATES resource_state) + : RenderTarget(key), + resource_(resource), + descriptor_draw_(std::move(descriptor_draw)), + descriptor_draw_srgb_(std::move(descriptor_draw_srgb)), + descriptor_load_separate_(std::move(descriptor_load_separate)), + descriptor_srv_(std::move(descriptor_srv)), + descriptor_srv_stencil_(std::move(descriptor_srv_stencil)), + resource_state_(resource_state) {} + + ID3D12Resource* resource() const { return resource_.Get(); } + const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw() + const { + return descriptor_draw_; + } + const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw_srgb() + const { + return descriptor_draw_srgb_; + } + const 
ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_srv() + const { + return descriptor_srv_; + } + const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& + descriptor_srv_stencil() const { + return descriptor_srv_stencil_; + } + const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& + descriptor_load_separate() const { + return descriptor_load_separate_; + } + + D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) { + D3D12_RESOURCE_STATES old_state = resource_state_; + resource_state_ = new_state; + return old_state; + } + + uint32_t temporary_srv_descriptor_index() const { + return temporary_srv_descriptor_index_; + } + void SetTemporarySRVDescriptorIndex(uint32_t index) { + temporary_srv_descriptor_index_ = index; + } + uint32_t temporary_srv_descriptor_index_stencil() const { + return temporary_srv_descriptor_index_stencil_; + } + void SetTemporarySRVDescriptorIndexStencil(uint32_t index) { + temporary_srv_descriptor_index_stencil_ = index; + } + uint32_t temporary_sort_index() const { return temporary_sort_index_; } + void SetTemporarySortIndex(uint32_t index) { + temporary_sort_index_ = index; + } + + private: + Microsoft::WRL::ComPtr<ID3D12Resource> resource_; + ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_; + ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_srgb_; + ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_load_separate_; + // Texture SRV non-shader-visible descriptors, to prepare shader-visible + // descriptors faster, by copying rather than by creating every time. + // TODO(Triang3l): With bindless resources, persistently store them in the + // heap. + ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_; + ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_stencil_; + D3D12_RESOURCE_STATES resource_state_; + // Temporary storage for indices in operations like transfers and dumps. 
+ uint32_t temporary_srv_descriptor_index_ = UINT32_MAX; + uint32_t temporary_srv_descriptor_index_stencil_ = UINT32_MAX; + uint32_t temporary_sort_index_ = 0; + }; + enum TransferCBVRegister : uint32_t { kTransferCBVRegisterStencilMask, kTransferCBVRegisterAddress, @@ -438,7 +438,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache { // Last bits because this affects the root signature - after sorting, only // change it as fewer times as possible. Depth buffers have an additional - // depth SRV. + // stencil SRV. static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 3)); TransferMode mode : 3; }; diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 0a25cef21..b81f9dd4f 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -536,7 +536,7 @@ class DxbcShaderTranslator : public ShaderTranslator { // range to 20e4 floating point, with zeros in bits 24:31, rounding to the // nearest even. Source and destination may be the same, temporary must be // different than both. If remap_from_0_to_0_5 is true, it's assumed that - // 0...1 is pre-remapped to 0...0.5 on the input. + // 0...1 is pre-remapped to 0...0.5 in the input. static void PreClampedDepthTo20e4( dxbc::Assembler& a, uint32_t f24_temp, uint32_t f24_temp_component, uint32_t f32_temp, uint32_t f32_temp_component, uint32_t temp_temp, diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index bcebd4a6c..6c90c42e8 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -3140,7 +3140,7 @@ void DxbcShaderTranslator::PreClampedFloat32To7e3( // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // Assuming the color is already clamped to [0, 31.875]. - // Check if the number is too small to be represented as normalized 20e4. 
+ // Check if the number is too small to be represented as normalized 7e3. // temp = f32 < 2^-2 a.OpULT(temp_dest, f32_src, dxbc::Src::LU(0x3E800000)); // Handle denormalized numbers separately. From fa62d395fd5a074cee0610cc2bce3b36e84b6139 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 22 Mar 2022 21:51:02 +0300 Subject: [PATCH 4/4] [Vulkan] InitializeSubresourceRange: Use return, not reference --- src/xenia/gpu/vulkan/vulkan_command_processor.cc | 16 ++++++++-------- src/xenia/ui/vulkan/vulkan_immediate_drawer.cc | 5 +++-- src/xenia/ui/vulkan/vulkan_presenter.cc | 2 +- src/xenia/ui/vulkan/vulkan_util.h | 5 +++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index be7268329..14cce000e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -404,8 +404,8 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, acquire_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; acquire_image_memory_barrier.image = texture->image; - ui::vulkan::util::InitializeSubresourceRange( - acquire_image_memory_barrier.subresourceRange); + acquire_image_memory_barrier.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange(); } { acquire_barrier_dst_stages |= @@ -427,8 +427,8 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, acquire_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; acquire_image_memory_barrier.image = vulkan_context.image(); - ui::vulkan::util::InitializeSubresourceRange( - acquire_image_memory_barrier.subresourceRange); + acquire_image_memory_barrier.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange(); if (vulkan_context.image_ever_written_previously()) { acquire_barrier_src_stages |= ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask; @@ -496,8 +496,8 @@ void 
VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, release_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; release_image_memory_barrier.image = texture->image; - ui::vulkan::util::InitializeSubresourceRange( - release_image_memory_barrier.subresourceRange); + release_image_memory_barrier.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange(); } { release_barrier_src_stages |= @@ -523,8 +523,8 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, release_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; release_image_memory_barrier.image = vulkan_context.image(); - ui::vulkan::util::InitializeSubresourceRange( - release_image_memory_barrier.subresourceRange); + release_image_memory_barrier.subresourceRange = + ui::vulkan::util::InitializeSubresourceRange(); } assert_not_zero(release_barrier_src_stages); assert_not_zero(release_barrier_dst_stages); diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index b8fecdaa1..a90f530d9 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -426,7 +426,8 @@ void VulkanImmediateDrawer::End() { image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - util::InitializeSubresourceRange(image_memory_barrier.subresourceRange); + image_memory_barrier.subresourceRange = + util::InitializeSubresourceRange(); for (const PendingTextureUpload& pending_texture_upload : texture_uploads_pending_) { image_memory_barriers.emplace_back(image_memory_barrier).image = @@ -913,7 +914,7 @@ bool VulkanImmediateDrawer::CreateTextureResource( image_view_create_info.components.g = swizzle; image_view_create_info.components.b = swizzle; image_view_create_info.components.a = swizzle; - 
util::InitializeSubresourceRange(image_view_create_info.subresourceRange); + image_view_create_info.subresourceRange = util::InitializeSubresourceRange(); VkImageView image_view; if (dfn.vkCreateImageView(device, &image_view_create_info, nullptr, &image_view) != VK_SUCCESS) { diff --git a/src/xenia/ui/vulkan/vulkan_presenter.cc b/src/xenia/ui/vulkan/vulkan_presenter.cc index bf129b103..f1353bf3e 100644 --- a/src/xenia/ui/vulkan/vulkan_presenter.cc +++ b/src/xenia/ui/vulkan/vulkan_presenter.cc @@ -313,7 +313,7 @@ bool VulkanPresenter::CaptureGuestOutput(RawImage& image_out) { image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_memory_barrier.image = guest_output_image->image(); - util::InitializeSubresourceRange(image_memory_barrier.subresourceRange); + image_memory_barrier.subresourceRange = util::InitializeSubresourceRange(); dfn.vkCmdPipelineBarrier(command_buffer, kGuestOutputInternalStageMask, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_memory_barrier); diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h index 0eb2532d0..b566e26bb 100644 --- a/src/xenia/ui/vulkan/vulkan_util.h +++ b/src/xenia/ui/vulkan/vulkan_util.h @@ -124,17 +124,18 @@ inline VkExtent2D GetMax2DFramebufferExtent(const VulkanProvider& provider) { return max_extent; } -inline void InitializeSubresourceRange( - VkImageSubresourceRange& range, +inline VkImageSubresourceRange InitializeSubresourceRange( VkImageAspectFlags aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT, uint32_t base_mip_level = 0, uint32_t level_count = VK_REMAINING_MIP_LEVELS, uint32_t base_array_layer = 0, uint32_t layer_count = VK_REMAINING_ARRAY_LAYERS) { + VkImageSubresourceRange range; range.aspectMask = aspect_mask; range.baseMipLevel = base_mip_level; range.levelCount = level_count; range.baseArrayLayer = base_array_layer; range.layerCount = layer_count; + return range; } // Creates a 
buffer backed by a dedicated allocation. The allocation size will