From bc0c2040e275d5684ca66dbf17d63e4b172052b7 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 10 Dec 2020 12:36:37 +0300 Subject: [PATCH] [DXBC] ROV: Force late Z write with kill instructions --- src/xenia/gpu/dxbc_shader_translator.h | 12 +-- src/xenia/gpu/dxbc_shader_translator_om.cc | 95 ++++++++++++---------- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 2ca52e7f5..87820587b 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -189,12 +189,12 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_ROVStencilTest_Shift, // If the depth/stencil test has failed, but resulted in a stencil value // that is different than the one currently in the depth buffer, write it - // anyway and don't run the shader (to check if the sample may be discarded - // some way). This, however, also results in depth/stencil testing done - // entirely early even when it passes to prevent writing in divergent places - // in the shader. When the shader can kill, this must be set only for - // RB_DEPTHCONTROL EARLY_Z_ENABLE, not for alpha test/alpha to coverage - // disabled. + // anyway and don't run the rest of the shader (to check if the sample may + // be discarded some way) - use when alpha test and alpha to coverage are + // disabled. Ignored by the shader if not applicable to it (like if it has + // kill instructions or writes the depth output). + // TODO(Triang3l): Replace with an alpha-to-mask flag, check if + // (flags & (alpha test | alpha to mask)) == (always | disabled). 
kSysFlag_ROVDepthStencilEarlyWrite_Shift, kSysFlag_Count, diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index f3b964ae2..ea79b737c 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -1024,51 +1024,60 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp.z = viewport maximum depth if not writing to oDepth // temp.w = whether depth/stencil has been modified DxbcOpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src); - // Check if need to write. - // temp.x? = resulting sample depth/stencil - // temp.y = polygon offset if not writing to oDepth - // temp.z = viewport maximum depth if not writing to oDepth - // temp.w = free - DxbcOpIf(true, temp_w_src); - { - if (depth_stencil_early) { - // Get if early depth/stencil write is enabled to temp.w. - // temp.w = whether early depth/stencil write is enabled - system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(temp_w_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite)); - // Check if need to write early. - // temp.w = free - DxbcOpIf(true, temp_w_src); - } - // Write the new depth/stencil. - if (uav_index_edram_ == kBindingIndexUnallocated) { - uav_index_edram_ = uav_count_++; - } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, - sample_depth_stencil_src); - if (depth_stencil_early) { - // Need to still run the shader to know whether to write the - // depth/stencil value. - DxbcOpElse(); - // Set sample bit out of bits 4:7 of system_temp_rov_params_.x if need - // to write later (after checking if the sample is not discarded by a - // kill instruction, alphatest or alpha-to-coverage). 
- DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << (4 + i))); - // Close the early depth/stencil check. - DxbcOpEndIf(); + if (depth_stencil_early && !CanWriteZEarly()) { + // Set the sample bit in bits 4:7 of system_temp_rov_params_.x - always + // need to write late in this shader, as it may do something like + // explicitly killing pixels. + DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(1), + DxbcSrc::LU(4 + i), temp_w_src, + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); + } else { + // Check if need to write. + // temp.x? = resulting sample depth/stencil + // temp.y = polygon offset if not writing to oDepth + // temp.z = viewport maximum depth if not writing to oDepth + // temp.w = free + DxbcOpIf(true, temp_w_src); + { + if (depth_stencil_early) { + // Get if early depth/stencil write is enabled to temp.w. + // temp.w = whether early depth/stencil write is enabled + system_constants_used_ |= 1ull << kSysConst_Flags_Index; + DxbcOpAnd(temp_w_dest, + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite)); + // Check if need to write early. + // temp.w = free + DxbcOpIf(true, temp_w_src); + } + // Write the new depth/stencil. + if (uav_index_edram_ == kBindingIndexUnallocated) { + uav_index_edram_ = uav_count_++; + } + DxbcOpStoreUAVTyped( + DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, + sample_depth_stencil_src); + if (depth_stencil_early) { + // Need to still run the shader to know whether to write the + // depth/stencil value. 
+ DxbcOpElse(); + // Set the sample bit in bits 4:7 of system_temp_rov_params_.x if need + // to write later (after checking if the sample is not discarded by a + // kill instruction, alpha test or alpha-to-coverage). + DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(1 << (4 + i))); + // Close the early depth/stencil check. + DxbcOpEndIf(); + } } + // Close the write check. + DxbcOpEndIf(); } - // Close the write check. - DxbcOpEndIf(); // Release sample_temp. PopSystemTemp();