From 9f789e01b6ae60e306d3af44dcc1c0dc5d76d29c Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 7 Jun 2020 15:21:31 +0300 Subject: [PATCH] [DXBC] Fix and cleanup depth-only PS register usage --- src/xenia/gpu/dxbc_shader_translator.cc | 42 +++++++++++-------- src/xenia/gpu/dxbc_shader_translator.h | 4 +- src/xenia/gpu/dxbc_shader_translator_om.cc | 49 ++++++++++++---------- 3 files changed, 55 insertions(+), 40 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 092ff2a0c..61a60ad8a 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -833,14 +833,16 @@ void DxbcShaderTranslator::StartTranslation() { if (edram_rov_used_) { // Will be initialized unconditionally. system_temp_rov_params_ = PushSystemTemp(); - // If the shader doesn't write to oDepth, each component will be written - // to if depth/stencil is enabled and the respective sample is covered - - // so need to initialize now because the first writes will be conditional. - // If the shader writes to oDepth, this is oDepth of the shader, written - // by the guest code, so initialize because assumptions can't be made - // about the integrity of the guest code. - system_temp_rov_depth_stencil_ = - PushSystemTemp(writes_depth() ? 0b0001 : 0b1111); + if (ROV_IsDepthStencilEarly() || writes_depth()) { + // If the shader doesn't write to oDepth, each component will be written + // to if depth/stencil is enabled and the respective sample is covered - + // so need to initialize now because the first writes will be + // conditional. If the shader writes to oDepth, this is oDepth of the + // shader, written by the guest code, so initialize because assumptions + // can't be made about the integrity of the guest code. + system_temp_rov_depth_stencil_ = + PushSystemTemp(writes_depth() ? 0b0001 : 0b1111); + } } for (uint32_t i = 0; i < 4; ++i) { if (writes_color_target(i)) { @@ -887,15 +889,15 @@ void DxbcShaderTranslator::StartTranslation() { system_temp_loop_count_ = PushSystemTemp(0b1111); system_temp_grad_h_lod_ = PushSystemTemp(0b1111); system_temp_grad_v_ = PushSystemTemp(0b0111); - } - // Zero general-purpose registers to prevent crashes when the game references - // them after only initializing them conditionally. - for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0; - i < register_count(); ++i) { - DxbcOpMov( - uses_register_dynamic_addressing() ? DxbcDest::X(0, i) : DxbcDest::R(i), - DxbcSrc::LF(0.0f)); + // Zero general-purpose registers to prevent crashes when the game + // references them after only initializing them conditionally. + for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0; + i < register_count(); ++i) { + DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i) + : DxbcDest::R(i), + DxbcSrc::LF(0.0f)); + } } // Write stage-specific prologue. @@ -1163,8 +1165,12 @@ void DxbcShaderTranslator::CompleteShaderCode() { } } if (edram_rov_used_) { - // Release system_temp_rov_params_ and system_temp_rov_depth_stencil_. - PopSystemTemp(2); + if (ROV_IsDepthStencilEarly() || writes_depth()) { + // Release system_temp_rov_depth_stencil_. + PopSystemTemp(); + } + // Release system_temp_rov_params_. + PopSystemTemp(); } } diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 419ebf22e..1512876da 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -2522,7 +2522,9 @@ class DxbcShaderTranslator : public ShaderTranslator { // dwords. uint32_t system_temp_rov_params_; // ROV only - new depth/stencil data. 4 VGPRs when not writing to oDepth, 1 - // VGPR when writing to oDepth. + // VGPR when writing to oDepth. Not used in the depth-only pixel shader (or, + // more formally, if neither early depth-stencil nor oDepth are used) because + // it always calculates and writes in the same place. // When not writing to oDepth: New per-sample depth/stencil values, generated // during early depth/stencil test (actual writing checks coverage bits). // When writing to oDepth: X also used to hold the depth written by the diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 3d5efd737..297b3b9ab 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -581,23 +581,28 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Perform depth/stencil test for the sample, get the result in bits 4 // (passed) and 8 (new depth/stencil buffer value is different). DxbcOpCall(DxbcSrc::Label(label_rov_depth_stencil_sample_)); - // Write the resulting depth/stencil value in system_temps_subroutine_[0].x - // to the sample's depth in system_temp_rov_depth_stencil_. - DxbcOpMov(DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i), - DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); - if (i) { - // Shift the result bits to the correct position. - DxbcOpIShL(DxbcDest::R(system_temps_subroutine_, 0b0010), - DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY), - DxbcSrc::LU(i)); + if (ROV_IsDepthStencilEarly()) { + // Write the resulting depth/stencil value in + // system_temps_subroutine_[0].x to the sample's depth in + // system_temp_rov_depth_stencil_. + DxbcOpMov(DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); + } + if (!is_depth_only_pixel_shader_) { + if (i) { + // Shift the result bits to the correct position. + DxbcOpIShL(DxbcDest::R(system_temps_subroutine_, 0b0010), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY), + DxbcSrc::LU(i)); + } + // Add the result in system_temps_subroutine_[0].y to + // system_temp_rov_params_.x. Bits 0:3 will be cleared in case of test + // failure (only doing this for covered samples), bits 4:7 will be added + // if need to defer writing. + DxbcOpXOr(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY)); } - // Add the result in system_temps_subroutine_[0].y to - // system_temp_rov_params_.x. Bits 0:3 will be cleared in case of test - // failure (only doing this for covered samples), bits 4:7 will be added if - // need to defer writing. - DxbcOpXOr(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY)); // Close the sample conditional. DxbcOpEndIf(); @@ -1595,11 +1600,13 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ROV_DepthStencilTest(); } - // Check if any sample is still covered after depth testing and writing, skip - // color writing completely in this case. - DxbcOpAnd(temp_x_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(0b1111)); - DxbcOpRetC(false, temp_x_src); + if (!is_depth_only_pixel_shader_) { + // Check if any sample is still covered after depth testing and writing, + // skip color writing completely in this case. + DxbcOpAnd(temp_x_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(0b1111)); + DxbcOpRetC(false, temp_x_src); + } // Write color values. for (uint32_t i = 0; i < 4; ++i) {