[DXBC] Fix and clean up depth-only PS register usage

This commit is contained in:
Triang3l 2020-06-07 15:21:31 +03:00
parent 2cd69092ce
commit 9f789e01b6
3 changed files with 55 additions and 40 deletions

View File

@ -833,14 +833,16 @@ void DxbcShaderTranslator::StartTranslation() {
if (edram_rov_used_) {
// Will be initialized unconditionally.
system_temp_rov_params_ = PushSystemTemp();
// If the shader doesn't write to oDepth, each component will be written
// to if depth/stencil is enabled and the respective sample is covered -
// so need to initialize now because the first writes will be conditional.
// If the shader writes to oDepth, this is oDepth of the shader, written
// by the guest code, so initialize because assumptions can't be made
// about the integrity of the guest code.
system_temp_rov_depth_stencil_ =
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
if (ROV_IsDepthStencilEarly() || writes_depth()) {
// If the shader doesn't write to oDepth, each component will be written
// to if depth/stencil is enabled and the respective sample is covered -
// so need to initialize now because the first writes will be
// conditional. If the shader writes to oDepth, this is oDepth of the
// shader, written by the guest code, so initialize because assumptions
// can't be made about the integrity of the guest code.
system_temp_rov_depth_stencil_ =
PushSystemTemp(writes_depth() ? 0b0001 : 0b1111);
}
}
for (uint32_t i = 0; i < 4; ++i) {
if (writes_color_target(i)) {
@ -887,15 +889,15 @@ void DxbcShaderTranslator::StartTranslation() {
system_temp_loop_count_ = PushSystemTemp(0b1111);
system_temp_grad_h_lod_ = PushSystemTemp(0b1111);
system_temp_grad_v_ = PushSystemTemp(0b0111);
}
// Zero general-purpose registers to prevent crashes when the game references
// them after only initializing them conditionally.
for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0;
i < register_count(); ++i) {
DxbcOpMov(
uses_register_dynamic_addressing() ? DxbcDest::X(0, i) : DxbcDest::R(i),
DxbcSrc::LF(0.0f));
// Zero general-purpose registers to prevent crashes when the game
// references them after only initializing them conditionally.
for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0;
i < register_count(); ++i) {
DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i)
: DxbcDest::R(i),
DxbcSrc::LF(0.0f));
}
}
// Write stage-specific prologue.
@ -1163,8 +1165,12 @@ void DxbcShaderTranslator::CompleteShaderCode() {
}
}
if (edram_rov_used_) {
// Release system_temp_rov_params_ and system_temp_rov_depth_stencil_.
PopSystemTemp(2);
if (ROV_IsDepthStencilEarly() || writes_depth()) {
// Release system_temp_rov_depth_stencil_.
PopSystemTemp();
}
// Release system_temp_rov_params_.
PopSystemTemp();
}
}

View File

@ -2522,7 +2522,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
// dwords.
uint32_t system_temp_rov_params_;
// ROV only - new depth/stencil data. 4 VGPRs when not writing to oDepth, 1
// VGPR when writing to oDepth.
// VGPR when writing to oDepth. Not used in the depth-only pixel shader (or,
// more formally, if neither early depth-stencil nor oDepth is used) because
// that shader always calculates and writes in the same place.
// When not writing to oDepth: New per-sample depth/stencil values, generated
// during early depth/stencil test (actual writing checks coverage bits).
// When writing to oDepth: X also used to hold the depth written by the

View File

@ -581,23 +581,28 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
// Perform depth/stencil test for the sample, get the result in bits 4
// (passed) and 8 (new depth/stencil buffer value is different).
DxbcOpCall(DxbcSrc::Label(label_rov_depth_stencil_sample_));
// Write the resulting depth/stencil value in system_temps_subroutine_[0].x
// to the sample's depth in system_temp_rov_depth_stencil_.
DxbcOpMov(DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
if (i) {
// Shift the result bits to the correct position.
DxbcOpIShL(DxbcDest::R(system_temps_subroutine_, 0b0010),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY),
DxbcSrc::LU(i));
if (ROV_IsDepthStencilEarly()) {
// Write the resulting depth/stencil value in
// system_temps_subroutine_[0].x to the sample's depth in
// system_temp_rov_depth_stencil_.
DxbcOpMov(DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
}
if (!is_depth_only_pixel_shader_) {
if (i) {
// Shift the result bits to the correct position.
DxbcOpIShL(DxbcDest::R(system_temps_subroutine_, 0b0010),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY),
DxbcSrc::LU(i));
}
// Add the result in system_temps_subroutine_[0].y to
// system_temp_rov_params_.x. Bits 0:3 will be cleared in case of test
// failure (only doing this for covered samples), bits 4:7 will be added
// if writing needs to be deferred.
DxbcOpXOr(DxbcDest::R(system_temp_rov_params_, 0b0001),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY));
}
// Add the result in system_temps_subroutine_[0].y to
// system_temp_rov_params_.x. Bits 0:3 will be cleared in case of test
// failure (only doing this for covered samples), bits 4:7 will be added if
// need to defer writing.
DxbcOpXOr(DxbcDest::R(system_temp_rov_params_, 0b0001),
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY));
// Close the sample conditional.
DxbcOpEndIf();
@ -1595,11 +1600,13 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ROV_DepthStencilTest();
}
// Check if any sample is still covered after depth testing and writing, skip
// color writing completely in this case.
DxbcOpAnd(temp_x_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
DxbcSrc::LU(0b1111));
DxbcOpRetC(false, temp_x_src);
if (!is_depth_only_pixel_shader_) {
// Check if any sample is still covered after depth testing and writing,
// skip color writing completely if none is.
DxbcOpAnd(temp_x_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
DxbcSrc::LU(0b1111));
DxbcOpRetC(false, temp_x_src);
}
// Write color values.
for (uint32_t i = 0; i < 4; ++i) {