diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 6835f864e..e000fd3ad 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1645,20 +1645,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } if (render_target_cache_->IsROVUsedForEDRAM()) { uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - if (rb_depthcontrol & 0x2) { - // Read depth/stencil if depth comparison function is not "always". - uint32_t depth_comparison = (rb_depthcontrol >> 4) & 0x7; - flags |= depth_comparison - << DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift; - if (depth_comparison != 0x7) { - flags |= DxbcShaderTranslator::kSysFlag_DepthStencilRead; - if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == - DepthRenderTargetFormat::kD24FS8) { - flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24; - } + if (rb_depthcontrol & (0x1 | 0x2)) { + flags |= DxbcShaderTranslator::kSysFlag_DepthStencil; + if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == + DepthRenderTargetFormat::kD24FS8) { + flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24; } - if (rb_depthcontrol & 0x4) { - flags |= DxbcShaderTranslator::kSysFlag_DepthStencilWrite; + if (rb_depthcontrol & 0x2) { + flags |= ((rb_depthcontrol >> 4) & 0x7) + << DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift; + if (rb_depthcontrol & 0x4) { + flags |= DxbcShaderTranslator::kSysFlag_DepthWriteMask | + DxbcShaderTranslator::kSysFlag_DepthStencilWrite; + } + } else { + // In case stencil is used without depth testing - always pass, and + // don't modify the stored depth. + flags |= DxbcShaderTranslator::kSysFlag_DepthPassIfLess | + DxbcShaderTranslator::kSysFlag_DepthPassIfEqual | + DxbcShaderTranslator::kSysFlag_DepthPassIfGreater; } } } diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 42d032c3c..4b081bb24 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -3593,22 +3593,17 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // TODO(Triang3l): Do depth/stencil before the translated shader if possible. // *************************************************************************** - // Convert the depth to the target format - won't modify the W value. No need - // to do this in an if - if the value is not needed, the command processor can - // specify that the format is unorm24 - the conversion is much easier this way - // than for float24, only 2 instructions. - CompletePixelShader_DepthTo24Bit(); - - uint32_t depth_flags_temp = PushSystemTemp(); + uint32_t depth_stencil_flags_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_Flags_Index; - // Extract 0 or 0xFFFFFFFF for whether depth/stencil needs to be read and for - // the comparison results when the test should pass to depth_flags_temp. + // Check if anything related to depth/stencil needs to be done at all, and get + // the conditions of passing the depth test - as 0 or 0xFFFFFFFF - into + // depth_stencil_flags_temp. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(depth_flags_temp); + shader_code_.push_back(depth_stencil_flags_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(1); @@ -3617,7 +3612,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(1); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(kSysFlag_DepthStencilRead_Shift); + shader_code_.push_back(kSysFlag_DepthStencil_Shift); shader_code_.push_back(kSysFlag_DepthPassIfLess_Shift); shader_code_.push_back(kSysFlag_DepthPassIfEqual_Shift); shader_code_.push_back(kSysFlag_DepthPassIfGreater_Shift); @@ -3629,18 +3624,23 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.int_instruction_count; - // Check if operations involving the previous depth/stencil value need to be - // done. + // Enter the depth/stencil test if needed. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( D3D10_SB_INSTRUCTION_TEST_NONZERO) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_flags_temp); + shader_code_.push_back(depth_stencil_flags_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; + // Convert the depth to the target format - won't modify the W value. No need + // to do this in an if - if the value is not needed, the command processor can + // specify that the format is unorm24 - the conversion is much easier this way + // than for float24, only 2 instructions. + CompletePixelShader_DepthTo24Bit(); + // Load the previous combined depth/stencil value into system_temp_depth_.y. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_UAV_TYPED) | @@ -3683,8 +3683,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.uint_instruction_count; - // Do depth/stencil testing. - uint32_t depth_stencil_test_temp = PushSystemTemp(); + // Allocate a register for depth/stencil testing. + uint32_t depth_stencil_test_result_temp = PushSystemTemp(); // First, the depth test. // New depth in system_temp_depth_.x, old depth in system_temp_depth_.y. @@ -3694,7 +3694,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01000000, 1)); shader_code_.push_back(system_temp_depth_); @@ -3709,7 +3709,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); shader_code_.push_back(system_temp_depth_); @@ -3724,79 +3724,111 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1110, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(depth_flags_temp); + shader_code_.push_back(depth_stencil_flags_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; - // 4) Start combining the results into system_temp_depth_.x - using .x - // specifically to keep the value because the stencil test also depends on the - // result of the depth test. + // 4) Start combining the results into depth_stencil_test_result_temp.x. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; - // 5) Finish combining the results into system_temp_depth_.w. + // 5) Finish combining the results into depth_stencil_test_result_temp.x. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; - // 6) Discard the pixel if depth test failed. + // Depth test done, and we don't need to compare the new value with anything + // anymore, so check if we need to apply the depth write mask and keep the + // old depth value if it's disabled. Also check if the stencil test needs to + // be performed. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); + shader_code_.push_back(depth_stencil_flags_temp); + shader_code_.push_back(EncodeVectorReplicatedOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_Flags_Vec); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(kSysFlag_DepthWriteMask); + shader_code_.push_back(kSysFlag_StencilTest); + shader_code_.push_back(0); + shader_code_.push_back(0); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Preserve the original depth if the depth write mask is disabled - copy + // system_temp_depth_.y to system_temp_depth_.x if needed. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_flags_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(system_temp_depth_); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // TODO(Triang3l): Do stencil testing. + + // Discard the pixel if depth/stencil test failed. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DISCARD) | ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(depth_stencil_test_result_temp); ++stat_.instruction_count; - // TODO(Triang3l): Preserve the original depth if the depth write mask is - // false. - - // TODO(Triang3l): Do stencil testing. - - // Release depth_stencil_test_temp. + // Release depth_stencil_test_result_temp. PopSystemTemp(); - // Operations involving the previous value done. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; - // Get the bit to check if need to write the new depth/stencil value. // The write masks of depth specifically and stencil specifically are handled - // in the depth/stencil test code. + // before. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(depth_flags_temp); + shader_code_.push_back(depth_stencil_flags_temp); shader_code_.push_back(EncodeVectorSelectOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); shader_code_.push_back(cbuffer_index_system_constants_); @@ -3815,7 +3847,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_flags_temp); + shader_code_.push_back(depth_stencil_flags_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -3852,12 +3884,17 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.c_texture_store_instructions; - // Done writing to the depth/stencil buffer. + // Depth/stencil writing done. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ++stat_.instruction_count; - // Release depth_flags_temp. + // Depth/stencil operations done. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + + // Release depth_stencil_flags_temp. PopSystemTemp(); // *************************************************************************** diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 99ce902b5..b7aa132dc 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -39,19 +39,20 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal_Shift, kSysFlag_ReverseZ_Shift, - kSysFlag_DepthStencilRead_Shift, + kSysFlag_DepthStencil_Shift, + kSysFlag_DepthFloat24_Shift, // Depth/stencil testing not done if DepthStencilRead is disabled, but // writing may still be done. kSysFlag_DepthPassIfLess_Shift, kSysFlag_DepthPassIfEqual_Shift, kSysFlag_DepthPassIfGreater_Shift, + // 1 to write new depth to the depth buffer, 0 to keep the old one if the + // depth test passes. + kSysFlag_DepthWriteMask_Shift, + kSysFlag_StencilTest_Shift, // This doesn't include depth/stencil masks - only reflects the fact that // the new value must be written. kSysFlag_DepthStencilWrite_Shift, - // If don't need to read or write the depth component of the depth/stencil - // buffer, better disable kSysFlag_DepthFloat24 because float->unorm is - // easier to perform than float32->float24. - kSysFlag_DepthFloat24_Shift, kSysFlag_Color0Gamma_Shift, kSysFlag_Color1Gamma_Shift, kSysFlag_Color2Gamma_Shift, @@ -61,12 +62,14 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, kSysFlag_ReverseZ = 1u << kSysFlag_ReverseZ_Shift, - kSysFlag_DepthStencilRead = 1u << kSysFlag_DepthStencilRead_Shift, + kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift, + kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift, kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift, kSysFlag_DepthPassIfEqual = 1u << kSysFlag_DepthPassIfEqual_Shift, kSysFlag_DepthPassIfGreater = 1u << kSysFlag_DepthPassIfGreater_Shift, + kSysFlag_DepthWriteMask = 1u << kSysFlag_DepthWriteMask_Shift, + kSysFlag_StencilTest = 1u << kSysFlag_StencilTest_Shift, kSysFlag_DepthStencilWrite = 1u << kSysFlag_DepthStencilWrite_Shift, - kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift, kSysFlag_Color0Gamma = 1u << kSysFlag_Color0Gamma_Shift, kSysFlag_Color1Gamma = 1u << kSysFlag_Color1Gamma_Shift, kSysFlag_Color2Gamma = 1u << kSysFlag_Color2Gamma_Shift,