diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 710d80e55..f120c4580 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1600,6 +1600,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32; uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; @@ -1655,7 +1656,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= DxbcShaderTranslator::kSysFlag_Color3Gamma; } if (IsROVUsedForEDRAM()) { - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; if (rb_depthcontrol & (0x1 | 0x2)) { flags |= DxbcShaderTranslator::kSysFlag_DepthStencil; if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == @@ -1676,6 +1676,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::kSysFlag_DepthPassIfEqual | DxbcShaderTranslator::kSysFlag_DepthPassIfGreater; } + if (rb_depthcontrol & 0x1) { + // Stencil test may modify the stencil buffer arbitrarily, so enable + // writing. + flags |= DxbcShaderTranslator::kSysFlag_StencilTest | + DxbcShaderTranslator::kSysFlag_DepthStencilWrite; + } } } dirty |= system_constants_.flags != flags; @@ -1935,12 +1941,67 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } } - // Depth testing and blend constant for ROV blending. + // Depth/stencil testing and blend constant for ROV blending. 
if (IsROVUsedForEDRAM()) { uint32_t depth_base_dwords = (regs[XE_GPU_REG_RB_DEPTH_INFO].u32 & 0xFFF) * 1280; dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords; + + if (rb_depthcontrol & 0x1) { + uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; + uint32_t stencil_value; + + stencil_value = rb_stencilrefmask & 0xFF; + dirty |= system_constants_.edram_stencil_reference != stencil_value; + system_constants_.edram_stencil_reference = stencil_value; + stencil_value = (rb_stencilrefmask >> 8) & 0xFF; + dirty |= system_constants_.edram_stencil_read_mask != stencil_value; + system_constants_.edram_stencil_read_mask = stencil_value; + stencil_value = (rb_stencilrefmask >> 16) & 0xFF; + dirty |= system_constants_.edram_stencil_write_mask != stencil_value; + system_constants_.edram_stencil_write_mask = stencil_value; + + static const uint32_t kStencilOpMap[] = { + DxbcShaderTranslator::kStencilOp_Keep, + DxbcShaderTranslator::kStencilOp_Zero, + DxbcShaderTranslator::kStencilOp_Replace, + DxbcShaderTranslator::kStencilOp_IncrementSaturate, + DxbcShaderTranslator::kStencilOp_DecrementSaturate, + DxbcShaderTranslator::kStencilOp_Invert, + DxbcShaderTranslator::kStencilOp_Increment, + DxbcShaderTranslator::kStencilOp_Decrement, + }; + + stencil_value = kStencilOpMap[(rb_depthcontrol >> 11) & 0x7]; + dirty |= system_constants_.edram_stencil_front_fail != stencil_value; + system_constants_.edram_stencil_front_fail = stencil_value; + stencil_value = kStencilOpMap[(rb_depthcontrol >> 17) & 0x7]; + dirty |= + system_constants_.edram_stencil_front_depth_fail != stencil_value; + system_constants_.edram_stencil_front_depth_fail = stencil_value; + stencil_value = kStencilOpMap[(rb_depthcontrol >> 14) & 0x7]; + dirty |= system_constants_.edram_stencil_front_pass != stencil_value; + system_constants_.edram_stencil_front_pass = stencil_value; + stencil_value = (rb_depthcontrol >> 8) & 0x7; 
+ dirty |= + system_constants_.edram_stencil_front_comparison != stencil_value; + system_constants_.edram_stencil_front_comparison = stencil_value; + + stencil_value = kStencilOpMap[(rb_depthcontrol >> 23) & 0x7]; + dirty |= system_constants_.edram_stencil_back_fail != stencil_value; + system_constants_.edram_stencil_back_fail = stencil_value; + stencil_value = kStencilOpMap[(rb_depthcontrol >> 29) & 0x7]; + dirty |= system_constants_.edram_stencil_back_depth_fail != stencil_value; + system_constants_.edram_stencil_back_depth_fail = stencil_value; + stencil_value = kStencilOpMap[(rb_depthcontrol >> 26) & 0x7]; + dirty |= system_constants_.edram_stencil_back_pass != stencil_value; + system_constants_.edram_stencil_back_pass = stencil_value; + stencil_value = (rb_depthcontrol >> 20) & 0x7; + dirty |= system_constants_.edram_stencil_back_comparison != stencil_value; + system_constants_.edram_stencil_back_comparison = stencil_value; + } + dirty |= system_constants_.edram_blend_constant[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; system_constants_.edram_blend_constant[0] = diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 11b024749..476877557 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -3610,17 +3610,17 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // TODO(Triang3l): Do depth/stencil before the translated shader if possible. // *************************************************************************** - uint32_t depth_stencil_control_temp = PushSystemTemp(); + uint32_t depth_stencil_test_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_Flags_Index; // Check if anything related to depth/stencil needs to be done at all, and get // the conditions of passing the depth test - as 0 or 0xFFFFFFFF - into - // depth_stencil_control_temp. + // depth_stencil_test_temp. 
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(1); @@ -3648,7 +3648,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -3706,22 +3706,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // First, the depth test. // New depth in system_temp_depth_.x, old depth in system_temp_depth_.y. - // 1) Less/greater. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1)); - shader_code_.push_back(depth_stencil_test_result_temp); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01000000, 1)); - shader_code_.push_back(system_temp_depth_); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b00000100, 1)); - shader_code_.push_back(system_temp_depth_); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - - // 2) Equal. + // 1) Equal. 
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( @@ -3736,6 +3721,21 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.int_instruction_count; + // 2) Less/greater. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01000000, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b00000100, 1)); + shader_code_.push_back(system_temp_depth_); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + // 3) Compare the results with the expected. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); @@ -3747,7 +3747,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(depth_stencil_test_result_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); ++stat_.instruction_count; ++stat_.uint_instruction_count; @@ -3781,15 +3781,16 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.uint_instruction_count; - // Depth test done, and we don't need to compare the new value with anything - // anymore, so check if we need to apply the depth write mask and keep the - // old depth value if it's disabled. Also check if the stencil test needs to - // be performed. + // depth_stencil_test_temp may be reused now. + + // Depth test done. 
Now check the depth write mask (write it to + // depth_stencil_test_temp.x) and whether stencil testing is enabled (to + // depth_stencil_test_temp.y). shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); shader_code_.push_back(EncodeVectorReplicatedOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); shader_code_.push_back(cbuffer_index_system_constants_); @@ -3813,7 +3814,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(system_temp_depth_); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); shader_code_.push_back(system_temp_depth_); @@ -3823,34 +3824,19 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.movc_instruction_count; - // Get the bit to check if stencil test needs to be done. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(depth_stencil_control_temp); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_Flags_Vec); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(kSysFlag_StencilTest); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - // Check if stencil test needs to be done. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( D3D10_SB_INSTRUCTION_TEST_NONZERO) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_stencil_control_temp); + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; + // depth_stencil_test_temp may be reused now. + // *************************************************************************** // Stencil test begins here. // @@ -3858,9 +3844,463 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // system_temp_depth_.z. // *************************************************************************** - // TODO(Triang3l): Actually do the stencil test. + // Load the operations and the comparison to depth_stencil_test_temp. 
+ system_constants_used_ |= (1ull << kSysConst_EDRAMStencilFront_Index) | + (1ull << kSysConst_EDRAMStencilBack_Index); + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0, 1)); + shader_code_.push_back(kPSInFrontFaceRegister); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilFront_Vec); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilBack_Vec); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; - // Preserve the original color value if either depth or stencil test has + // Load masked reference and original value into + // depth_stencil_test_result_temp.yw for comparison. 
+ system_constants_used_ |= (1ull << kSysConst_EDRAMStencilReference_Index) | (1ull << kSysConst_EDRAMStencilReadMask_Index); + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_EDRAMStencilReference_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilReference_Vec); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_EDRAMStencilReadMask_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilReadMask_Vec); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_EDRAMStencilReadMask_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilReadMask_Vec); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Compare the stencil values into 
depth_stencil_test_result_temp.y - + // depth_stencil_test_result_temp.x contains the depth test result. + + // 1) Equal - has only 1 result, so won't overwrite the original values in yw. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // 2) Less/greater - compare yw into yw. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01101100, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // 3) Extract the comparison values. 
+ uint32_t stencil_comparison_temp = PushSystemTemp(); + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1110, 1)); + shader_code_.push_back(stencil_comparison_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(1); + shader_code_.push_back(2); + shader_code_.push_back(EncodeVectorReplicatedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSysConst_EDRAMStencilSide_Comparison_Comp, + 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // 4) Compare the results with the expected. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1110, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(stencil_comparison_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Release stencil_comparison_temp. + PopSystemTemp(); + + // 5) Start combining the results into depth_stencil_test_result_temp.y. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // 6) Finish combining the results into depth_stencil_test_result_temp.y. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Write the needed stencil operation to depth_stencil_test_result_temp.z. + + // 1) Assuming the stencil test has passed, choose between the "pass" + // operation and the "depth fail" operation. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSysConst_EDRAMStencilSide_Pass_Comp, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, + kSysConst_EDRAMStencilSide_DepthFail_Comp, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // 2) If the stencil test has failed, use the "fail" operation. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSysConst_EDRAMStencilSide_Fail_Comp, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // depth_stencil_test_temp may be reused now. + + // We don't need separate depth and stencil test results anymore, so now we + // can mark the pixel to be discarded if the stencil test has failed - by + // setting that whole depth/stencil test has failed. 
The original depth value + // will be restored in this case, and after writing the new stencil, the pixel + // will be discarded. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Extract the sub-operations to depth_stencil_test_temp. + // + // Don't care about & 0xFF now, applying the write mask will drop the unused + // bits. + // + // X - current value mask (keep/increment/decrement/invert vs. zero/replace) - + // 0xFFFFFFFF if the original value is needed, 0 if it needs to be zeroed. + // Y - value to add (increment/decrement) - 0, 1 or 0xFFFFFFFF (-1). + // Z - saturate - 0xFFFFFFFF to clamp to 0-255 after addition, 0 otherwise. + // W - invert mask - for XORing: 0xFFFFFFFF if need to invert, 0 otherwise. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(1); + shader_code_.push_back(2); + shader_code_.push_back(1); + shader_code_.push_back(1); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(kStencilOp_Flag_CurrentMask_Shift); + shader_code_.push_back(kStencilOp_Flag_Add_Shift); + shader_code_.push_back(kStencilOp_Flag_Saturate_Shift); + shader_code_.push_back(kStencilOp_Flag_Invert_Shift); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // Mask the original value into depth_stencil_test_temp.x. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Increment or decrement the value in depth_stencil_test_temp.x. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + + // Saturate the value in depth_stencil_test_temp.x if needed, using + // depth_stencil_test_temp.y as an intermediate for min/max. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMAX) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMIN) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0xFF); + ++stat_.instruction_count; + ++stat_.int_instruction_count; + 
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // Invert the value in depth_stencil_test_temp.x if needed. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_XOR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Replace the value in depth_stencil_test_temp.x with the reference if + // needed. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(depth_stencil_test_result_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(kStencilOp_Flag_NewMask); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + system_constants_used_ |= 1ull << kSysConst_EDRAMStencilReference_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_EDRAMStencilReference_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilReference_Vec); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // Apply the write mask to the new value - this will also reduce it to 8 bits. 
+ system_constants_used_ |= 1ull << kSysConst_EDRAMStencilWriteMask_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_EDRAMStencilWriteMask_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilWriteMask_Vec); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Invert the write mask to keep the unmodified bits of the old value. + system_constants_used_ |= 1ull << kSysConst_EDRAMStencilWriteMask_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_NOT) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(depth_stencil_test_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_EDRAMStencilWriteMask_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_EDRAMStencilWriteMask_Vec); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Mask the old value. 
+ shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Combine the old and new stencil values. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(depth_stencil_test_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Preserve the original depth value if either depth or stencil test has // failed because when stencil testing is enabled, something will be written // since stencil may be modified on failure too (if stencil is disabled, // there's a discard later, in the "else" case, that will prevent depth @@ -3915,7 +4355,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); shader_code_.push_back(EncodeVectorSelectOperand( D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 
kSysConst_Flags_Comp, 3)); shader_code_.push_back(cbuffer_index_system_constants_); @@ -3934,7 +4374,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(depth_stencil_control_temp); + shader_code_.push_back(depth_stencil_test_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -4000,7 +4440,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ++stat_.instruction_count; - // Release depth_stencil_control_temp. + // Release depth_stencil_test_temp. PopSystemTemp(); // *************************************************************************** diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 52c0fc36d..ca85278db 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -78,17 +78,17 @@ class DxbcShaderTranslator : public ShaderTranslator { enum : uint32_t { kStencilOp_Flag_CurrentMask_Shift, - kStencilOp_Flag_Invert_Shift, // 0, 1 or 3 expanded to 0 or 1 or 0xFF - the value to add. kStencilOp_Flag_Add_Shift, kStencilOp_Flag_Saturate_Shift = kStencilOp_Flag_Add_Shift + 2, + kStencilOp_Flag_Invert_Shift, kStencilOp_Flag_NewMask_Shift, kStencilOp_Flag_CurrentMask = 1u << kStencilOp_Flag_CurrentMask_Shift, - kStencilOp_Flag_Invert = 1u << kStencilOp_Flag_Invert_Shift, kStencilOp_Flag_Increment = 1u << kStencilOp_Flag_Add_Shift, kStencilOp_Flag_Decrement = 3u << kStencilOp_Flag_Add_Shift, kStencilOp_Flag_Saturate = 1u << kStencilOp_Flag_Saturate_Shift, + kStencilOp_Flag_Invert = 1u << kStencilOp_Flag_Invert_Shift, kStencilOp_Flag_NewMask = 1u << kStencilOp_Flag_NewMask_Shift, kStencilOp_Keep = kStencilOp_Flag_CurrentMask,