[D3D12] ROV: Broken stencil test

This commit is contained in:
Triang3l 2018-10-19 11:03:45 +03:00
parent ea2864d21e
commit c1e78f6953
3 changed files with 556 additions and 55 deletions

View File

@ -1600,6 +1600,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t vgt_indx_offset = regs[XE_GPU_REG_VGT_INDX_OFFSET].u32;
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32;
@ -1655,7 +1656,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
flags |= DxbcShaderTranslator::kSysFlag_Color3Gamma;
}
if (IsROVUsedForEDRAM()) {
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
if (rb_depthcontrol & (0x1 | 0x2)) {
flags |= DxbcShaderTranslator::kSysFlag_DepthStencil;
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) ==
@ -1676,6 +1676,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
DxbcShaderTranslator::kSysFlag_DepthPassIfEqual |
DxbcShaderTranslator::kSysFlag_DepthPassIfGreater;
}
if (rb_depthcontrol & 0x1) {
// Stencil test may modify the stencil buffer arbitrarily, so enable
// writing.
flags |= DxbcShaderTranslator::kSysFlag_StencilTest |
DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
}
}
}
dirty |= system_constants_.flags != flags;
@ -1935,12 +1941,67 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
}
}
// Depth testing and blend constant for ROV blending.
// Depth/stencil testing and blend constant for ROV blending.
if (IsROVUsedForEDRAM()) {
uint32_t depth_base_dwords =
(regs[XE_GPU_REG_RB_DEPTH_INFO].u32 & 0xFFF) * 1280;
dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords;
system_constants_.edram_depth_base_dwords = depth_base_dwords;
if (rb_depthcontrol & 0x1) {
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
uint32_t stencil_value;
stencil_value = rb_stencilrefmask & 0xFF;
dirty |= system_constants_.edram_stencil_reference != stencil_value;
system_constants_.edram_stencil_reference = stencil_value;
stencil_value = (rb_stencilrefmask >> 8) & 0xFF;
dirty |= system_constants_.edram_stencil_read_mask != stencil_value;
system_constants_.edram_stencil_read_mask = stencil_value;
stencil_value = (rb_stencilrefmask >> 16) & 0xFF;
dirty |= system_constants_.edram_stencil_write_mask != stencil_value;
system_constants_.edram_stencil_write_mask = stencil_value;
// Lookup table converting a 3-bit stencil operation field taken from
// rb_depthcontrol (the fail / depth fail / pass fields for the front and back
// faces, each masked with 0x7 below) into the corresponding
// DxbcShaderTranslator::kStencilOp_* value that is uploaded in the system
// constants. The element order is the contract: entry N is the translator
// operation used when the register field equals N.
static const uint32_t kStencilOpMap[] = {
DxbcShaderTranslator::kStencilOp_Keep,
DxbcShaderTranslator::kStencilOp_Zero,
DxbcShaderTranslator::kStencilOp_Replace,
DxbcShaderTranslator::kStencilOp_IncrementSaturate,
DxbcShaderTranslator::kStencilOp_DecrementSaturate,
DxbcShaderTranslator::kStencilOp_Invert,
DxbcShaderTranslator::kStencilOp_Increment,
DxbcShaderTranslator::kStencilOp_Decrement,
};
stencil_value = kStencilOpMap[(rb_depthcontrol >> 11) & 0x7];
dirty |= system_constants_.edram_stencil_front_fail != stencil_value;
system_constants_.edram_stencil_front_fail = stencil_value;
stencil_value = kStencilOpMap[(rb_depthcontrol >> 17) & 0x7];
dirty |=
system_constants_.edram_stencil_front_depth_fail != stencil_value;
system_constants_.edram_stencil_front_depth_fail = stencil_value;
stencil_value = kStencilOpMap[(rb_depthcontrol >> 14) & 0x7];
dirty |= system_constants_.edram_stencil_front_pass != stencil_value;
system_constants_.edram_stencil_front_pass = stencil_value;
stencil_value = (rb_depthcontrol >> 8) & 0x7;
dirty |=
system_constants_.edram_stencil_front_comparison != stencil_value;
system_constants_.edram_stencil_front_comparison = stencil_value;
stencil_value = kStencilOpMap[(rb_depthcontrol >> 23) & 0x7];
dirty |= system_constants_.edram_stencil_back_fail != stencil_value;
system_constants_.edram_stencil_back_fail = stencil_value;
stencil_value = kStencilOpMap[(rb_depthcontrol >> 29) & 0x7];
dirty |= system_constants_.edram_stencil_back_depth_fail != stencil_value;
system_constants_.edram_stencil_back_depth_fail = stencil_value;
stencil_value = kStencilOpMap[(rb_depthcontrol >> 26) & 0x7];
dirty |= system_constants_.edram_stencil_back_pass != stencil_value;
system_constants_.edram_stencil_back_pass = stencil_value;
stencil_value = (rb_depthcontrol >> 20) & 0x7;
dirty |= system_constants_.edram_stencil_back_comparison != stencil_value;
system_constants_.edram_stencil_back_comparison = stencil_value;
}
dirty |= system_constants_.edram_blend_constant[0] !=
regs[XE_GPU_REG_RB_BLEND_RED].f32;
system_constants_.edram_blend_constant[0] =

View File

@ -3610,17 +3610,17 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// TODO(Triang3l): Do depth/stencil before the translated shader if possible.
// ***************************************************************************
uint32_t depth_stencil_control_temp = PushSystemTemp();
uint32_t depth_stencil_test_temp = PushSystemTemp();
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
// Check if anything related to depth/stencil needs to be done at all, and get
// the conditions of passing the depth test - as 0 or 0xFFFFFFFF - into
// depth_stencil_control_temp.
// depth_stencil_test_temp.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(1);
@ -3648,7 +3648,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
@ -3706,22 +3706,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// First, the depth test.
// New depth in system_temp_depth_.x, old depth in system_temp_depth_.y.
// 1) Less/greater.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01000000, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b00000100, 1));
shader_code_.push_back(system_temp_depth_);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// 2) Equal.
// 1) Equal.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
@ -3736,6 +3721,21 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.int_instruction_count;
// 2) Less/greater.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01000000, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b00000100, 1));
shader_code_.push_back(system_temp_depth_);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// 3) Compare the results with the expected.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
@ -3747,7 +3747,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
@ -3781,15 +3781,16 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Depth test done, and we don't need to compare the new value with anything
// anymore, so check if we need to apply the depth write mask and keep the
// old depth value if it's disabled. Also check if the stencil test needs to
// be performed.
// depth_stencil_test_temp may be reused now.
// Depth test done. Now check the depth write mask (write it to
// depth_stencil_test_temp.x) and whether stencil testing is enabled (to
// depth_stencil_test_temp.y).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
@ -3813,7 +3814,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_depth_);
@ -3823,34 +3824,19 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Get the bit to check if stencil test needs to be done.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_Flags_Vec);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(kSysFlag_StencilTest);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Check if stencil test needs to be done.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_control_temp);
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// depth_stencil_test_temp may be reused now.
// ***************************************************************************
// Stencil test begins here.
//
@ -3858,9 +3844,463 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// system_temp_depth_.z.
// ***************************************************************************
// TODO(Triang3l): Actually do the stencil test.
// Load the operations and the comparison to depth_stencil_test_temp.
system_constants_used_ |= (1ull << kSysConst_EDRAMStencilFront_Index) |
(1ull << kSysConst_EDRAMStencilBack_Index);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0, 1));
shader_code_.push_back(kPSInFrontFaceRegister);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilFront_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilBack_Vec);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Preserve the original color value if either depth or stencil test has
// Load masked reference and original value into
// depth_stencil_test_result_temp.yw for comparison.
// Both the stencil reference and the stencil read mask are read from the
// system constant buffer by the two AND instructions that follow, so both must
// be flagged as used. The usage bitmask is shifted by the constant's *_Index
// (as for every other constant flagged in this function); the *_Vec value is
// only the constant buffer vector number used when encoding the operand.
system_constants_used_ |= (1ull << kSysConst_EDRAMStencilReference_Index) |
(1ull << kSysConst_EDRAMStencilReadMask_Index);
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_EDRAMStencilReference_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilReference_Vec);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_EDRAMStencilReadMask_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilReadMask_Vec);
++stat_.instruction_count;
++stat_.uint_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_EDRAMStencilReadMask_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilReadMask_Vec);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Compare the stencil values into depth_stencil_test_result_temp.y -
// depth_stencil_test_result_temp.x contains the depth test result.
// 1) Equal - has only 1 result, so won't overwrite the original values in yw.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// 2) Less/greater - compare yw into yw.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01101100, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// 3) Extract the comparison values.
uint32_t stencil_comparison_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1110, 1));
shader_code_.push_back(stencil_comparison_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(1);
shader_code_.push_back(1);
shader_code_.push_back(1);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(1);
shader_code_.push_back(2);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSysConst_EDRAMStencilSide_Comparison_Comp,
1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// 4) Compare the results with the expected.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1110, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(stencil_comparison_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Release stencil_comparison_temp.
PopSystemTemp();
// 5) Start combining the results into depth_stencil_test_result_temp.y.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// 6) Finish combining the results into depth_stencil_test_result_temp.y.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Write the needed stencil operation to depth_stencil_test_result_temp.z.
// 1) Assuming the stencil test has passed, choose between the "pass"
// operation and the "depth fail" operation.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSysConst_EDRAMStencilSide_Pass_Comp, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP,
kSysConst_EDRAMStencilSide_DepthFail_Comp, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// 2) If the stencil test has failed, use the "fail" operation.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSysConst_EDRAMStencilSide_Fail_Comp, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// depth_stencil_test_temp may be reused now.
// We don't need separate depth and stencil test results anymore, so now we
// can mark the pixel to be discarded if the stencil test has failed - by
// marking the whole depth/stencil test as failed. The original depth value
// will be restored in this case, and after writing the new stencil, the pixel
// will be discarded.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Extract the sub-operations to depth_stencil_test_temp.
//
// Don't care about & 0xFF now, applying the write mask will drop the unused
// bits.
//
// X - current value mask (keep/increment/decrement/invert vs. zero/replace) -
// 0xFFFFFFFF if the original value is needed, 0 if it needs to be zeroed.
// Y - value to add (increment/decrement) - 0, 1 or 0xFFFFFFFF (-1).
// Z - saturate - 0xFFFFFFFF to clamp to 0-255 after addition, 0 otherwise.
// W - invert mask - for XORing: 0xFFFFFFFF if need to invert, 0 otherwise.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_IBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(1);
shader_code_.push_back(2);
shader_code_.push_back(1);
shader_code_.push_back(1);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(kStencilOp_Flag_CurrentMask_Shift);
shader_code_.push_back(kStencilOp_Flag_Add_Shift);
shader_code_.push_back(kStencilOp_Flag_Saturate_Shift);
shader_code_.push_back(kStencilOp_Flag_Invert_Shift);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// Mask the original value into depth_stencil_test_temp.x.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Increment or decrement the value in depth_stencil_test_temp.x.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
// Saturate the value in depth_stencil_test_temp.x if needed, using
// depth_stencil_test_temp.y as an intermediate for min/max.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0);
++stat_.instruction_count;
++stat_.int_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMIN) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0xFF);
++stat_.instruction_count;
++stat_.int_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Invert the value in depth_stencil_test_temp.x if needed.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_XOR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Replace the value in depth_stencil_test_temp.x with the reference if
// needed.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(kStencilOp_Flag_NewMask);
++stat_.instruction_count;
++stat_.uint_instruction_count;
system_constants_used_ |= 1ull << kSysConst_EDRAMStencilReference_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_EDRAMStencilReference_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilReference_Vec);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Apply the write mask to the new value - this will also reduce it to 8 bits.
system_constants_used_ |= 1ull << kSysConst_EDRAMStencilWriteMask_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_EDRAMStencilWriteMask_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilWriteMask_Vec);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Invert the write mask to keep the unmodified bits of the old value.
system_constants_used_ |= 1ull << kSysConst_EDRAMStencilWriteMask_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_NOT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_EDRAMStencilWriteMask_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMStencilWriteMask_Vec);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Mask the old value.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Combine the old and new stencil values.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Preserve the original depth value if either depth or stencil test has
// failed because when stencil testing is enabled, something will be written
// since stencil may be modified on failure too (if stencil is disabled,
// there's a discard later, in the "else" case, that will prevent depth
@ -3915,7 +4355,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
@ -3934,7 +4374,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_control_temp);
shader_code_.push_back(depth_stencil_test_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
@ -4000,7 +4440,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Release depth_stencil_control_temp.
// Release depth_stencil_test_temp.
PopSystemTemp();
// ***************************************************************************

View File

@ -78,17 +78,17 @@ class DxbcShaderTranslator : public ShaderTranslator {
enum : uint32_t {
kStencilOp_Flag_CurrentMask_Shift,
kStencilOp_Flag_Invert_Shift,
// 0, 1 or 3, sign-extended from 2 bits to 0, 1 or 0xFFFFFFFF (-1) - the value
// to add.
kStencilOp_Flag_Add_Shift,
kStencilOp_Flag_Saturate_Shift = kStencilOp_Flag_Add_Shift + 2,
kStencilOp_Flag_Invert_Shift,
kStencilOp_Flag_NewMask_Shift,
kStencilOp_Flag_CurrentMask = 1u << kStencilOp_Flag_CurrentMask_Shift,
kStencilOp_Flag_Invert = 1u << kStencilOp_Flag_Invert_Shift,
kStencilOp_Flag_Increment = 1u << kStencilOp_Flag_Add_Shift,
kStencilOp_Flag_Decrement = 3u << kStencilOp_Flag_Add_Shift,
kStencilOp_Flag_Saturate = 1u << kStencilOp_Flag_Saturate_Shift,
kStencilOp_Flag_Invert = 1u << kStencilOp_Flag_Invert_Shift,
kStencilOp_Flag_NewMask = 1u << kStencilOp_Flag_NewMask_Shift,
kStencilOp_Keep = kStencilOp_Flag_CurrentMask,