[D3D12] ROV: Write mask, write in ascending RT number order

This commit is contained in:
Triang3l 2018-10-16 20:50:01 +03:00
parent e8414da5dd
commit 37fe120fe0
3 changed files with 58 additions and 18 deletions

View File

@ -1880,7 +1880,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t rt_flags = uint32_t rt_flags =
DxbcShaderTranslator::GetColorFormatRTFlags(color_format); DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
if (rt_mask != 0) { if (rt_mask != 0) {
rt_flags |= DxbcShaderTranslator::kRTFlag_Used; rt_flags |= DxbcShaderTranslator::kRTFlag_Used |
(rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift);
if (rt_mask != rt_mask_all) { if (rt_mask != rt_mask_all) {
rt_flags |= DxbcShaderTranslator::kRTFlag_Load; rt_flags |= DxbcShaderTranslator::kRTFlag_Load;
} }

View File

@ -3859,25 +3859,20 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
// In case of overlap, the render targets with the lower index have higher
// priority since they usually have the most important value.
uint32_t rt_index = 3 - i;
// Check if the render target needs to be written to. // Check if the render target needs to be written to.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) | D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(rt_used_temp); shader_code_.push_back(rt_used_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
// Clamp the color (the source value) before blending. // Clamp the color (the source value) before blending.
// https://stackoverflow.com/questions/30153911/untangling-when-and-what-values-are-clamped-in-opengl-blending-on-different-rend // https://stackoverflow.com/questions/30153911/untangling-when-and-what-values-are-clamped-in-opengl-blending-on-different-rend
CompletePixelShader_WriteToROV_ClampColor(rt_index, CompletePixelShader_WriteToROV_ClampColor(i, system_temp_color_[i]);
system_temp_color_[rt_index]);
// Load the previous value in the render target to blend and to apply the // Load the previous value in the render target to blend and to apply the
// write mask. // write mask.
@ -3886,14 +3881,13 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
D3D10_SB_INSTRUCTION_TEST_NONZERO) | D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(rt_load_temp); shader_code_.push_back(rt_load_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
uint32_t dest_color_temp = PushSystemTemp(); uint32_t dest_color_temp = PushSystemTemp();
CompletePixelShader_WriteToROV_LoadColor(edram_coord_low_temp, CompletePixelShader_WriteToROV_LoadColor(
edram_coord_high_temp, rt_index, edram_coord_low_temp, edram_coord_high_temp, i, dest_color_temp);
dest_color_temp);
// Blend if needed. // Blend if needed.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
@ -3901,18 +3895,55 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
D3D10_SB_INSTRUCTION_TEST_NONZERO) | D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(rt_blend_temp); shader_code_.push_back(rt_blend_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
CompletePixelShader_WriteToROV_Blend( CompletePixelShader_WriteToROV_Blend(i, system_temp_color_[i],
rt_index, system_temp_color_[rt_index], dest_color_temp); dest_color_temp);
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count; ++stat_.instruction_count;
// TODO(Triang3l): Apply the write mask. // Mask the components to overwrite.
uint32_t write_mask_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(write_mask_temp);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(kRTFlag_WriteR);
shader_code_.push_back(kRTFlag_WriteG);
shader_code_.push_back(kRTFlag_WriteB);
shader_code_.push_back(kRTFlag_WriteA);
++stat_.instruction_count;
++stat_.uint_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_color_[i]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(write_mask_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_color_[i]);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(dest_color_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Release write_mask_temp.
PopSystemTemp();
// Release dest_color_temp. // Release dest_color_temp.
PopSystemTemp(); PopSystemTemp();
@ -3925,8 +3956,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Write the new color, which may have been modified by blending. // Write the new color, which may have been modified by blending.
CompletePixelShader_WriteToROV_StoreColor(edram_coord_low_temp, CompletePixelShader_WriteToROV_StoreColor(edram_coord_low_temp,
edram_coord_high_temp, rt_index, edram_coord_high_temp, i,
system_temp_color_[rt_index]); system_temp_color_[i]);
// Close the check whether the RT is used. // Close the check whether the RT is used.
shader_code_.push_back( shader_code_.push_back(

View File

@ -81,6 +81,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
// enabled and it's not no-op). // enabled and it's not no-op).
kRTFlag_Load_Shift, kRTFlag_Load_Shift,
kRTFlag_Blend_Shift, kRTFlag_Blend_Shift,
kRTFlag_WriteR_Shift,
kRTFlag_WriteG_Shift,
kRTFlag_WriteB_Shift,
kRTFlag_WriteA_Shift,
// Whether the format is fixed-point and needs to be converted to integer // Whether the format is fixed-point and needs to be converted to integer
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16). // (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
kRTFlag_FormatFixed_Shift, kRTFlag_FormatFixed_Shift,
@ -93,6 +97,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
kRTFlag_Used = 1u << kRTFlag_Used_Shift, kRTFlag_Used = 1u << kRTFlag_Used_Shift,
kRTFlag_Load = 1u << kRTFlag_Load_Shift, kRTFlag_Load = 1u << kRTFlag_Load_Shift,
kRTFlag_Blend = 1u << kRTFlag_Blend_Shift, kRTFlag_Blend = 1u << kRTFlag_Blend_Shift,
kRTFlag_WriteR = 1u << kRTFlag_WriteR_Shift,
kRTFlag_WriteG = 1u << kRTFlag_WriteG_Shift,
kRTFlag_WriteB = 1u << kRTFlag_WriteB_Shift,
kRTFlag_WriteA = 1u << kRTFlag_WriteA_Shift,
kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift, kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift,
kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift, kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift,
kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift, kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift,