[D3D12] ROV: Used render targets flags

This commit is contained in:
Triang3l 2018-10-12 12:32:48 +03:00
parent 3dc15dbb44
commit 013087108b
3 changed files with 130 additions and 14 deletions

View File

@ -1793,7 +1793,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.alpha_test != alpha_test;
system_constants_.alpha_test = alpha_test;
// Color exponent bias and output index mapping.
// Color exponent bias and output index mapping or ROV writing.
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_info;
switch (i) {
@ -1841,9 +1842,59 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
80;
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
static const uint32_t kRTFormatFlags[16] = {
// k_8_8_8_8
DxbcShaderTranslator::kRTFlag_FormatFixed,
// k_8_8_8_8_GAMMA
DxbcShaderTranslator::kRTFlag_FormatFixed,
// k_2_10_10_10
DxbcShaderTranslator::kRTFlag_FormatFixed,
// k_2_10_10_10_FLOAT
DxbcShaderTranslator::kRTFlag_FormatFloat10,
// k_16_16
DxbcShaderTranslator::kRTFlag_FormatFixed,
// k_16_16_16_16
DxbcShaderTranslator::kRTFlag_Format64bpp |
DxbcShaderTranslator::kRTFlag_FormatFixed,
// k_16_16_FLOAT
DxbcShaderTranslator::kRTFlag_FormatFloat16,
// k_16_16_16_16_FLOAT
DxbcShaderTranslator::kRTFlag_Format64bpp |
DxbcShaderTranslator::kRTFlag_FormatFloat16,
// Unused
0,
// Unused
0,
// k_2_10_10_10_AS_16_16_16_16
DxbcShaderTranslator::kRTFlag_FormatFixed,
// Unused.
0,
// k_2_10_10_10_FLOAT_AS_16_16_16_16
DxbcShaderTranslator::kRTFlag_FormatFloat10,
// Unused.
0,
// k_32_FLOAT
0,
// k_32_32_FLOAT
DxbcShaderTranslator::kRTFlag_Format64bpp,
};
static const uint32_t kRTFormatAllComponentsMask[16] = {
0b1111, 0b1111, 0b1111, 0b1111, 0b0011, 0b1111, 0b0011, 0b1111,
0b0000, 0b0000, 0b1111, 0b0000, 0b1111, 0b0000, 0b0001, 0b0011,
};
uint32_t rt_mask_all = kRTFormatAllComponentsMask[uint32_t(color_format)];
uint32_t rt_mask = (rb_color_mask >> (i * 4)) & rt_mask_all;
uint32_t rt_flags = kRTFormatFlags[uint32_t(color_format)];
if (rt_mask != 0) {
rt_flags |= DxbcShaderTranslator::kRTFlag_Used;
if (rt_mask != rt_mask_all) {
rt_flags |= DxbcShaderTranslator::kRTFlag_LoadingNeeded;
}
}
dirty |= system_constants_.edram_rt_flags[i] != rt_flags;
system_constants_.edram_rt_flags[i] = rt_flags;
if (system_constants_color_formats_[i] != color_format) {
dirty = true;
uint32_t rt_flags = 0;
// Initialize min/max to Infinity.
uint32_t color_min = 0xFF800000u, alpha_min = 0xFF800000u;
uint32_t color_max = 0x7F800000u, alpha_max = 0x7F800000u;
@ -1851,7 +1902,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
switch (color_format) {
case ColorRenderTargetFormat::k_8_8_8_8:
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFixed;
system_constants_.edram_rt_pack_width_low[i][0] = 8;
system_constants_.edram_rt_pack_width_low[i][1] = 8;
system_constants_.edram_rt_pack_width_low[i][2] = 8;
@ -1866,7 +1916,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
break;
case ColorRenderTargetFormat::k_2_10_10_10:
case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16:
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFixed;
system_constants_.edram_rt_pack_width_low[i][0] = 10;
system_constants_.edram_rt_pack_width_low[i][1] = 10;
system_constants_.edram_rt_pack_width_low[i][2] = 10;
@ -1883,7 +1932,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
break;
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFloat10;
system_constants_.edram_rt_pack_width_low[i][0] = 10;
system_constants_.edram_rt_pack_width_low[i][1] = 10;
system_constants_.edram_rt_pack_width_low[i][2] = 10;
@ -1900,7 +1948,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
break;
case ColorRenderTargetFormat::k_16_16:
case ColorRenderTargetFormat::k_16_16_16_16:
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFixed;
system_constants_.edram_rt_pack_width_low[i][0] = 16;
system_constants_.edram_rt_pack_width_low[i][1] = 16;
system_constants_.edram_rt_pack_width_low[i][2] = 0;
@ -1918,7 +1965,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
break;
case ColorRenderTargetFormat::k_16_16_FLOAT:
case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFloat16;
system_constants_.edram_rt_pack_width_low[i][0] = 16;
system_constants_.edram_rt_pack_width_low[i][1] = 16;
system_constants_.edram_rt_pack_width_low[i][2] = 0;
@ -1945,7 +1991,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
assert_always();
break;
}
system_constants_.edram_rt_flags[i] = rt_flags;
uint32_t rt_pair_index = i >> 1;
uint32_t rt_pair_comp = (i & 1) << 1;
system_constants_

View File

@ -1355,14 +1355,80 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.int_instruction_count;
// ***************************************************************************
// Test pixel writing.
// Write to color render targets.
// ***************************************************************************
CompletePixelShader_WriteToROV_StoreColor(edram_coord_temp, 0,
system_temp_color_[0]);
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
// Release edram_coord_temp.
PopSystemTemp();
// Get what render targets need to be written to.
uint32_t rt_used_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_used_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(kRTFlag_Used);
shader_code_.push_back(kRTFlag_Used);
shader_code_.push_back(kRTFlag_Used);
shader_code_.push_back(kRTFlag_Used);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Get what render targets need to be read (for write masks and blending).
uint32_t rt_loading_needed_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_loading_needed_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(kRTFlag_LoadingNeeded);
shader_code_.push_back(kRTFlag_LoadingNeeded);
shader_code_.push_back(kRTFlag_LoadingNeeded);
shader_code_.push_back(kRTFlag_LoadingNeeded);
++stat_.instruction_count;
++stat_.uint_instruction_count;
for (uint32_t i = 0; i < 4; ++i) {
// In case of overlap, the render targets with the lower index have higher
// priority since they usually have the most important value, so iterate
// from the highest index down to make the lower-index targets be stored last.
uint32_t rt_index = 3 - i;
// Check if the render target needs to be written to.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1));
shader_code_.push_back(rt_used_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
CompletePixelShader_WriteToROV_StoreColor(edram_coord_temp, rt_index,
system_temp_color_[rt_index]);
// Close the check whether the RT is used.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
// Release rt_used_temp, rt_loading_needed_temp and edram_coord_temp.
PopSystemTemp(3);
}
void DxbcShaderTranslator::CompletePixelShader() {

View File

@ -46,9 +46,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
};
enum : uint32_t {
// Whether the write mask is non-zero.
kRTFlag_Used = 1,
// Whether the existing render target contents need to be loaded and merged
// with the new value (if the write mask is not 1111, or 11 for 16_16, or 1
// for 32_FLOAT, or blending is enabled and it's not no-op).
kRTFlag_LoadingNeeded = kRTFlag_Used << 1,
// Whether the format is represented by 2 dwords.
kRTFlag_Format64bpp = kRTFlag_Used << 1,
kRTFlag_Format64bpp = kRTFlag_LoadingNeeded << 1,
// Whether the format is fixed-point and needs to be converted to integer
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
kRTFlag_FormatFixed = kRTFlag_Format64bpp << 1,