[D3D12] ROV: Used render targets flags
This commit is contained in:
parent
3dc15dbb44
commit
013087108b
|
@ -1793,7 +1793,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.alpha_test != alpha_test;
|
||||
system_constants_.alpha_test = alpha_test;
|
||||
|
||||
// Color exponent bias and output index mapping.
|
||||
// Color exponent bias and output index mapping or ROV writing.
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_info;
|
||||
switch (i) {
|
||||
|
@ -1841,9 +1842,59 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
80;
|
||||
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
||||
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
||||
static const uint32_t kRTFormatFlags[16] = {
|
||||
// k_8_8_8_8
|
||||
DxbcShaderTranslator::kRTFlag_FormatFixed,
|
||||
// k_8_8_8_8_GAMMA
|
||||
DxbcShaderTranslator::kRTFlag_FormatFixed,
|
||||
// k_2_10_10_10
|
||||
DxbcShaderTranslator::kRTFlag_FormatFixed,
|
||||
// k_2_10_10_10_FLOAT
|
||||
DxbcShaderTranslator::kRTFlag_FormatFloat10,
|
||||
// k_16_16
|
||||
DxbcShaderTranslator::kRTFlag_FormatFixed,
|
||||
// k_16_16_16_16
|
||||
DxbcShaderTranslator::kRTFlag_Format64bpp |
|
||||
DxbcShaderTranslator::kRTFlag_FormatFixed,
|
||||
// k_16_16_FLOAT
|
||||
DxbcShaderTranslator::kRTFlag_FormatFloat16,
|
||||
// k_16_16_16_16_FLOAT
|
||||
DxbcShaderTranslator::kRTFlag_Format64bpp |
|
||||
DxbcShaderTranslator::kRTFlag_FormatFloat16,
|
||||
// Unused
|
||||
0,
|
||||
// Unused
|
||||
0,
|
||||
// k_2_10_10_10_AS_16_16_16_16
|
||||
DxbcShaderTranslator::kRTFlag_FormatFixed,
|
||||
// Unused.
|
||||
0,
|
||||
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
||||
DxbcShaderTranslator::kRTFlag_FormatFloat10,
|
||||
// Unused.
|
||||
0,
|
||||
// k_32_FLOAT
|
||||
0,
|
||||
// k_32_32_FLOAT
|
||||
DxbcShaderTranslator::kRTFlag_Format64bpp,
|
||||
};
|
||||
static const uint32_t kRTFormatAllComponentsMask[16] = {
|
||||
0b1111, 0b1111, 0b1111, 0b1111, 0b0011, 0b1111, 0b0011, 0b1111,
|
||||
0b0000, 0b0000, 0b1111, 0b0000, 0b1111, 0b0000, 0b0001, 0b0011,
|
||||
};
|
||||
uint32_t rt_mask_all = kRTFormatAllComponentsMask[uint32_t(color_format)];
|
||||
uint32_t rt_mask = (rb_color_mask >> (i * 4)) & rt_mask_all;
|
||||
uint32_t rt_flags = kRTFormatFlags[uint32_t(color_format)];
|
||||
if (rt_mask != 0) {
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_Used;
|
||||
if (rt_mask != rt_mask_all) {
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_LoadingNeeded;
|
||||
}
|
||||
}
|
||||
dirty |= system_constants_.edram_rt_flags[i] != rt_flags;
|
||||
system_constants_.edram_rt_flags[i] = rt_flags;
|
||||
if (system_constants_color_formats_[i] != color_format) {
|
||||
dirty = true;
|
||||
uint32_t rt_flags = 0;
|
||||
// Initialize min/max to Infinity.
|
||||
uint32_t color_min = 0xFF800000u, alpha_min = 0xFF800000u;
|
||||
uint32_t color_max = 0x7F800000u, alpha_max = 0x7F800000u;
|
||||
|
@ -1851,7 +1902,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
switch (color_format) {
|
||||
case ColorRenderTargetFormat::k_8_8_8_8:
|
||||
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFixed;
|
||||
system_constants_.edram_rt_pack_width_low[i][0] = 8;
|
||||
system_constants_.edram_rt_pack_width_low[i][1] = 8;
|
||||
system_constants_.edram_rt_pack_width_low[i][2] = 8;
|
||||
|
@ -1866,7 +1916,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
break;
|
||||
case ColorRenderTargetFormat::k_2_10_10_10:
|
||||
case ColorRenderTargetFormat::k_2_10_10_10_AS_16_16_16_16:
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFixed;
|
||||
system_constants_.edram_rt_pack_width_low[i][0] = 10;
|
||||
system_constants_.edram_rt_pack_width_low[i][1] = 10;
|
||||
system_constants_.edram_rt_pack_width_low[i][2] = 10;
|
||||
|
@ -1883,7 +1932,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
break;
|
||||
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFloat10;
|
||||
system_constants_.edram_rt_pack_width_low[i][0] = 10;
|
||||
system_constants_.edram_rt_pack_width_low[i][1] = 10;
|
||||
system_constants_.edram_rt_pack_width_low[i][2] = 10;
|
||||
|
@ -1900,7 +1948,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
break;
|
||||
case ColorRenderTargetFormat::k_16_16:
|
||||
case ColorRenderTargetFormat::k_16_16_16_16:
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFixed;
|
||||
system_constants_.edram_rt_pack_width_low[i][0] = 16;
|
||||
system_constants_.edram_rt_pack_width_low[i][1] = 16;
|
||||
system_constants_.edram_rt_pack_width_low[i][2] = 0;
|
||||
|
@ -1918,7 +1965,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
break;
|
||||
case ColorRenderTargetFormat::k_16_16_FLOAT:
|
||||
case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
||||
rt_flags |= DxbcShaderTranslator::kRTFlag_FormatFloat16;
|
||||
system_constants_.edram_rt_pack_width_low[i][0] = 16;
|
||||
system_constants_.edram_rt_pack_width_low[i][1] = 16;
|
||||
system_constants_.edram_rt_pack_width_low[i][2] = 0;
|
||||
|
@ -1945,7 +1991,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
assert_always();
|
||||
break;
|
||||
}
|
||||
system_constants_.edram_rt_flags[i] = rt_flags;
|
||||
uint32_t rt_pair_index = i >> 1;
|
||||
uint32_t rt_pair_comp = (i & 1) << 1;
|
||||
system_constants_
|
||||
|
|
|
@ -1355,14 +1355,80 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.int_instruction_count;
|
||||
|
||||
// ***************************************************************************
|
||||
// Test pixel writing.
|
||||
// Write to color render targets.
|
||||
// ***************************************************************************
|
||||
|
||||
CompletePixelShader_WriteToROV_StoreColor(edram_coord_temp, 0,
|
||||
system_temp_color_[0]);
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
||||
|
||||
// Release edram_coord_temp.
|
||||
PopSystemTemp();
|
||||
// Get what render targets need to be written to.
|
||||
uint32_t rt_used_temp = PushSystemTemp();
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(rt_used_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(kRTFlag_Used);
|
||||
shader_code_.push_back(kRTFlag_Used);
|
||||
shader_code_.push_back(kRTFlag_Used);
|
||||
shader_code_.push_back(kRTFlag_Used);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Get what render targets need to be read (for write masks and blending).
|
||||
uint32_t rt_loading_needed_temp = PushSystemTemp();
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(rt_loading_needed_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(kRTFlag_LoadingNeeded);
|
||||
shader_code_.push_back(kRTFlag_LoadingNeeded);
|
||||
shader_code_.push_back(kRTFlag_LoadingNeeded);
|
||||
shader_code_.push_back(kRTFlag_LoadingNeeded);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
// In case of overlap, the render targets with the lower index have higher
|
||||
// priority since they usually have the most important value.
|
||||
uint32_t rt_index = 3 - i;
|
||||
|
||||
// Check if the render target needs to be written to.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, rt_index, 1));
|
||||
shader_code_.push_back(rt_used_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
CompletePixelShader_WriteToROV_StoreColor(edram_coord_temp, rt_index,
|
||||
system_temp_color_[rt_index]);
|
||||
|
||||
// Close the check whether the RT is used.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
}
|
||||
|
||||
// Release rt_used_temp, rt_loading_needed_temp and edram_coord_temp.
|
||||
PopSystemTemp(3);
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader() {
|
||||
|
|
|
@ -46,9 +46,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
|
||||
enum : uint32_t {
|
||||
// Whether the write mask is non-zero.
|
||||
kRTFlag_Used = 1,
|
||||
// Whether the render target needs to be merged with another (if the write
|
||||
// mask is not 1111, or 11 for 16_16, or 1 for 32_FLOAT, or blending is
|
||||
// enabled and it's not no-op).
|
||||
kRTFlag_LoadingNeeded = kRTFlag_Used << 1,
|
||||
// Whether the format is represented by 2 dwords.
|
||||
kRTFlag_Format64bpp = kRTFlag_Used << 1,
|
||||
kRTFlag_Format64bpp = kRTFlag_LoadingNeeded << 1,
|
||||
// Whether the format is fixed-point and needs to be converted to integer
|
||||
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
|
||||
kRTFlag_FormatFixed = kRTFlag_Format64bpp << 1,
|
||||
|
|
Loading…
Reference in New Issue