[D3D12] ROV: Track which RTs and components have actually been written

This commit is contained in:
Triang3l 2018-10-18 14:54:33 +03:00
parent 1860bc6a59
commit f48ea20880
4 changed files with 186 additions and 61 deletions

View File

@ -1887,26 +1887,19 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
80; 80;
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles; dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
system_constants_.edram_pitch_tiles = edram_pitch_tiles; system_constants_.edram_pitch_tiles = edram_pitch_tiles;
static const uint32_t kRTFormatAllComponentsMask[16] = {
0b1111, 0b1111, 0b1111, 0b1111, 0b0011, 0b1111, 0b0011, 0b1111,
0b0000, 0b0000, 0b1111, 0b0000, 0b1111, 0b0000, 0b0001, 0b0011,
};
uint32_t rt_mask_all = kRTFormatAllComponentsMask[uint32_t(color_format)];
uint32_t rt_mask = (rb_color_mask >> (i * 4)) & rt_mask_all;
uint32_t rt_flags = uint32_t rt_flags =
DxbcShaderTranslator::GetColorFormatRTFlags(color_format); DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
// Exclude unused components from the write mask.
uint32_t rt_mask =
(rb_color_mask >> (i * 4)) & 0xF &
~(rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift);
if (rt_mask != 0) { if (rt_mask != 0) {
rt_flags |= DxbcShaderTranslator::kRTFlag_Used | rt_flags |= rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift;
(rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift);
if (rt_mask != rt_mask_all) {
rt_flags |= DxbcShaderTranslator::kRTFlag_Load;
}
uint32_t blend_x, blend_y; uint32_t blend_x, blend_y;
if (colorcontrol_blend_enable && if (colorcontrol_blend_enable &&
DxbcShaderTranslator::GetBlendConstants(blend_control, blend_x, DxbcShaderTranslator::GetBlendConstants(blend_control, blend_x,
blend_y)) { blend_y)) {
rt_flags |= DxbcShaderTranslator::kRTFlag_Load | rt_flags |= DxbcShaderTranslator::kRTFlag_Blend;
DxbcShaderTranslator::kRTFlag_Blend;
uint32_t rt_pair_index = i >> 1; uint32_t rt_pair_index = i >> 1;
uint32_t rt_pair_comp = (i & 1) << 1; uint32_t rt_pair_comp = (i & 1) << 1;
if (system_constants_ if (system_constants_

View File

@ -83,29 +83,33 @@ uint32_t DxbcShaderTranslator::GetColorFormatRTFlags(
// k_2_10_10_10_FLOAT // k_2_10_10_10_FLOAT
kRTFlag_FormatFloat10, kRTFlag_FormatFloat10,
// k_16_16 // k_16_16
kRTFlag_FormatFixed, kRTFlag_FormatFixed | kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
// k_16_16_16_16 // k_16_16_16_16
kRTFlag_FormatFixed, kRTFlag_FormatFixed,
// k_16_16_FLOAT // k_16_16_FLOAT
kRTFlag_FormatFloat16, kRTFlag_FormatFloat16 | kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
// k_16_16_16_16_FLOAT // k_16_16_16_16_FLOAT
kRTFlag_FormatFloat16, kRTFlag_FormatFloat16,
// Unused // Unused
0, kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
kRTFlag_FormatUnusedA,
// Unused // Unused
0, kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
kRTFlag_FormatUnusedA,
// k_2_10_10_10_AS_16_16_16_16 // k_2_10_10_10_AS_16_16_16_16
kRTFlag_FormatFixed, kRTFlag_FormatFixed,
// Unused. // Unused.
0, kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
kRTFlag_FormatUnusedA,
// k_2_10_10_10_FLOAT_AS_16_16_16_16 // k_2_10_10_10_FLOAT_AS_16_16_16_16
kRTFlag_FormatFloat10, kRTFlag_FormatFloat10,
// Unused. // Unused.
0, kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
kRTFlag_FormatUnusedA,
// k_32_FLOAT // k_32_FLOAT
0, kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
// k_32_32_FLOAT // k_32_32_FLOAT
0, kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
}; };
return kRTFormatFlags[uint32_t(format)]; return kRTFormatFlags[uint32_t(format)];
} }
@ -1054,10 +1058,15 @@ void DxbcShaderTranslator::StartTranslation() {
} else if (IsDXBCPixelShader()) { } else if (IsDXBCPixelShader()) {
if (!is_depth_only_pixel_shader_) { if (!is_depth_only_pixel_shader_) {
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
system_temp_color_[i] = PushSystemTemp(true); // In the ROV path, no need to initialize the colors because original
// values will be kept for the unwritten components.
system_temp_color_[i] = PushSystemTemp(!edram_rov_used_);
} }
} }
if (edram_rov_used_) { if (edram_rov_used_) {
if (!is_depth_only_pixel_shader_) {
system_temp_color_written_ = PushSystemTemp(true);
}
system_temp_depth_ = PushSystemTemp(); system_temp_depth_ = PushSystemTemp();
} }
} }
@ -3242,6 +3251,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
} }
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
bool color_targets_written;
if (is_depth_only_pixel_shader_) {
color_targets_written = false;
} else {
color_targets_written = writes_color_target(0) || writes_color_target(1) ||
writes_color_target(2) || writes_color_target(3);
}
// *************************************************************************** // ***************************************************************************
// Calculate the offsets of the samples in the EDRAM. // Calculate the offsets of the samples in the EDRAM.
// *************************************************************************** // ***************************************************************************
@ -3399,7 +3416,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Calculate the address in the EDRAM buffer. // Calculate the address in the EDRAM buffer.
if (!is_depth_only_pixel_shader_) { if (color_targets_written) {
// 1a) Get dword offset within the tile to edram_coord_low_temp.x. // 1a) Get dword offset within the tile to edram_coord_low_temp.x.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
@ -3437,7 +3454,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
if (!is_depth_only_pixel_shader_) { if (color_targets_written) {
// 2a) Combine the tile offset and the offset within the tile to // 2a) Combine the tile offset and the offset within the tile to
// edram_coord_low_temp.x. // edram_coord_low_temp.x.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
@ -3481,7 +3498,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
uint32_t edram_coord_high_temp = 0; uint32_t edram_coord_high_temp = 0;
if (!is_depth_only_pixel_shader_) { if (color_targets_written) {
edram_coord_high_temp = PushSystemTemp(); edram_coord_high_temp = PushSystemTemp();
// Get which render targets are 64bpp, as log2 of dword count per pixel. // Get which render targets are 64bpp, as log2 of dword count per pixel.
@ -3990,16 +4007,16 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Write to color render targets. // Write to color render targets.
// *************************************************************************** // ***************************************************************************
if (!is_depth_only_pixel_shader_) { if (color_targets_written) {
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index; system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
// Get what render targets need to be written to. // Mask disabled color writes.
uint32_t rt_used_temp = PushSystemTemp(); uint32_t rt_write_masks_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_used_temp); shader_code_.push_back(rt_write_masks_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_); shader_code_.push_back(cbuffer_index_system_constants_);
@ -4007,33 +4024,27 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec); shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(kRTFlag_Used); shader_code_.push_back(kRTFlag_WriteR_Shift);
shader_code_.push_back(kRTFlag_Used); shader_code_.push_back(kRTFlag_WriteR_Shift);
shader_code_.push_back(kRTFlag_Used); shader_code_.push_back(kRTFlag_WriteR_Shift);
shader_code_.push_back(kRTFlag_Used); shader_code_.push_back(kRTFlag_WriteR_Shift);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Get what render targets need to be read (for write masks and blending).
uint32_t rt_load_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_load_temp); shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(cbuffer_index_system_constants_); shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(kRTFlag_Load); shader_code_.push_back(rt_write_masks_temp);
shader_code_.push_back(kRTFlag_Load);
shader_code_.push_back(kRTFlag_Load);
shader_code_.push_back(kRTFlag_Load);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Release rt_write_masks_temp.
PopSystemTemp();
// Get what render targets need blending (if only write mask is used and no // Get what render targets need blending (if only write mask is used and no
// blending, skip blending). // blending, skip blending).
@ -4057,7 +4068,92 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Get what render targets need to be read (for write mask and blending).
uint32_t rt_overwritten_temp = PushSystemTemp();
// First, ignore components that don't exist in the render target at all -
// treat them as overwritten.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_overwritten_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(4);
shader_code_.push_back(4);
shader_code_.push_back(4);
shader_code_.push_back(4);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
++stat_.instruction_count;
++stat_.uint_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_overwritten_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(rt_overwritten_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Then, check if the write mask + unused components is 1111 - if yes (and
// not blending), the pixel will be totally overwritten and no need to load
// the old pixel value.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_overwritten_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(rt_overwritten_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0b1111);
shader_code_.push_back(0b1111);
shader_code_.push_back(0b1111);
shader_code_.push_back(0b1111);
++stat_.instruction_count;
++stat_.int_instruction_count;
// Force load the previous pixel if blending.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_overwritten_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(rt_blend_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(rt_overwritten_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
if (!writes_color_target(i)) {
continue;
}
// Check if the render target needs to be written to. // Check if the render target needs to be written to.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
@ -4065,7 +4161,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(rt_used_temp); shader_code_.push_back(system_temp_color_written_);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
@ -4077,11 +4173,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// write mask. // write mask.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) | D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(rt_load_temp); shader_code_.push_back(rt_overwritten_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
uint32_t dest_color_temp = PushSystemTemp(); uint32_t dest_color_temp = PushSystemTemp();
@ -4165,12 +4261,12 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count; ++stat_.instruction_count;
} }
// Release rt_used_temp, rt_load_temp and rt_blend_temp. // Release rt_blend_temp and rt_overwritten_temp.
PopSystemTemp(3); PopSystemTemp(2);
} }
// Release edram_coord_low_temp and, if used, edram_coord_high_temp. // Release edram_coord_low_temp and, if used, edram_coord_high_temp.
PopSystemTemp(is_depth_only_pixel_shader_ ? 1 : 2); PopSystemTemp(color_targets_written ? 2 : 1);
} }
void DxbcShaderTranslator::CompletePixelShader() { void DxbcShaderTranslator::CompletePixelShader() {
@ -4476,6 +4572,10 @@ void DxbcShaderTranslator::CompleteShaderCode() {
if (edram_rov_used_) { if (edram_rov_used_) {
// Release system_temp_depth_. // Release system_temp_depth_.
PopSystemTemp(); PopSystemTemp();
if (!is_depth_only_pixel_shader_) {
// Release system_temp_color_written_.
PopSystemTemp();
}
} }
if (!is_depth_only_pixel_shader_) { if (!is_depth_only_pixel_shader_) {
// Release system_temp_color_. // Release system_temp_color_.
@ -5460,6 +5560,30 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
} }
PopSystemTemp(2); PopSystemTemp(2);
} }
if (edram_rov_used_ &&
result.storage_target == InstructionStorageTarget::kColorTarget) {
// For ROV output, mark that the color has been written to.
// According to:
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
// if a color target has not been written to - including due to flow control -
// the render target must not be modified (the unwritten components of a
// written target are undefined, but let's keep the original value in this
// case).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1 << uint32_t(result.storage_index), 1));
shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, uint32_t(result.storage_index), 1));
shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(swizzle_mask | constant_mask);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
} }
void DxbcShaderTranslator::ClosePredicate() { void DxbcShaderTranslator::ClosePredicate() {

View File

@ -77,17 +77,19 @@ class DxbcShaderTranslator : public ShaderTranslator {
}; };
enum : uint32_t { enum : uint32_t {
// Whether the write mask is non-zero.
kRTFlag_Used_Shift,
// Whether the render target needs to be merged with another (if the write // Whether the render target needs to be merged with another (if the write
// mask is not 1111, or 11 for 16_16, or 1 for 32_FLOAT, or blending is // mask is not 1111, or 11 for 16_16, or 1 for 32_FLOAT, or blending is
// enabled and it's not no-op). // enabled and it's not no-op).
kRTFlag_Load_Shift,
kRTFlag_Blend_Shift,
kRTFlag_WriteR_Shift, kRTFlag_WriteR_Shift,
kRTFlag_WriteG_Shift, kRTFlag_WriteG_Shift,
kRTFlag_WriteB_Shift, kRTFlag_WriteB_Shift,
kRTFlag_WriteA_Shift, kRTFlag_WriteA_Shift,
kRTFlag_Blend_Shift,
// Whether the component does not exist in the render target format.
kRTFlag_FormatUnusedR_Shift,
kRTFlag_FormatUnusedG_Shift,
kRTFlag_FormatUnusedB_Shift,
kRTFlag_FormatUnusedA_Shift,
// Whether the format is fixed-point and needs to be converted to integer // Whether the format is fixed-point and needs to be converted to integer
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16). // (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
kRTFlag_FormatFixed_Shift, kRTFlag_FormatFixed_Shift,
@ -97,13 +99,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
// f16tof32/f32tof16 is needed. // f16tof32/f32tof16 is needed.
kRTFlag_FormatFloat16_Shift, kRTFlag_FormatFloat16_Shift,
kRTFlag_Used = 1u << kRTFlag_Used_Shift,
kRTFlag_Load = 1u << kRTFlag_Load_Shift,
kRTFlag_Blend = 1u << kRTFlag_Blend_Shift,
kRTFlag_WriteR = 1u << kRTFlag_WriteR_Shift, kRTFlag_WriteR = 1u << kRTFlag_WriteR_Shift,
kRTFlag_WriteG = 1u << kRTFlag_WriteG_Shift, kRTFlag_WriteG = 1u << kRTFlag_WriteG_Shift,
kRTFlag_WriteB = 1u << kRTFlag_WriteB_Shift, kRTFlag_WriteB = 1u << kRTFlag_WriteB_Shift,
kRTFlag_WriteA = 1u << kRTFlag_WriteA_Shift, kRTFlag_WriteA = 1u << kRTFlag_WriteA_Shift,
kRTFlag_Blend = 1u << kRTFlag_Blend_Shift,
kRTFlag_FormatUnusedR = 1u << kRTFlag_FormatUnusedR_Shift,
kRTFlag_FormatUnusedG = 1u << kRTFlag_FormatUnusedG_Shift,
kRTFlag_FormatUnusedB = 1u << kRTFlag_FormatUnusedB_Shift,
kRTFlag_FormatUnusedA = 1u << kRTFlag_FormatUnusedA_Shift,
kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift, kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift,
kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift, kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift,
kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift, kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift,
@ -919,6 +923,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Color outputs in pixel shaders (because of exponent bias, alpha test and // Color outputs in pixel shaders (because of exponent bias, alpha test and
// remapping). // remapping).
uint32_t system_temp_color_[4]; uint32_t system_temp_color_[4];
// Whether the color output has been written in the execution path (ROV only).
uint32_t system_temp_color_written_;
// Depth output in pixel shader, and 3 dwords usable as scratch for operations // Depth output in pixel shader, and 3 dwords usable as scratch for operations
// related to depth. Currently only used for ROV depth. // related to depth. Currently only used for ROV depth.
// TODO(Triang3l): Reduce depth to 24-bit in pixel shaders when using a DSV // TODO(Triang3l): Reduce depth to 24-bit in pixel shaders when using a DSV

View File

@ -53,6 +53,8 @@ class ShaderTranslator {
bool uses_register_dynamic_addressing() const { bool uses_register_dynamic_addressing() const {
return uses_register_dynamic_addressing_; return uses_register_dynamic_addressing_;
} }
// Reports whether the shader currently being translated writes to color
// target `i` on at least one of its execution paths.
bool writes_color_target(int i) const {
  const bool is_target_written = writes_color_targets_[i];
  return is_target_written;
}
// A list of all vertex bindings, populated before translation occurs. // A list of all vertex bindings, populated before translation occurs.
const std::vector<Shader::VertexBinding>& vertex_bindings() const { const std::vector<Shader::VertexBinding>& vertex_bindings() const {
return vertex_bindings_; return vertex_bindings_;