[D3D12] ROV: Track which RTs and components have been actually written
This commit is contained in:
parent
1860bc6a59
commit
f48ea20880
|
@ -1887,26 +1887,19 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
80;
|
80;
|
||||||
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
||||||
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
||||||
static const uint32_t kRTFormatAllComponentsMask[16] = {
|
|
||||||
0b1111, 0b1111, 0b1111, 0b1111, 0b0011, 0b1111, 0b0011, 0b1111,
|
|
||||||
0b0000, 0b0000, 0b1111, 0b0000, 0b1111, 0b0000, 0b0001, 0b0011,
|
|
||||||
};
|
|
||||||
uint32_t rt_mask_all = kRTFormatAllComponentsMask[uint32_t(color_format)];
|
|
||||||
uint32_t rt_mask = (rb_color_mask >> (i * 4)) & rt_mask_all;
|
|
||||||
uint32_t rt_flags =
|
uint32_t rt_flags =
|
||||||
DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
|
DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
|
||||||
|
// Exclude unused components from the write mask.
|
||||||
|
uint32_t rt_mask =
|
||||||
|
(rb_color_mask >> (i * 4)) & 0xF &
|
||||||
|
~(rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift);
|
||||||
if (rt_mask != 0) {
|
if (rt_mask != 0) {
|
||||||
rt_flags |= DxbcShaderTranslator::kRTFlag_Used |
|
rt_flags |= rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift;
|
||||||
(rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift);
|
|
||||||
if (rt_mask != rt_mask_all) {
|
|
||||||
rt_flags |= DxbcShaderTranslator::kRTFlag_Load;
|
|
||||||
}
|
|
||||||
uint32_t blend_x, blend_y;
|
uint32_t blend_x, blend_y;
|
||||||
if (colorcontrol_blend_enable &&
|
if (colorcontrol_blend_enable &&
|
||||||
DxbcShaderTranslator::GetBlendConstants(blend_control, blend_x,
|
DxbcShaderTranslator::GetBlendConstants(blend_control, blend_x,
|
||||||
blend_y)) {
|
blend_y)) {
|
||||||
rt_flags |= DxbcShaderTranslator::kRTFlag_Load |
|
rt_flags |= DxbcShaderTranslator::kRTFlag_Blend;
|
||||||
DxbcShaderTranslator::kRTFlag_Blend;
|
|
||||||
uint32_t rt_pair_index = i >> 1;
|
uint32_t rt_pair_index = i >> 1;
|
||||||
uint32_t rt_pair_comp = (i & 1) << 1;
|
uint32_t rt_pair_comp = (i & 1) << 1;
|
||||||
if (system_constants_
|
if (system_constants_
|
||||||
|
|
|
@ -83,29 +83,33 @@ uint32_t DxbcShaderTranslator::GetColorFormatRTFlags(
|
||||||
// k_2_10_10_10_FLOAT
|
// k_2_10_10_10_FLOAT
|
||||||
kRTFlag_FormatFloat10,
|
kRTFlag_FormatFloat10,
|
||||||
// k_16_16
|
// k_16_16
|
||||||
kRTFlag_FormatFixed,
|
kRTFlag_FormatFixed | kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
|
||||||
// k_16_16_16_16
|
// k_16_16_16_16
|
||||||
kRTFlag_FormatFixed,
|
kRTFlag_FormatFixed,
|
||||||
// k_16_16_FLOAT
|
// k_16_16_FLOAT
|
||||||
kRTFlag_FormatFloat16,
|
kRTFlag_FormatFloat16 | kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
|
||||||
// k_16_16_16_16_FLOAT
|
// k_16_16_16_16_FLOAT
|
||||||
kRTFlag_FormatFloat16,
|
kRTFlag_FormatFloat16,
|
||||||
// Unused
|
// Unused
|
||||||
0,
|
kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
|
||||||
|
kRTFlag_FormatUnusedA,
|
||||||
// Unused
|
// Unused
|
||||||
0,
|
kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
|
||||||
|
kRTFlag_FormatUnusedA,
|
||||||
// k_2_10_10_10_AS_16_16_16_16
|
// k_2_10_10_10_AS_16_16_16_16
|
||||||
kRTFlag_FormatFixed,
|
kRTFlag_FormatFixed,
|
||||||
// Unused.
|
// Unused.
|
||||||
0,
|
kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
|
||||||
|
kRTFlag_FormatUnusedA,
|
||||||
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
||||||
kRTFlag_FormatFloat10,
|
kRTFlag_FormatFloat10,
|
||||||
// Unused.
|
// Unused.
|
||||||
0,
|
kRTFlag_FormatUnusedR | kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB |
|
||||||
|
kRTFlag_FormatUnusedA,
|
||||||
// k_32_FLOAT
|
// k_32_FLOAT
|
||||||
0,
|
kRTFlag_FormatUnusedG | kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
|
||||||
// k_32_32_FLOAT
|
// k_32_32_FLOAT
|
||||||
0,
|
kRTFlag_FormatUnusedB | kRTFlag_FormatUnusedA,
|
||||||
};
|
};
|
||||||
return kRTFormatFlags[uint32_t(format)];
|
return kRTFormatFlags[uint32_t(format)];
|
||||||
}
|
}
|
||||||
|
@ -1054,10 +1058,15 @@ void DxbcShaderTranslator::StartTranslation() {
|
||||||
} else if (IsDXBCPixelShader()) {
|
} else if (IsDXBCPixelShader()) {
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (!is_depth_only_pixel_shader_) {
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
system_temp_color_[i] = PushSystemTemp(true);
|
// In the ROV path, no need to initialize the colors because original
|
||||||
|
// values will be kept for the unwritten components.
|
||||||
|
system_temp_color_[i] = PushSystemTemp(!edram_rov_used_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (edram_rov_used_) {
|
if (edram_rov_used_) {
|
||||||
|
if (!is_depth_only_pixel_shader_) {
|
||||||
|
system_temp_color_written_ = PushSystemTemp(true);
|
||||||
|
}
|
||||||
system_temp_depth_ = PushSystemTemp();
|
system_temp_depth_ = PushSystemTemp();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3242,6 +3251,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
|
bool color_targets_written;
|
||||||
|
if (is_depth_only_pixel_shader_) {
|
||||||
|
color_targets_written = false;
|
||||||
|
} else {
|
||||||
|
color_targets_written = writes_color_target(0) || writes_color_target(1) ||
|
||||||
|
writes_color_target(2) || writes_color_target(3);
|
||||||
|
}
|
||||||
|
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
// Calculate the offsets of the samples in the EDRAM.
|
// Calculate the offsets of the samples in the EDRAM.
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
@ -3399,7 +3416,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
|
|
||||||
// Calculate the address in the EDRAM buffer.
|
// Calculate the address in the EDRAM buffer.
|
||||||
|
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (color_targets_written) {
|
||||||
// 1a) Get dword offset within the tile to edram_coord_low_temp.x.
|
// 1a) Get dword offset within the tile to edram_coord_low_temp.x.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
@ -3437,7 +3454,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.uint_instruction_count;
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (color_targets_written) {
|
||||||
// 2a) Combine the tile offset and the offset within the tile to
|
// 2a) Combine the tile offset and the offset within the tile to
|
||||||
// edram_coord_low_temp.x.
|
// edram_coord_low_temp.x.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||||
|
@ -3481,7 +3498,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
|
|
||||||
uint32_t edram_coord_high_temp = 0;
|
uint32_t edram_coord_high_temp = 0;
|
||||||
|
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (color_targets_written) {
|
||||||
edram_coord_high_temp = PushSystemTemp();
|
edram_coord_high_temp = PushSystemTemp();
|
||||||
|
|
||||||
// Get which render targets are 64bpp, as log2 of dword count per pixel.
|
// Get which render targets are 64bpp, as log2 of dword count per pixel.
|
||||||
|
@ -3990,16 +4007,16 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Write to color render targets.
|
// Write to color render targets.
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (color_targets_written) {
|
||||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
||||||
|
|
||||||
// Get what render targets need to be written to.
|
// Mask disabled color writes.
|
||||||
uint32_t rt_used_temp = PushSystemTemp();
|
uint32_t rt_write_masks_temp = PushSystemTemp();
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(rt_used_temp);
|
shader_code_.push_back(rt_write_masks_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
@ -4007,33 +4024,27 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
shader_code_.push_back(kRTFlag_Used);
|
shader_code_.push_back(kRTFlag_WriteR_Shift);
|
||||||
shader_code_.push_back(kRTFlag_Used);
|
shader_code_.push_back(kRTFlag_WriteR_Shift);
|
||||||
shader_code_.push_back(kRTFlag_Used);
|
shader_code_.push_back(kRTFlag_WriteR_Shift);
|
||||||
shader_code_.push_back(kRTFlag_Used);
|
shader_code_.push_back(kRTFlag_WriteR_Shift);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.uint_instruction_count;
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
// Get what render targets need to be read (for write masks and blending).
|
|
||||||
uint32_t rt_load_temp = PushSystemTemp();
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(rt_load_temp);
|
shader_code_.push_back(system_temp_color_written_);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
shader_code_.push_back(system_temp_color_written_);
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(kRTFlag_Load);
|
shader_code_.push_back(rt_write_masks_temp);
|
||||||
shader_code_.push_back(kRTFlag_Load);
|
|
||||||
shader_code_.push_back(kRTFlag_Load);
|
|
||||||
shader_code_.push_back(kRTFlag_Load);
|
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.uint_instruction_count;
|
++stat_.uint_instruction_count;
|
||||||
|
// Release rt_write_masks_temp.
|
||||||
|
PopSystemTemp();
|
||||||
|
|
||||||
// Get what render targets need blending (if only write mask is used and no
|
// Get what render targets need blending (if only write mask is used and no
|
||||||
// blending, skip blending).
|
// blending, skip blending).
|
||||||
|
@ -4057,7 +4068,92 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.uint_instruction_count;
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Get what render targets need to be read (for write mask and blending).
|
||||||
|
uint32_t rt_overwritten_temp = PushSystemTemp();
|
||||||
|
// First, ignore components that don't exist in the render target at all -
|
||||||
|
// treat them as overwritten.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(4);
|
||||||
|
shader_code_.push_back(4);
|
||||||
|
shader_code_.push_back(4);
|
||||||
|
shader_code_.push_back(4);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
|
||||||
|
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
|
||||||
|
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
|
||||||
|
shader_code_.push_back(kRTFlag_FormatUnusedR_Shift);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(system_temp_color_written_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
// Then, check if the write mask + unused components is 1111 - if yes (and
|
||||||
|
// not blending), the pixel will be totally overwritten and no need to load
|
||||||
|
// the old pixel value.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(0b1111);
|
||||||
|
shader_code_.push_back(0b1111);
|
||||||
|
shader_code_.push_back(0b1111);
|
||||||
|
shader_code_.push_back(0b1111);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.int_instruction_count;
|
||||||
|
// Force load the previous pixel if blending.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(rt_blend_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (!writes_color_target(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if the render target needs to be written to.
|
// Check if the render target needs to be written to.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
|
@ -4065,7 +4161,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(rt_used_temp);
|
shader_code_.push_back(system_temp_color_written_);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
@ -4077,11 +4173,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// write mask.
|
// write mask.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(rt_load_temp);
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
uint32_t dest_color_temp = PushSystemTemp();
|
uint32_t dest_color_temp = PushSystemTemp();
|
||||||
|
@ -4165,12 +4261,12 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release rt_used_temp, rt_load_temp and rt_blend_temp.
|
// Release rt_blend_temp and rt_overwritten_temp.
|
||||||
PopSystemTemp(3);
|
PopSystemTemp(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release edram_coord_low_temp and, if used, edram_coord_high_temp.
|
// Release edram_coord_low_temp and, if used, edram_coord_high_temp.
|
||||||
PopSystemTemp(is_depth_only_pixel_shader_ ? 1 : 2);
|
PopSystemTemp(color_targets_written ? 2 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader() {
|
void DxbcShaderTranslator::CompletePixelShader() {
|
||||||
|
@ -4476,6 +4572,10 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
if (edram_rov_used_) {
|
if (edram_rov_used_) {
|
||||||
// Release system_temp_depth_.
|
// Release system_temp_depth_.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
if (!is_depth_only_pixel_shader_) {
|
||||||
|
// Release system_temp_color_written_.
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (!is_depth_only_pixel_shader_) {
|
||||||
// Release system_temp_color_.
|
// Release system_temp_color_.
|
||||||
|
@ -5460,6 +5560,30 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
}
|
}
|
||||||
PopSystemTemp(2);
|
PopSystemTemp(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (edram_rov_used_ &&
|
||||||
|
result.storage_target == InstructionStorageTarget::kColorTarget) {
|
||||||
|
// For ROV output, mark that the color has been written to.
|
||||||
|
// According to:
|
||||||
|
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
|
||||||
|
// if a color target has not been written to - including due to flow control -
|
||||||
|
// the render target must not be modified (the unwritten components of a
|
||||||
|
// written target are undefined, but let's keep the original value in this
|
||||||
|
// case).
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 1 << uint32_t(result.storage_index), 1));
|
||||||
|
shader_code_.push_back(system_temp_color_written_);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, uint32_t(result.storage_index), 1));
|
||||||
|
shader_code_.push_back(system_temp_color_written_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(swizzle_mask | constant_mask);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::ClosePredicate() {
|
void DxbcShaderTranslator::ClosePredicate() {
|
||||||
|
|
|
@ -77,17 +77,19 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
};
|
};
|
||||||
|
|
||||||
enum : uint32_t {
|
enum : uint32_t {
|
||||||
// Whether the write mask is non-zero.
|
|
||||||
kRTFlag_Used_Shift,
|
|
||||||
// Whether the render target needs to be merged with another (if the write
|
// Per-component write mask: whether each of the R, G, B and A components of
|
||||||
// mask is not 1111, or 11 for 16_16, or 1 for 32_FLOAT, or blending is
|
// the render target is enabled for writing (components that do not exist in
|
||||||
// enabled and it's not no-op).
|
// the render target format are excluded from the mask).
|
||||||
kRTFlag_Load_Shift,
|
|
||||||
kRTFlag_Blend_Shift,
|
|
||||||
kRTFlag_WriteR_Shift,
|
kRTFlag_WriteR_Shift,
|
||||||
kRTFlag_WriteG_Shift,
|
kRTFlag_WriteG_Shift,
|
||||||
kRTFlag_WriteB_Shift,
|
kRTFlag_WriteB_Shift,
|
||||||
kRTFlag_WriteA_Shift,
|
kRTFlag_WriteA_Shift,
|
||||||
|
kRTFlag_Blend_Shift,
|
||||||
|
// Whether the component does not exist in the render target format.
|
||||||
|
kRTFlag_FormatUnusedR_Shift,
|
||||||
|
kRTFlag_FormatUnusedG_Shift,
|
||||||
|
kRTFlag_FormatUnusedB_Shift,
|
||||||
|
kRTFlag_FormatUnusedA_Shift,
|
||||||
// Whether the format is fixed-point and needs to be converted to integer
|
// Whether the format is fixed-point and needs to be converted to integer
|
||||||
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
|
// (k_8_8_8_8, k_2_10_10_10, k_16_16, k_16_16_16_16).
|
||||||
kRTFlag_FormatFixed_Shift,
|
kRTFlag_FormatFixed_Shift,
|
||||||
|
@ -97,13 +99,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// f16tof32/f32tof16 is needed.
|
// f16tof32/f32tof16 is needed.
|
||||||
kRTFlag_FormatFloat16_Shift,
|
kRTFlag_FormatFloat16_Shift,
|
||||||
|
|
||||||
kRTFlag_Used = 1u << kRTFlag_Used_Shift,
|
|
||||||
kRTFlag_Load = 1u << kRTFlag_Load_Shift,
|
|
||||||
kRTFlag_Blend = 1u << kRTFlag_Blend_Shift,
|
|
||||||
kRTFlag_WriteR = 1u << kRTFlag_WriteR_Shift,
|
kRTFlag_WriteR = 1u << kRTFlag_WriteR_Shift,
|
||||||
kRTFlag_WriteG = 1u << kRTFlag_WriteG_Shift,
|
kRTFlag_WriteG = 1u << kRTFlag_WriteG_Shift,
|
||||||
kRTFlag_WriteB = 1u << kRTFlag_WriteB_Shift,
|
kRTFlag_WriteB = 1u << kRTFlag_WriteB_Shift,
|
||||||
kRTFlag_WriteA = 1u << kRTFlag_WriteA_Shift,
|
kRTFlag_WriteA = 1u << kRTFlag_WriteA_Shift,
|
||||||
|
kRTFlag_Blend = 1u << kRTFlag_Blend_Shift,
|
||||||
|
kRTFlag_FormatUnusedR = 1u << kRTFlag_FormatUnusedR_Shift,
|
||||||
|
kRTFlag_FormatUnusedG = 1u << kRTFlag_FormatUnusedG_Shift,
|
||||||
|
kRTFlag_FormatUnusedB = 1u << kRTFlag_FormatUnusedB_Shift,
|
||||||
|
kRTFlag_FormatUnusedA = 1u << kRTFlag_FormatUnusedA_Shift,
|
||||||
kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift,
|
kRTFlag_FormatFixed = 1u << kRTFlag_FormatFixed_Shift,
|
||||||
kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift,
|
kRTFlag_FormatFloat10 = 1u << kRTFlag_FormatFloat10_Shift,
|
||||||
kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift,
|
kRTFlag_FormatFloat16 = 1u << kRTFlag_FormatFloat16_Shift,
|
||||||
|
@ -919,6 +923,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// Color outputs in pixel shaders (because of exponent bias, alpha test and
|
// Color outputs in pixel shaders (because of exponent bias, alpha test and
|
||||||
// remapping).
|
// remapping).
|
||||||
uint32_t system_temp_color_[4];
|
uint32_t system_temp_color_[4];
|
||||||
|
// Whether the color output has been written in the execution path (ROV only).
|
||||||
|
uint32_t system_temp_color_written_;
|
||||||
// Depth output in pixel shader, and 3 dwords usable as scratch for operations
|
// Depth output in pixel shader, and 3 dwords usable as scratch for operations
|
||||||
// related to depth. Currently only used for ROV depth.
|
// related to depth. Currently only used for ROV depth.
|
||||||
// TODO(Triang3l): Reduce depth to 24-bit in pixel shaders when using a DSV
|
// TODO(Triang3l): Reduce depth to 24-bit in pixel shaders when using a DSV
|
||||||
|
|
|
@ -53,6 +53,8 @@ class ShaderTranslator {
|
||||||
bool uses_register_dynamic_addressing() const {
|
bool uses_register_dynamic_addressing() const {
|
||||||
return uses_register_dynamic_addressing_;
|
return uses_register_dynamic_addressing_;
|
||||||
}
|
}
|
||||||
|
// True if the current shader writes to a color target on any execution path.
// `i` is the color target index; it is used directly as the subscript into
// writes_color_targets_, and no range check is performed in this accessor.
|
||||||
|
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
||||||
// A list of all vertex bindings, populated before translation occurs.
|
// A list of all vertex bindings, populated before translation occurs.
|
||||||
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
|
const std::vector<Shader::VertexBinding>& vertex_bindings() const {
|
||||||
return vertex_bindings_;
|
return vertex_bindings_;
|
||||||
|
|
Loading…
Reference in New Issue