[D3D12] ROV: Aliasing and bounds checking, retc instead of discard

This commit is contained in:
Triang3l 2018-11-17 17:15:15 +03:00
parent 6901c3ea17
commit 66a37c0cc3
3 changed files with 177 additions and 51 deletions

View File

@ -1223,7 +1223,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Update system constants before uploading them.
UpdateSystemConstantValues(
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
pipeline_render_targets);
color_mask, pipeline_render_targets);
// Update constant buffers, descriptors and root parameters.
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
@ -1607,7 +1607,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
}
void D3D12CommandProcessor::UpdateSystemConstantValues(
Endian index_endian,
Endian index_endian, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_;
@ -1629,7 +1629,71 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
uint32_t rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].u32;
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
// Get the color info register values for each render target, and also apply
// some safety measures for the ROV path - disable fully aliased render
// targets. Also, for ROV, exclude components that don't exist in the format
// from the write mask.
uint32_t color_infos[4], rov_color_format_rt_flags[4];
for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_info;
switch (i) {
case 1:
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
break;
case 2:
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
break;
case 3:
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
break;
default:
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
}
color_infos[i] = color_info;
if (IsROVUsedForEDRAM()) {
ColorRenderTargetFormat color_format =
RenderTargetCache::GetBaseColorFormat(
ColorRenderTargetFormat((color_info >> 16) & 0xF));
uint32_t rt_flags =
DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
rov_color_format_rt_flags[i] = rt_flags;
// Exclude unused components from the write mask.
color_mask &=
~(((rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift) &
0xF)
<< (i * 4));
// Disable the render target if it has the same EDRAM base as another one
// (with a smaller index - assume it's more important).
if (color_mask & (0xF << (i * 4))) {
uint32_t edram_base = color_info & 0xFFF;
for (uint32_t j = 0; j < i; ++j) {
if ((color_mask & (0xF << (j * 4))) &&
edram_base == (color_infos[j] & 0xFFF)) {
color_mask &= ~(uint32_t(0xF << (i * 4)));
break;
}
}
}
}
}
// Disable depth and stencil if it aliases a color render target (for
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
// already disabled there).
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) {
uint32_t edram_base_depth = rb_depth_info & 0xFFF;
for (uint32_t i = 0; i < 4; ++i) {
if ((color_mask & (0xF << (i * 4))) &&
edram_base_depth == (color_infos[i] & 0xFFF)) {
rb_depthcontrol &= ~(uint32_t(0x1 | 0x2));
break;
}
}
}
bool dirty = false;
@ -1674,32 +1738,30 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) {
flags |= DxbcShaderTranslator::kSysFlag_Color3Gamma;
}
if (IsROVUsedForEDRAM()) {
if (rb_depthcontrol & (0x1 | 0x2)) {
flags |= DxbcShaderTranslator::kSysFlag_DepthStencil;
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) ==
DepthRenderTargetFormat::kD24FS8) {
flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24;
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) {
flags |= DxbcShaderTranslator::kSysFlag_DepthStencil;
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) ==
DepthRenderTargetFormat::kD24FS8) {
flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24;
}
if (rb_depthcontrol & 0x2) {
flags |= ((rb_depthcontrol >> 4) & 0x7)
<< DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift;
if (rb_depthcontrol & 0x4) {
flags |= DxbcShaderTranslator::kSysFlag_DepthWriteMask |
DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
}
if (rb_depthcontrol & 0x2) {
flags |= ((rb_depthcontrol >> 4) & 0x7)
<< DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift;
if (rb_depthcontrol & 0x4) {
flags |= DxbcShaderTranslator::kSysFlag_DepthWriteMask |
DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
}
} else {
// In case stencil is used without depth testing - always pass, and
// don't modify the stored depth.
flags |= DxbcShaderTranslator::kSysFlag_DepthPassIfLess |
DxbcShaderTranslator::kSysFlag_DepthPassIfEqual |
DxbcShaderTranslator::kSysFlag_DepthPassIfGreater;
}
if (rb_depthcontrol & 0x1) {
flags |= DxbcShaderTranslator::kSysFlag_StencilTest;
if (rb_stencilrefmask & (0xFF << 16)) {
flags |= DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
}
} else {
// In case stencil is used without depth testing - always pass, and
// don't modify the stored depth.
flags |= DxbcShaderTranslator::kSysFlag_DepthPassIfLess |
DxbcShaderTranslator::kSysFlag_DepthPassIfEqual |
DxbcShaderTranslator::kSysFlag_DepthPassIfGreater;
}
if (rb_depthcontrol & 0x1) {
flags |= DxbcShaderTranslator::kSysFlag_StencilTest;
if (rb_stencilrefmask & (0xFF << 16)) {
flags |= DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
}
}
}
@ -1860,25 +1922,32 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.alpha_test != alpha_test;
system_constants_.alpha_test = alpha_test;
// Color exponent bias and output index mapping or ROV writing.
// EDRAM pitch for ROV writing.
if (IsROVUsedForEDRAM()) {
uint32_t edram_pitch_tiles = ((std::min(rb_surface_info & 0x3FFFu, 2560u) *
(msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
79) /
80;
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
}
// Color exponent bias and output index mapping or ROV render target writing.
bool colorcontrol_blend_enable = (rb_colorcontrol & 0x20) == 0;
for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_info, blend_control;
uint32_t color_info = color_infos[i];
uint32_t blend_control;
switch (i) {
case 1:
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32;
break;
case 2:
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32;
break;
case 3:
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32;
break;
default:
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32;
}
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
@ -1907,19 +1976,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t edram_base_dwords = (color_info & 0xFFF) * 1280;
dirty |= system_constants_.edram_base_dwords[i] != edram_base_dwords;
system_constants_.edram_base_dwords[i] = edram_base_dwords;
uint32_t edram_pitch_tiles =
((std::min(rb_surface_info & 0x3FFFu, 2560u) *
(msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
79) /
80;
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
uint32_t rt_flags =
DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
// Exclude unused components from the write mask.
uint32_t rt_mask =
(rb_color_mask >> (i * 4)) & 0xF &
~(rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift);
uint32_t rt_flags = rov_color_format_rt_flags[i];
// Unused components were already excluded from the write mask when the
// color infos were obtained, and fully aliased render targets were already
// skipped.
uint32_t rt_mask = (color_mask >> (i * 4)) & 0xF;
if (rt_mask != 0) {
rt_flags |= rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift;
uint32_t blend_x, blend_y;

View File

@ -197,7 +197,7 @@ class D3D12CommandProcessor : public CommandProcessor {
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
void UpdateSystemConstantValues(
Endian index_endian,
Endian index_endian, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
const D3D12Shader* vertex_shader,

View File

@ -4111,6 +4111,37 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
++stat_.instruction_count;
++stat_.int_instruction_count;
// Prevent going out of EDRAM bounds.
uint32_t depth_stencil_bound_check_temp = PushSystemTemp();
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_bound_check_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1280 * 2048);
++stat_.instruction_count;
++stat_.uint_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_bound_check_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Release depth_stencil_bound_check_temp.
PopSystemTemp();
// Enter the depth/stencil test if needed.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
@ -4805,13 +4836,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Discard the pixel if depth test failed and no stencil testing.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DISCARD) |
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Stencil test done.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
@ -4894,13 +4926,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// writing because stencil may be modified even if the depth/stencil test
// fails.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DISCARD) |
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(depth_stencil_test_result_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Release depth_stencil_test_result_temp.
PopSystemTemp();
@ -4920,8 +4953,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
if (color_targets_written) {
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
// Mask disabled color writes.
uint32_t rt_write_masks_temp = PushSystemTemp();
// Mask disabled color writes.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
@ -4953,6 +4987,38 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(rt_write_masks_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Prevent going out of EDRAM bounds.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(rt_write_masks_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(1280 * 2048);
shader_code_.push_back(1280 * 2048);
shader_code_.push_back(1280 * 2048);
shader_code_.push_back(1280 * 2048);
++stat_.instruction_count;
++stat_.uint_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_color_written_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(rt_write_masks_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Release rt_write_masks_temp.
PopSystemTemp();