[D3D12] ROV: Aliasing and bounds checking, retc instead of discard
This commit is contained in:
parent
6901c3ea17
commit
66a37c0cc3
|
@ -1223,7 +1223,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Update system constants before uploading them.
|
||||
UpdateSystemConstantValues(
|
||||
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
|
||||
pipeline_render_targets);
|
||||
color_mask, pipeline_render_targets);
|
||||
|
||||
// Update constant buffers, descriptors and root parameters.
|
||||
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
|
||||
|
@ -1607,7 +1607,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
|
|||
}
|
||||
|
||||
void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||
Endian index_endian,
|
||||
Endian index_endian, uint32_t color_mask,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
|
@ -1629,7 +1629,71 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32;
|
||||
uint32_t rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].u32;
|
||||
uint32_t rb_color_mask = regs[XE_GPU_REG_RB_COLOR_MASK].u32;
|
||||
|
||||
// Get the color info register values for each render target, and also put
|
||||
// some safety measures for the ROV path - disable fully aliased render
|
||||
// targets. Also, for ROV, exclude components that don't exist in the format
|
||||
// from the write mask.
|
||||
uint32_t color_infos[4], rov_color_format_rt_flags[4];
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_info;
|
||||
switch (i) {
|
||||
case 1:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
|
||||
break;
|
||||
case 2:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
|
||||
break;
|
||||
case 3:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
|
||||
break;
|
||||
default:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
|
||||
}
|
||||
color_infos[i] = color_info;
|
||||
|
||||
if (IsROVUsedForEDRAM()) {
|
||||
ColorRenderTargetFormat color_format =
|
||||
RenderTargetCache::GetBaseColorFormat(
|
||||
ColorRenderTargetFormat((color_info >> 16) & 0xF));
|
||||
uint32_t rt_flags =
|
||||
DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
|
||||
rov_color_format_rt_flags[i] = rt_flags;
|
||||
|
||||
// Exclude unused components from the write mask.
|
||||
color_mask &=
|
||||
~(((rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift) &
|
||||
0xF)
|
||||
<< (i * 4));
|
||||
|
||||
// Disable the render target if it has the same EDRAM base as another one
|
||||
// (with a smaller index - assume it's more important).
|
||||
if (color_mask & (0xF << (i * 4))) {
|
||||
uint32_t edram_base = color_info & 0xFFF;
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
if ((color_mask & (0xF << (j * 4))) &&
|
||||
edram_base == (color_infos[j] & 0xFFF)) {
|
||||
color_mask &= ~(uint32_t(0xF << (i * 4)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Disable depth and stencil if it aliases a color render target (for
|
||||
// instance, during the XBLA logo in Banjo-Kazooie, though depth writing is
|
||||
// already disabled there).
|
||||
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) {
|
||||
uint32_t edram_base_depth = rb_depth_info & 0xFFF;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if ((color_mask & (0xF << (i * 4))) &&
|
||||
edram_base_depth == (color_infos[i] & 0xFFF)) {
|
||||
rb_depthcontrol &= ~(uint32_t(0x1 | 0x2));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool dirty = false;
|
||||
|
||||
|
@ -1674,32 +1738,30 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_Color3Gamma;
|
||||
}
|
||||
if (IsROVUsedForEDRAM()) {
|
||||
if (rb_depthcontrol & (0x1 | 0x2)) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthStencil;
|
||||
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) ==
|
||||
DepthRenderTargetFormat::kD24FS8) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24;
|
||||
if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthStencil;
|
||||
if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) ==
|
||||
DepthRenderTargetFormat::kD24FS8) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24;
|
||||
}
|
||||
if (rb_depthcontrol & 0x2) {
|
||||
flags |= ((rb_depthcontrol >> 4) & 0x7)
|
||||
<< DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift;
|
||||
if (rb_depthcontrol & 0x4) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthWriteMask |
|
||||
DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
|
||||
}
|
||||
if (rb_depthcontrol & 0x2) {
|
||||
flags |= ((rb_depthcontrol >> 4) & 0x7)
|
||||
<< DxbcShaderTranslator::kSysFlag_DepthPassIfLess_Shift;
|
||||
if (rb_depthcontrol & 0x4) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthWriteMask |
|
||||
DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
|
||||
}
|
||||
} else {
|
||||
// In case stencil is used without depth testing - always pass, and
|
||||
// don't modify the stored depth.
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthPassIfLess |
|
||||
DxbcShaderTranslator::kSysFlag_DepthPassIfEqual |
|
||||
DxbcShaderTranslator::kSysFlag_DepthPassIfGreater;
|
||||
}
|
||||
if (rb_depthcontrol & 0x1) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_StencilTest;
|
||||
if (rb_stencilrefmask & (0xFF << 16)) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
|
||||
}
|
||||
} else {
|
||||
// In case stencil is used without depth testing - always pass, and
|
||||
// don't modify the stored depth.
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthPassIfLess |
|
||||
DxbcShaderTranslator::kSysFlag_DepthPassIfEqual |
|
||||
DxbcShaderTranslator::kSysFlag_DepthPassIfGreater;
|
||||
}
|
||||
if (rb_depthcontrol & 0x1) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_StencilTest;
|
||||
if (rb_stencilrefmask & (0xFF << 16)) {
|
||||
flags |= DxbcShaderTranslator::kSysFlag_DepthStencilWrite;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1860,25 +1922,32 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
dirty |= system_constants_.alpha_test != alpha_test;
|
||||
system_constants_.alpha_test = alpha_test;
|
||||
|
||||
// Color exponent bias and output index mapping or ROV writing.
|
||||
// EDRAM pitch for ROV writing.
|
||||
if (IsROVUsedForEDRAM()) {
|
||||
uint32_t edram_pitch_tiles = ((std::min(rb_surface_info & 0x3FFFu, 2560u) *
|
||||
(msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
|
||||
79) /
|
||||
80;
|
||||
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
||||
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
||||
}
|
||||
|
||||
// Color exponent bias and output index mapping or ROV render target writing.
|
||||
bool colorcontrol_blend_enable = (rb_colorcontrol & 0x20) == 0;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_info, blend_control;
|
||||
uint32_t color_info = color_infos[i];
|
||||
uint32_t blend_control;
|
||||
switch (i) {
|
||||
case 1:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
|
||||
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32;
|
||||
break;
|
||||
case 2:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
|
||||
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32;
|
||||
break;
|
||||
case 3:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
|
||||
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32;
|
||||
break;
|
||||
default:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
|
||||
blend_control = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32;
|
||||
}
|
||||
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
|
||||
|
@ -1907,19 +1976,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
uint32_t edram_base_dwords = (color_info & 0xFFF) * 1280;
|
||||
dirty |= system_constants_.edram_base_dwords[i] != edram_base_dwords;
|
||||
system_constants_.edram_base_dwords[i] = edram_base_dwords;
|
||||
uint32_t edram_pitch_tiles =
|
||||
((std::min(rb_surface_info & 0x3FFFu, 2560u) *
|
||||
(msaa_samples >= MsaaSamples::k4X ? 2 : 1)) +
|
||||
79) /
|
||||
80;
|
||||
dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles;
|
||||
system_constants_.edram_pitch_tiles = edram_pitch_tiles;
|
||||
uint32_t rt_flags =
|
||||
DxbcShaderTranslator::GetColorFormatRTFlags(color_format);
|
||||
// Exclude unused components from the write mask.
|
||||
uint32_t rt_mask =
|
||||
(rb_color_mask >> (i * 4)) & 0xF &
|
||||
~(rt_flags >> DxbcShaderTranslator::kRTFlag_FormatUnusedR_Shift);
|
||||
uint32_t rt_flags = rov_color_format_rt_flags[i];
|
||||
// Unused components already excluded from the write mask when color infos
|
||||
// were obtained, and fully aliased render targets were already skipped.
|
||||
uint32_t rt_mask = (color_mask >> (i * 4)) & 0xF;
|
||||
if (rt_mask != 0) {
|
||||
rt_flags |= rt_mask << DxbcShaderTranslator::kRTFlag_WriteR_Shift;
|
||||
uint32_t blend_x, blend_y;
|
||||
|
|
|
@ -197,7 +197,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
|
||||
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
|
||||
void UpdateSystemConstantValues(
|
||||
Endian index_endian,
|
||||
Endian index_endian, uint32_t color_mask,
|
||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
||||
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
||||
const D3D12Shader* vertex_shader,
|
||||
|
|
|
@ -4111,6 +4111,37 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
// Prevent going out of EDRAM bounds.
|
||||
uint32_t depth_stencil_bound_check_temp = PushSystemTemp();
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(depth_stencil_bound_check_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
||||
shader_code_.push_back(system_temp_depth_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(1280 * 2048);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(depth_stencil_test_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(depth_stencil_test_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(depth_stencil_bound_check_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
// Release depth_stencil_bound_check_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
// Enter the depth/stencil test if needed.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||
|
@ -4805,13 +4836,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
|
||||
// Discard the pixel if depth test failed and no stencil testing.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DISCARD) |
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(depth_stencil_test_result_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
// Stencil test done.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
|
@ -4894,13 +4926,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
// writing because stencil may be modified even if the depth/stencil test
|
||||
// fails.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DISCARD) |
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(depth_stencil_test_result_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
// Release depth_stencil_test_result_temp.
|
||||
PopSystemTemp();
|
||||
|
@ -4920,8 +4953,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
if (color_targets_written) {
|
||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
||||
|
||||
// Mask disabled color writes.
|
||||
uint32_t rt_write_masks_temp = PushSystemTemp();
|
||||
|
||||
// Mask disabled color writes.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||
shader_code_.push_back(
|
||||
|
@ -4953,6 +4987,38 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
shader_code_.push_back(rt_write_masks_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Prevent going out of EDRAM bounds.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ULT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(rt_write_masks_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(edram_coord_low_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(1280 * 2048);
|
||||
shader_code_.push_back(1280 * 2048);
|
||||
shader_code_.push_back(1280 * 2048);
|
||||
shader_code_.push_back(1280 * 2048);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(system_temp_color_written_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_color_written_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(rt_write_masks_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Release rt_write_masks_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
|
|
Loading…
Reference in New Issue