[D3D12] Place UAV barriers for the EDRAM ROV when layout changes

This commit is contained in:
Triang3l 2019-04-12 15:59:09 +03:00
parent 45cee3f871
commit 93a18a517b
4 changed files with 241 additions and 178 deletions

View File

@ -1469,10 +1469,6 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} }
} }
if (IsROVUsedForEDRAM()) {
render_target_cache_->UseEDRAMAsUAV();
}
// Actually draw. // Actually draw.
if (indexed) { if (indexed) {
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32

View File

@ -131,6 +131,7 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
Shutdown(); Shutdown();
return false; return false;
} }
edram_buffer_modified_ = false;
// Create non-shader-visible descriptors of the EDRAM buffer for copying. // Create non-shader-visible descriptors of the EDRAM buffer for copying.
D3D12_DESCRIPTOR_HEAP_DESC edram_buffer_descriptor_heap_desc; D3D12_DESCRIPTOR_HEAP_DESC edram_buffer_descriptor_heap_desc;
@ -377,6 +378,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
return false; return false;
} }
ClearBindings();
return true; return true;
} }
@ -443,23 +446,32 @@ void RenderTargetCache::ClearCache() {
#endif #endif
} }
void RenderTargetCache::BeginFrame() { ClearBindings(); } void RenderTargetCache::BeginFrame() {
// A frame does not always end in a resolve (for example, when memexport
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // readback happens) or something else that would surely submit the UAV
if (command_processor_->IsROVUsedForEDRAM()) { // barrier, so we need to preserve the `current_` variables.
return true; if (!command_processor_->IsROVUsedForEDRAM()) {
ClearBindings();
}
} }
bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// There are two kinds of render target binding updates in this implementation // There are two kinds of render target binding updates in this implementation
// in case something has been changed - full and partial. // in case something has been changed - full and partial.
// //
// A full update involves flushing all the currently bound render targets that // For the RTV/DSV path, a full update involves flushing all the currently
// have been modified to the EDRAM buffer, allocating all the newly bound // bound render targets that have been modified to the EDRAM buffer,
// render targets in the heaps, loading them from the EDRAM buffer and binding // allocating all the newly bound render targets in the heaps, loading them
// them. // from the EDRAM buffer and binding them.
//
// For the ROV path, a full update places a UAV barrier because across draws,
// pixels with different SV_Positions or different sample counts (thus without
// interlocking between each other) may access the same data now. Not having
// the barriers causes visual glitches in many games, such as Halo 3 where the
// right side of the menu and shadow maps get corrupted (at least on Nvidia).
// //
// ("Bound" here means ever used since the last full update - and in this case // ("Bound" here means ever used since the last full update - and in this case
// it's bound to the Direct3D 12 command list.) // it's bound to the Direct3D 12 command list in the RTV/DSV path.)
// //
// However, Banjo-Kazooie interleaves color/depth and depth-only writes every // However, Banjo-Kazooie interleaves color/depth and depth-only writes every
// draw call, and doing a full update whenever the color mask is changed is // draw call, and doing a full update whenever the color mask is changed is
@ -503,9 +515,10 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// - Surface pitch changed. // - Surface pitch changed.
// - Sample count changed. // - Sample count changed.
// - Render target is disabled and another render target got more space than // - Render target is disabled and another render target got more space than
// is currently available in the textures. // is currently available in the textures (RTV/DSV only).
// - EDRAM base of a currently used RT changed. // - EDRAM base of a currently used RT changed.
// - Format of a currently used RT changed. // - Format of a currently used RT changed (RTV/DSV) or pixel size of a
// currently used RT changed (ROV).
// - Current viewport contains unsaved data from previously used render // - Current viewport contains unsaved data from previously used render
// targets. // targets.
// - New render target overlaps unsaved data from other bound render targets. // - New render target overlaps unsaved data from other bound render targets.
@ -518,13 +531,15 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// - New render target is added, but doesn't overlap unsaved data from other // - New render target is added, but doesn't overlap unsaved data from other
// currently or previously used render targets, and it doesn't require a // currently or previously used render targets, and it doesn't require a
// bigger size. // bigger size.
auto command_list = command_processor_->GetDeferredCommandList();
auto& regs = *register_file_; auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
bool rov_used = command_processor_->IsROVUsedForEDRAM();
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
if (surface_pitch == 0) { if (surface_pitch == 0) {
@ -553,8 +568,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
} }
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
// 0x1 = stencil test, 0x2 = depth test, 0x4 = depth write. // 0x1 = stencil test, 0x2 = depth test.
enabled[4] = (rb_depthcontrol & (0x1 | 0x2 | 0x4)) != 0; enabled[4] = (rb_depthcontrol & (0x1 | 0x2)) != 0;
edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u); edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u);
formats[4] = (rb_depth_info >> 16) & 0x1; formats[4] = (rb_depth_info >> 16) & 0x1;
formats_are_64bpp[4] = false; formats_are_64bpp[4] = false;
@ -599,8 +614,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
edram_max_rows = std::min(edram_max_rows, 160u * msaa_samples_y); edram_max_rows = std::min(edram_max_rows, 160u * msaa_samples_y);
// Check the following full update conditions: // Check the following full update conditions:
// - Render target is disabled and another render target got more space than // - Render target is disabled and another render target got more space than
// is currently available in the textures. // is currently available in the textures (RTV/DSV only).
if (edram_max_rows > current_edram_max_rows_) { if (!rov_used && edram_max_rows > current_edram_max_rows_) {
full_update = true; full_update = true;
} }
@ -635,7 +650,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
// Check the following full update conditions: // Check the following full update conditions:
// - EDRAM base of a currently used RT changed. // - EDRAM base of a currently used RT changed.
// - Format of a currently used RT changed. // - Format of a currently used RT changed (RTV/DSV) or pixel size of a
// currently used RT changed (ROV).
// Also build a list of render targets to attach in a partial update. // Also build a list of render targets to attach in a partial update.
uint32_t render_targets_to_attach = 0; uint32_t render_targets_to_attach = 0;
if (!full_update) { if (!full_update) {
@ -645,9 +661,18 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
} }
const RenderTargetBinding& binding = current_bindings_[i]; const RenderTargetBinding& binding = current_bindings_[i];
if (binding.is_bound) { if (binding.is_bound) {
if (binding.edram_base != edram_bases[i] || if (binding.edram_base != edram_bases[i]) {
binding.format != formats[i]) { break;
full_update = true; }
if (rov_used) {
if (i != 4) {
full_update |= IsColorFormat64bpp(binding.color_format) !=
formats_are_64bpp[i];
}
} else {
full_update |= binding.format != formats[i];
}
if (full_update) {
break; break;
} }
} else { } else {
@ -719,13 +744,23 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
uint32_t heap_usage[5] = {}; uint32_t heap_usage[5] = {};
#endif #endif
if (full_update) { if (full_update) {
// Export the currently bound render targets before we ruin the bindings. if (rov_used) {
// Place a UAV barrier because across draws, pixels with different
// SV_Positions or different sample counts (thus without interlocking
// between each other) may access the same data now.
CommitEDRAMBufferUAVWrites(false);
} else {
// Export the currently bound render targets before we ruin the
// bindings.
StoreRenderTargetsToEDRAM(); StoreRenderTargetsToEDRAM();
}
ClearBindings(); ClearBindings();
current_surface_pitch_ = surface_pitch; current_surface_pitch_ = surface_pitch;
current_msaa_samples_ = msaa_samples; current_msaa_samples_ = msaa_samples;
if (!rov_used) {
current_edram_max_rows_ = edram_max_rows; current_edram_max_rows_ = edram_max_rows;
}
// If updating fully, need to reattach all the render targets and allocate // If updating fully, need to reattach all the render targets and allocate
// from scratch. // from scratch.
@ -736,6 +771,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
} }
} else { } else {
#if 0 #if 0
if (!rov_used) {
// If updating partially, only need to attach new render targets. // If updating partially, only need to attach new render targets.
for (uint32_t i = 0; i < 5; ++i) { for (uint32_t i = 0; i < 5; ++i) {
const RenderTargetBinding& binding = current_bindings_[i]; const RenderTargetBinding& binding = current_bindings_[i];
@ -749,14 +785,17 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
render_target->heap_page_count; render_target->heap_page_count;
} }
} }
}
#endif #endif
} }
XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u", XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u",
full_update ? "Full" : "Partial", surface_pitch, msaa_samples, full_update ? "Full" : "Partial", surface_pitch, msaa_samples,
render_targets_to_attach); render_targets_to_attach);
#if 0
auto device = auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice(); command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
#endif
// Allocate new render targets and add them to the bindings list. // Allocate new render targets and add them to the bindings list.
for (uint32_t i = 0; i < 5; ++i) { for (uint32_t i = 0; i < 5; ++i) {
@ -770,11 +809,13 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
binding.format = formats[i]; binding.format = formats[i];
binding.render_target = nullptr; binding.render_target = nullptr;
if (!rov_used) {
RenderTargetKey key; RenderTargetKey key;
key.width_ss_div_80 = edram_row_tiles_32bpp; key.width_ss_div_80 = edram_row_tiles_32bpp;
key.height_ss_div_16 = current_edram_max_rows_; key.height_ss_div_16 = current_edram_max_rows_;
key.is_depth = i == 4 ? 1 : 0; key.is_depth = i == 4 ? 1 : 0;
key.format = formats[i]; key.format = formats[i];
D3D12_RESOURCE_DESC resource_desc; D3D12_RESOURCE_DESC resource_desc;
if (!GetResourceDesc(key, resource_desc)) { if (!GetResourceDesc(key, resource_desc)) {
// Invalid format. // Invalid format.
@ -814,8 +855,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
heap_usage[heap_page_first / kHeap4MBPages] += heap_page_count; heap_usage[heap_page_first / kHeap4MBPages] += heap_page_count;
// Inform Direct3D that we're reusing the heap for this render target. // Inform Direct3D that we're reusing the heap for this render target.
command_processor_->PushAliasingBarrier(nullptr, command_processor_->PushAliasingBarrier(
binding.render_target->resource); nullptr, binding.render_target->resource);
#else #else
// If multiple render targets have the same format, assign different // If multiple render targets have the same format, assign different
// instance numbers to them. // instance numbers to them.
@ -833,7 +874,9 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
binding.render_target = FindOrCreateRenderTarget(key, instance); binding.render_target = FindOrCreateRenderTarget(key, instance);
#endif #endif
} }
}
if (!rov_used) {
// Sample positions when loading depth must match sample positions when // Sample positions when loading depth must match sample positions when
// drawing. // drawing.
command_processor_->SetSamplePositions(msaa_samples); command_processor_->SetSamplePositions(msaa_samples);
@ -856,8 +899,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
++load_render_target_count; ++load_render_target_count;
} }
if (load_render_target_count != 0) { if (load_render_target_count != 0) {
LoadRenderTargetsFromEDRAM(load_render_target_count, load_render_targets, LoadRenderTargetsFromEDRAM(load_render_target_count,
load_edram_bases); load_render_targets, load_edram_bases);
} }
// Transition the render targets to the appropriate state if needed, // Transition the render targets to the appropriate state if needed,
@ -905,8 +948,9 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN; current_pipeline_render_targets_[4].format = DXGI_FORMAT_UNKNOWN;
} }
command_processor_->SubmitBarriers(); command_processor_->SubmitBarriers();
command_list->D3DOMSetRenderTargets(rtv_count, rtv_handles, FALSE, command_processor_->GetDeferredCommandList()->D3DOMSetRenderTargets(
dsv_handle); rtv_count, rtv_handles, FALSE, dsv_handle);
}
} }
// Update the dirty regions. // Update the dirty regions.
@ -915,7 +959,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
continue; continue;
} }
RenderTargetBinding& binding = current_bindings_[i]; RenderTargetBinding& binding = current_bindings_[i];
if (binding.render_target == nullptr) { if (!rov_used && binding.render_target == nullptr) {
// Nothing to store to the EDRAM buffer if there was an error. // Nothing to store to the EDRAM buffer if there was an error.
continue; continue;
} }
@ -923,18 +967,26 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) {
std::max(binding.edram_dirty_rows, edram_dirty_rows); std::max(binding.edram_dirty_rows, edram_dirty_rows);
} }
if (rov_used) {
// The buffer will be used for ROV drawing now.
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
edram_buffer_modified_ = true;
}
return true; return true;
} }
bool RenderTargetCache::Resolve(SharedMemory* shared_memory, bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
TextureCache* texture_cache, Memory* memory) { TextureCache* texture_cache, Memory* memory) {
if (!command_processor_->IsROVUsedForEDRAM()) {
// Save the currently bound render targets to the EDRAM buffer that will be // Save the currently bound render targets to the EDRAM buffer that will be
// used as the resolve source and clear bindings to allow render target // used as the resolve source and clear bindings to allow render target
// resources to be reused as source textures for format conversion, resolving // resources to be reused as source textures for format conversion,
// samples, to let format conversion bind other render targets, and so after a // resolving samples, to let format conversion bind other render targets,
// clear new data will be loaded. // and so after a clear new data will be loaded.
StoreRenderTargetsToEDRAM(); StoreRenderTargetsToEDRAM();
ClearBindings(); ClearBindings();
}
auto& regs = *register_file_; auto& regs = *register_file_;
@ -1083,7 +1135,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
if (command_processor_->IsROVUsedForEDRAM()) { if (command_processor_->IsROVUsedForEDRAM()) {
// Commit ROV writes. // Commit ROV writes.
command_processor_->PushUAVBarrier(edram_buffer_); CommitEDRAMBufferUAVWrites(false);
} }
// GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed // GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed
@ -1809,7 +1861,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start); command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
// 1 group per 80x16 samples. Resolution scale handled in the shader itself. // 1 group per 80x16 samples. Resolution scale handled in the shader itself.
command_list->D3DDispatch(row_width_ss_div_80, rows, 1); command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
command_processor_->PushUAVBarrier(edram_buffer_); CommitEDRAMBufferUAVWrites(true);
return true; return true;
} }
@ -2010,14 +2062,13 @@ RenderTargetCache::ResolveTarget* RenderTargetCache::FindOrCreateResolveTarget(
} }
void RenderTargetCache::UnbindRenderTargets() { void RenderTargetCache::UnbindRenderTargets() {
if (command_processor_->IsROVUsedForEDRAM()) {
return;
}
StoreRenderTargetsToEDRAM(); StoreRenderTargetsToEDRAM();
ClearBindings(); ClearBindings();
} }
// Transitions the EDRAM buffer to the D3D12 unordered access resource state so
// it can be accessed as a UAV (the ROV rendering path).
void RenderTargetCache::UseEDRAMAsUAV() {
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
void RenderTargetCache::WriteEDRAMUint32UAVDescriptor( void RenderTargetCache::WriteEDRAMUint32UAVDescriptor(
D3D12_CPU_DESCRIPTOR_HANDLE handle) { D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
@ -2095,6 +2146,13 @@ void RenderTargetCache::TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state) {
edram_buffer_state_ = new_state; edram_buffer_state_ = new_state;
} }
// Pushes a UAV barrier for the EDRAM buffer when there are unordered writes
// that have to be made visible, then marks the buffer as having no pending
// writes.
//
// force - when true, the barrier is pushed regardless of the
//         edram_buffer_modified_ flag; when false, it is pushed only if the
//         flag is currently set.
//
// The flag is cleared on every call, whether or not a barrier was pushed.
void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) {
  const bool barrier_required = force || edram_buffer_modified_;
  if (barrier_required) {
    command_processor_->PushUAVBarrier(edram_buffer_);
  }
  // Nothing is outstanding after this point.
  edram_buffer_modified_ = false;
}
void RenderTargetCache::WriteEDRAMRawSRVDescriptor( void RenderTargetCache::WriteEDRAMRawSRVDescriptor(
D3D12_CPU_DESCRIPTOR_HANDLE handle) { D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider(); auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
@ -2589,7 +2647,7 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
command_list->D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1); command_list->D3DDispatch(surface_pitch_tiles, binding.edram_dirty_rows, 1);
// Commit the UAV write. // Commit the UAV write.
command_processor_->PushUAVBarrier(edram_buffer_); CommitEDRAMBufferUAVWrites(true);
} }
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);

View File

@ -274,8 +274,6 @@ class RenderTargetCache {
// the command processor takes over framebuffer bindings to draw something // the command processor takes over framebuffer bindings to draw something
// special. // special.
void UnbindRenderTargets(); void UnbindRenderTargets();
// Transitions the EDRAM buffer to a UAV - for use with ROV rendering.
void UseEDRAMAsUAV();
void WriteEDRAMUint32UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteEDRAMUint32UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void EndFrame(); void EndFrame();
@ -422,6 +420,7 @@ class RenderTargetCache {
uint32_t GetEDRAMBufferSize() const; uint32_t GetEDRAMBufferSize() const;
void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state); void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state);
void CommitEDRAMBufferUAVWrites(bool force);
void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle); void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
@ -511,6 +510,9 @@ class RenderTargetCache {
// The EDRAM buffer allowing color and depth data to be reinterpreted. // The EDRAM buffer allowing color and depth data to be reinterpreted.
ID3D12Resource* edram_buffer_ = nullptr; ID3D12Resource* edram_buffer_ = nullptr;
D3D12_RESOURCE_STATES edram_buffer_state_; D3D12_RESOURCE_STATES edram_buffer_state_;
// Whether there have been any outstanding UAV writes and a UAV barrier is
// needed before accessing the EDRAM buffer in an unordered way again.
bool edram_buffer_modified_ = false;
// Non-shader-visible descriptor heap containing pre-created SRV and UAV // Non-shader-visible descriptor heap containing pre-created SRV and UAV
// descriptors of the EDRAM buffer, for faster binding (via copying rather // descriptors of the EDRAM buffer, for faster binding (via copying rather
@ -629,6 +631,7 @@ class RenderTargetCache {
uint32_t current_surface_pitch_ = 0; uint32_t current_surface_pitch_ = 0;
MsaaSamples current_msaa_samples_ = MsaaSamples::k1X; MsaaSamples current_msaa_samples_ = MsaaSamples::k1X;
// current_edram_max_rows_ is for RTV/DSV only (render target texture size).
uint32_t current_edram_max_rows_ = 0; uint32_t current_edram_max_rows_ = 0;
RenderTargetBinding current_bindings_[5] = {}; RenderTargetBinding current_bindings_[5] = {};

View File

@ -1988,6 +1988,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
PopSystemTemp(); PopSystemTemp();
// Load the previous depth/stencil values. // Load the previous depth/stencil values.
// The `if`s are REQUIRED - interlocking is done per-sample, not per-pixel!
uint32_t depth_values_temp = PushSystemTemp(); uint32_t depth_values_temp = PushSystemTemp();
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
@ -2848,6 +2849,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Write new depth/stencil for the covered samples. // Write new depth/stencil for the covered samples.
// The `if`s are REQUIRED - interlocking is done per-sample, not per-pixel!
for (uint32_t i = 0; i < 4; ++i) { for (uint32_t i = 0; i < 4; ++i) {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
@ -5564,6 +5566,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
} }
// Sample loop. // Sample loop.
// The `if`s are REQUIRED - interlocking is done per-sample, not
// per-pixel!
for (uint32_t k = 0; k < 4; ++k) { for (uint32_t k = 0; k < 4; ++k) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
@ -5927,6 +5931,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
} }
// Sample loop. // Sample loop.
// The `if`s are REQUIRED - interlocking is done per-sample,
// not per-pixel!
for (uint32_t k = 0; k < 4; ++k) { for (uint32_t k = 0; k < 4; ++k) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |