From e803ee84d59a6c6d43294a9f6187027aefba955b Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 22 Dec 2018 15:39:47 +0300 Subject: [PATCH] [D3D12] Bind shared memory as UAV with memexport --- .../gpu/d3d12/d3d12_command_processor.cc | 70 ++++++++++++++----- src/xenia/gpu/d3d12/d3d12_command_processor.h | 3 +- src/xenia/gpu/dxbc_shader_translator.cc | 70 ++++++++++++++----- src/xenia/gpu/dxbc_shader_translator.h | 13 ++++ src/xenia/gpu/dxbc_shader_translator_fetch.cc | 54 ++++++++++++++ src/xenia/gpu/dxbc_shader_translator_om.cc | 16 ++--- 6 files changed, 184 insertions(+), 42 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index af567982f..d492d849a 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -288,11 +288,11 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( } // Shared memory and, if ROVs are used, EDRAM. - D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[2]; + D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3]; { auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - parameter.DescriptorTable.NumDescriptorRanges = 1; + parameter.DescriptorTable.NumDescriptorRanges = 2; parameter.DescriptorTable.pDescriptorRanges = shared_memory_and_edram_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; @@ -302,14 +302,22 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( shared_memory_and_edram_ranges[0].BaseShaderRegister = 0; shared_memory_and_edram_ranges[0].RegisterSpace = 0; shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0; + shared_memory_and_edram_ranges[1].RangeType = + D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + shared_memory_and_edram_ranges[1].NumDescriptors = 1; + shared_memory_and_edram_ranges[1].BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); + shared_memory_and_edram_ranges[1].RegisterSpace = 0; + shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1; if (IsROVUsedForEDRAM()) { ++parameter.DescriptorTable.NumDescriptorRanges; - shared_memory_and_edram_ranges[1].RangeType = + shared_memory_and_edram_ranges[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - shared_memory_and_edram_ranges[1].NumDescriptors = 1; - shared_memory_and_edram_ranges[1].BaseShaderRegister = 0; - shared_memory_and_edram_ranges[1].RegisterSpace = 0; - shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1; + shared_memory_and_edram_ranges[2].NumDescriptors = 1; + shared_memory_and_edram_ranges[2].BaseShaderRegister = + UINT(DxbcShaderTranslator::UAVRegister::kEDRAM); + shared_memory_and_edram_ranges[2].RegisterSpace = 0; + shared_memory_and_edram_ranges[2].OffsetInDescriptorsFromTableStart = 2; } } @@ -1335,7 +1343,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Update system constants before uploading them. UpdateSystemConstantValues( - primitive_type, + memexport_used, primitive_type, indexed ? index_buffer_info->endianness : Endian::kUnspecified, color_mask, pipeline_render_targets); @@ -1535,7 +1543,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, shared_memory_->GetGPUAddress() + index_base; index_buffer_view.SizeInBytes = index_buffer_size; } - shared_memory_->UseForReading(); + if (memexport_used) { + shared_memory_->UseForWriting(); + } else { + shared_memory_->UseForReading(); + } command_list->IASetIndexBuffer(&index_buffer_view); SubmitBarriers(); if (adaptive_tessellation) { @@ -1550,7 +1562,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address = primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count, converted_index_count); - shared_memory_->UseForReading(); + if (memexport_used) { + shared_memory_->UseForWriting(); + } else { + shared_memory_->UseForReading(); + } SubmitBarriers(); if (conversion_gpu_address) { D3D12_INDEX_BUFFER_VIEW index_buffer_view; @@ -1564,6 +1580,18 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, } } + if (memexport_used) { + // Commit shared memory writing. + PushUAVBarrier(shared_memory_->GetBuffer()); + // Invalidate textures in memexported memory and watch for changes. + for (uint32_t i = 0; i < memexport_range_count; ++i) { + const MemExportRange& memexport_range = memexport_ranges[i]; + shared_memory_->RangeWrittenByGPU( + memexport_range.base_address_dwords << 2, + memexport_range.size_dwords << 2); + } + } + // TODO(Triang3l): Read back memexported data if the respective gflag is set. return true; @@ -1868,7 +1896,8 @@ void D3D12CommandProcessor::UpdateFixedFunctionState( } void D3D12CommandProcessor::UpdateSystemConstantValues( - PrimitiveType primitive_type, Endian index_endian, uint32_t color_mask, + bool shared_memory_is_uav, PrimitiveType primitive_type, + Endian index_endian, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { auto& regs = *register_file_; @@ -1966,6 +1995,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Flags. uint32_t flags = 0; + // Whether shared memory is an SRV or a UAV. Because a resource can't be in a + // read-write (UAV) and a read-only (SRV, IBV) state at once, if any shader in + // the pipeline uses memexport, the shared memory buffer must be a UAV. + if (shared_memory_is_uav) { + flags |= DxbcShaderTranslator::kSysFlag_SharedMemoryIsUAV; + } // W0 division control. // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. @@ -2730,9 +2765,9 @@ bool D3D12CommandProcessor::UpdateBindings( if (write_textures_pixel) { view_count_partial_update += texture_count_pixel; } - // All the constants + shared memory + textures. + // All the constants + shared memory SRV and UAV + textures. uint32_t view_count_full_update = - 6 + texture_count_vertex + texture_count_pixel; + 7 + texture_count_vertex + texture_count_pixel; if (IsROVUsedForEDRAM()) { // + EDRAM UAV. ++view_count_full_update; @@ -2779,10 +2814,13 @@ bool D3D12CommandProcessor::UpdateBindings( write_textures_pixel = texture_count_pixel != 0; texture_bindings_written_vertex_ = false; texture_bindings_written_pixel_ = false; - // If updating fully, write the shared memory descriptor (t0) and, if - // needed, the EDRAM descriptor (u0). - shared_memory_->CreateSRV(view_cpu_handle); + // If updating fully, write the shared memory SRV and UAV descriptors and, + // if needed, the EDRAM descriptor. gpu_handle_shared_memory_and_edram_ = view_gpu_handle; + shared_memory_->CreateSRV(view_cpu_handle); + view_cpu_handle.ptr += descriptor_size_view; + view_gpu_handle.ptr += descriptor_size_view; + shared_memory_->CreateRawUAV(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; if (IsROVUsedForEDRAM()) { diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 329277ee3..74ed2e02b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -202,7 +202,8 @@ class D3D12CommandProcessor : public CommandProcessor { void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list); void UpdateSystemConstantValues( - PrimitiveType primitive_type, Endian index_endian, uint32_t color_mask, + bool shared_memory_is_uav, PrimitiveType primitive_type, + Endian index_endian, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]); bool UpdateBindings(ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader, diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 510288517..63f99358f 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -3003,10 +3003,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // Bound resource count (samplers, SRV, UAV, CBV). uint32_t resource_count = cbuffer_count_; if (!is_depth_only_pixel_shader_) { - // + 1 for shared memory (vfetches can probably appear in pixel shaders too, - // they are handled safely there anyway). + // + 2 for shared memory SRV and UAV (vfetches can appear in pixel shaders + // too, and the UAV is needed for memexport, however, the choice between + // SRV and UAV is per-pipeline, not per-shader - a resource can't be in a + // read-only state (SRV, IBV) if it's in a read/write state such as UAV). resource_count += - uint32_t(sampler_bindings_.size()) + 1 + uint32_t(texture_srvs_.size()); + uint32_t(sampler_bindings_.size()) + 2 + uint32_t(texture_srvs_.size()); } if (IsDxbcPixelShader() && edram_rov_used_) { // EDRAM. @@ -3318,20 +3320,23 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) * sizeof(uint32_t); uint32_t sampler_name_offset = 0; - uint32_t shared_memory_name_offset = 0; + uint32_t shared_memory_srv_name_offset = 0; uint32_t texture_name_offset = 0; + uint32_t shared_memory_uav_name_offset = 0; if (!is_depth_only_pixel_shader_) { sampler_name_offset = new_offset; for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { new_offset += AppendString(shader_object_, sampler_bindings_[i].name.c_str()); } - shared_memory_name_offset = new_offset; - new_offset += AppendString(shader_object_, "xe_shared_memory"); + shared_memory_srv_name_offset = new_offset; + new_offset += AppendString(shader_object_, "xe_shared_memory_srv"); texture_name_offset = new_offset; for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str()); } + shared_memory_uav_name_offset = new_offset; + new_offset += AppendString(shader_object_, "xe_shared_memory_uav"); } uint32_t edram_name_offset = new_offset; if (IsDxbcPixelShader() && edram_rov_used_) { @@ -3367,8 +3372,8 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { sampler_name_offset += GetStringLength(sampler_binding.name.c_str()); } - // Shared memory. - shader_object_.push_back(shared_memory_name_offset); + // Shared memory (when memexport isn't used in the pipeline). + shader_object_.push_back(shared_memory_srv_name_offset); // D3D_SIT_BYTEADDRESS. shader_object_.push_back(7); // D3D_RETURN_TYPE_MIXED. @@ -3422,6 +3427,26 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(1 + i); texture_name_offset += GetStringLength(texture_srv.name.c_str()); } + + // Shared memory (when memexport is used in the pipeline). + shader_object_.push_back(shared_memory_uav_name_offset); + // D3D_SIT_UAV_RWBYTEADDRESS. + shader_object_.push_back(8); + // D3D_RETURN_TYPE_MIXED. + shader_object_.push_back(6); + // D3D_UAV_DIMENSION_BUFFER. + shader_object_.push_back(1); + // Multisampling not applicable. + shader_object_.push_back(0); + shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); + // One binding. + shader_object_.push_back(1); + // No D3D_SHADER_INPUT_FLAGS. + shader_object_.push_back(0); + // Register space 0. + shader_object_.push_back(0); + // UAV ID U0. + shader_object_.push_back(0); } if (IsDxbcPixelShader() && edram_rov_used_) { @@ -3435,16 +3460,15 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(1); // Not multisampled. shader_object_.push_back(0xFFFFFFFFu); - // Register u0. - shader_object_.push_back(0); + shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); // One binding. shader_object_.push_back(1); // No D3D_SHADER_INPUT_FLAGS. shader_object_.push_back(0); // Register space 0. shader_object_.push_back(0); - // UAV ID U0. - shader_object_.push_back(0); + // UAV ID U1 or U0 depending on whether there's U0. + shader_object_.push_back(GetEDRAMUAVIndex()); } // Constant buffers. @@ -3980,7 +4004,6 @@ void DxbcShaderTranslator::WriteShaderCode() { shader_object_.push_back(0); shader_object_.push_back(0); shader_object_.push_back(0); - // Textures. for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { const TextureSRV& texture_srv = texture_srvs_[i]; @@ -4015,8 +4038,21 @@ void DxbcShaderTranslator::WriteShaderCode() { } // Unordered access views. + if (!is_depth_only_pixel_shader_) { + // Shared memory RWByteAddressBuffer. + shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE( + D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_object_.push_back(EncodeVectorSwizzledOperand( + D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3)); + shader_object_.push_back(0); + shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); + shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); + shader_object_.push_back(0); + } if (IsDxbcPixelShader() && edram_rov_used_) { - // EDRAM uint32 rasterizer-ordered buffer (U0, at u0, space0). + // EDRAM uint32 rasterizer-ordered buffer. shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE( D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) | @@ -4025,9 +4061,9 @@ void DxbcShaderTranslator::WriteShaderCode() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_object_.push_back(EncodeVectorSwizzledOperand( D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3)); - shader_object_.push_back(0); - shader_object_.push_back(0); - shader_object_.push_back(0); + shader_object_.push_back(GetEDRAMUAVIndex()); + shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); + shader_object_.push_back(uint32_t(UAVRegister::kEDRAM)); shader_object_.push_back( ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) | diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index be7c3dfa0..df004ffc8 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -47,6 +47,7 @@ class DxbcShaderTranslator : public ShaderTranslator { }; enum : uint32_t { + kSysFlag_SharedMemoryIsUAV_Shift, kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal_Shift, @@ -70,6 +71,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_Color2Gamma_Shift, kSysFlag_Color3Gamma_Shift, + kSysFlag_SharedMemoryIsUAV = 1u << kSysFlag_SharedMemoryIsUAV_Shift, kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, @@ -482,6 +484,12 @@ class DxbcShaderTranslator : public ShaderTranslator { return sampler_bindings_.data(); } + // Unordered access view bindings in space 0. + enum class UAVRegister { + kSharedMemory, + kEDRAM, + }; + // Returns the bits that need to be added to the RT flags constant - needs to // be done externally, not in SetColorFormatConstants, because the flags // contain other state. @@ -829,6 +837,11 @@ class DxbcShaderTranslator : public ShaderTranslator { // any conditions. void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma); void CompletePixelShader_WriteToRTVs(); + inline uint32_t GetEDRAMUAVIndex() const { + // xe_edram is U1 when there's xe_shared_memory_uav which is U0, but when + // there's no xe_shared_memory_uav, it's U0. + return is_depth_only_pixel_shader_ ? 0 : 1; + } // Performs depth/stencil testing. After the test, coverage_out_temp will // contain non-zero values for samples that passed the depth/stencil test and // are included in SV_Coverage, and zeros for those who didn't. diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 54e28b7a3..5a1ad79f6 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -409,6 +409,56 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( ++stat_.int_instruction_count; } + // Select whether shared memory is an SRV or a UAV (depending on whether + // memexport is used in the pipeline) - check the flag. + system_constants_used_ |= 1ull << kSysConst_Flags_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_Flags_Vec); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(kSysFlag_SharedMemoryIsUAV); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + D3D10_SB_INSTRUCTION_TEST_NONZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(system_temp_pv_); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + // Load the vertex data from the shared memory at U0. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, (1 << load_dword_count) - 1, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, + kSwizzleXYZW & ((1 << (load_dword_count * 2)) - 1), 2)); + shader_code_.push_back(0); + shader_code_.push_back(uint32_t(UAVRegister::kSharedMemory)); + ++stat_.instruction_count; + ++stat_.texture_load_instructions; + + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + // Load the vertex data from the shared memory at T0, register t0. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); @@ -426,6 +476,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( ++stat_.instruction_count; ++stat_.texture_load_instructions; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + // Byte swap the data. SwapVertexData(vfetch_index, (1 << load_dword_count) - 1); diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index fe40f0f32..2a7b3460f 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -1498,8 +1498,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( shader_code_.push_back(edram_dword_offset_temp); shader_code_.push_back(EncodeVectorReplicatedOperand( D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2)); - shader_code_.push_back(0); - shader_code_.push_back(0); + shader_code_.push_back(GetEDRAMUAVIndex()); + shader_code_.push_back(uint32_t(UAVRegister::kEDRAM)); ++stat_.instruction_count; ++stat_.texture_load_instructions; @@ -2351,8 +2351,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); shader_code_.push_back(EncodeVectorMaskedOperand( D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2)); - shader_code_.push_back(0); - shader_code_.push_back(0); + shader_code_.push_back(GetEDRAMUAVIndex()); + shader_code_.push_back(uint32_t(UAVRegister::kEDRAM)); shader_code_.push_back( EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); shader_code_.push_back(edram_dword_offset_temp); @@ -5046,8 +5046,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { shader_code_.push_back(edram_coord_sample_temp); shader_code_.push_back(EncodeVectorReplicatedOperand( D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2)); - shader_code_.push_back(0); - shader_code_.push_back(0); + shader_code_.push_back(GetEDRAMUAVIndex()); + shader_code_.push_back(uint32_t(UAVRegister::kEDRAM)); ++stat_.instruction_count; ++stat_.texture_load_instructions; @@ -5403,8 +5403,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); shader_code_.push_back(EncodeVectorMaskedOperand( D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2)); - shader_code_.push_back(0); - shader_code_.push_back(0); + shader_code_.push_back(GetEDRAMUAVIndex()); + shader_code_.push_back(uint32_t(UAVRegister::kEDRAM)); shader_code_.push_back( EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, k, 1)); shader_code_.push_back(edram_coord_sample_temp);