[D3D12] Bind shared memory as UAV with memexport

This commit is contained in:
Triang3l 2018-12-22 15:39:47 +03:00
parent 645f450321
commit e803ee84d5
6 changed files with 184 additions and 42 deletions

View File

@ -288,11 +288,11 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
}
// Shared memory and, if ROVs are used, EDRAM.
D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[2];
D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3];
{
auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM];
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
parameter.DescriptorTable.NumDescriptorRanges = 1;
parameter.DescriptorTable.NumDescriptorRanges = 2;
parameter.DescriptorTable.pDescriptorRanges =
shared_memory_and_edram_ranges;
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
@ -302,14 +302,22 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
shared_memory_and_edram_ranges[0].BaseShaderRegister = 0;
shared_memory_and_edram_ranges[0].RegisterSpace = 0;
shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0;
shared_memory_and_edram_ranges[1].RangeType =
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
shared_memory_and_edram_ranges[1].NumDescriptors = 1;
shared_memory_and_edram_ranges[1].BaseShaderRegister =
UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
shared_memory_and_edram_ranges[1].RegisterSpace = 0;
shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
if (IsROVUsedForEDRAM()) {
++parameter.DescriptorTable.NumDescriptorRanges;
shared_memory_and_edram_ranges[1].RangeType =
shared_memory_and_edram_ranges[2].RangeType =
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
shared_memory_and_edram_ranges[1].NumDescriptors = 1;
shared_memory_and_edram_ranges[1].BaseShaderRegister = 0;
shared_memory_and_edram_ranges[1].RegisterSpace = 0;
shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
shared_memory_and_edram_ranges[2].NumDescriptors = 1;
shared_memory_and_edram_ranges[2].BaseShaderRegister =
UINT(DxbcShaderTranslator::UAVRegister::kEDRAM);
shared_memory_and_edram_ranges[2].RegisterSpace = 0;
shared_memory_and_edram_ranges[2].OffsetInDescriptorsFromTableStart = 2;
}
}
@ -1335,7 +1343,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Update system constants before uploading them.
UpdateSystemConstantValues(
primitive_type,
memexport_used, primitive_type,
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
color_mask, pipeline_render_targets);
@ -1535,7 +1543,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
shared_memory_->GetGPUAddress() + index_base;
index_buffer_view.SizeInBytes = index_buffer_size;
}
shared_memory_->UseForReading();
if (memexport_used) {
shared_memory_->UseForWriting();
} else {
shared_memory_->UseForReading();
}
command_list->IASetIndexBuffer(&index_buffer_view);
SubmitBarriers();
if (adaptive_tessellation) {
@ -1550,7 +1562,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address =
primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count,
converted_index_count);
shared_memory_->UseForReading();
if (memexport_used) {
shared_memory_->UseForWriting();
} else {
shared_memory_->UseForReading();
}
SubmitBarriers();
if (conversion_gpu_address) {
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
@ -1564,6 +1580,18 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
}
}
if (memexport_used) {
// Commit shared memory writing.
PushUAVBarrier(shared_memory_->GetBuffer());
// Invalidate textures in memexported memory and watch for changes.
for (uint32_t i = 0; i < memexport_range_count; ++i) {
const MemExportRange& memexport_range = memexport_ranges[i];
shared_memory_->RangeWrittenByGPU(
memexport_range.base_address_dwords << 2,
memexport_range.size_dwords << 2);
}
}
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
return true;
@ -1868,7 +1896,8 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
}
void D3D12CommandProcessor::UpdateSystemConstantValues(
PrimitiveType primitive_type, Endian index_endian, uint32_t color_mask,
bool shared_memory_is_uav, PrimitiveType primitive_type,
Endian index_endian, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_;
@ -1966,6 +1995,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Flags.
uint32_t flags = 0;
// Whether shared memory is an SRV or a UAV. Because a resource can't be in a
// read-write (UAV) and a read-only (SRV, IBV) state at once, if any shader in
// the pipeline uses memexport, the shared memory buffer must be a UAV.
if (shared_memory_is_uav) {
flags |= DxbcShaderTranslator::kSysFlag_SharedMemoryIsUAV;
}
// W0 division control.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
@ -2730,9 +2765,9 @@ bool D3D12CommandProcessor::UpdateBindings(
if (write_textures_pixel) {
view_count_partial_update += texture_count_pixel;
}
// All the constants + shared memory + textures.
// All the constants + shared memory SRV and UAV + textures.
uint32_t view_count_full_update =
6 + texture_count_vertex + texture_count_pixel;
7 + texture_count_vertex + texture_count_pixel;
if (IsROVUsedForEDRAM()) {
// + EDRAM UAV.
++view_count_full_update;
@ -2779,10 +2814,13 @@ bool D3D12CommandProcessor::UpdateBindings(
write_textures_pixel = texture_count_pixel != 0;
texture_bindings_written_vertex_ = false;
texture_bindings_written_pixel_ = false;
// If updating fully, write the shared memory descriptor (t0) and, if
// needed, the EDRAM descriptor (u0).
shared_memory_->CreateSRV(view_cpu_handle);
// If updating fully, write the shared memory SRV and UAV descriptors and,
// if needed, the EDRAM descriptor.
gpu_handle_shared_memory_and_edram_ = view_gpu_handle;
shared_memory_->CreateSRV(view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
shared_memory_->CreateRawUAV(view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
if (IsROVUsedForEDRAM()) {

View File

@ -202,7 +202,8 @@ class D3D12CommandProcessor : public CommandProcessor {
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
void UpdateSystemConstantValues(
PrimitiveType primitive_type, Endian index_endian, uint32_t color_mask,
bool shared_memory_is_uav, PrimitiveType primitive_type,
Endian index_endian, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
const D3D12Shader* vertex_shader,

View File

@ -3003,10 +3003,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Bound resource count (samplers, SRV, UAV, CBV).
uint32_t resource_count = cbuffer_count_;
if (!is_depth_only_pixel_shader_) {
// + 1 for shared memory (vfetches can probably appear in pixel shaders too,
// they are handled safely there anyway).
// + 2 for the shared memory SRV and UAV. Vfetches can appear in pixel shaders
// too, and the UAV is needed for memexport. However, the choice between the
// SRV and the UAV is per-pipeline, not per-shader, because a resource can't be
// in a read-only state (SRV, IBV) while it's in a read/write state such as UAV.
resource_count +=
uint32_t(sampler_bindings_.size()) + 1 + uint32_t(texture_srvs_.size());
uint32_t(sampler_bindings_.size()) + 2 + uint32_t(texture_srvs_.size());
}
if (IsDxbcPixelShader() && edram_rov_used_) {
// EDRAM.
@ -3318,20 +3320,23 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
sizeof(uint32_t);
uint32_t sampler_name_offset = 0;
uint32_t shared_memory_name_offset = 0;
uint32_t shared_memory_srv_name_offset = 0;
uint32_t texture_name_offset = 0;
uint32_t shared_memory_uav_name_offset = 0;
if (!is_depth_only_pixel_shader_) {
sampler_name_offset = new_offset;
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
new_offset +=
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
}
shared_memory_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_shared_memory");
shared_memory_srv_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_shared_memory_srv");
texture_name_offset = new_offset;
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
}
shared_memory_uav_name_offset = new_offset;
new_offset += AppendString(shader_object_, "xe_shared_memory_uav");
}
uint32_t edram_name_offset = new_offset;
if (IsDxbcPixelShader() && edram_rov_used_) {
@ -3367,8 +3372,8 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
sampler_name_offset += GetStringLength(sampler_binding.name.c_str());
}
// Shared memory.
shader_object_.push_back(shared_memory_name_offset);
// Shared memory (when memexport isn't used in the pipeline).
shader_object_.push_back(shared_memory_srv_name_offset);
// D3D_SIT_BYTEADDRESS.
shader_object_.push_back(7);
// D3D_RETURN_TYPE_MIXED.
@ -3422,6 +3427,26 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(1 + i);
texture_name_offset += GetStringLength(texture_srv.name.c_str());
}
// Shared memory (when memexport is used in the pipeline).
shader_object_.push_back(shared_memory_uav_name_offset);
// D3D_SIT_UAV_RWBYTEADDRESS.
shader_object_.push_back(8);
// D3D_RETURN_TYPE_MIXED.
shader_object_.push_back(6);
// D3D_UAV_DIMENSION_BUFFER.
shader_object_.push_back(1);
// Multisampling not applicable.
shader_object_.push_back(0);
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
// One binding.
shader_object_.push_back(1);
// No D3D_SHADER_INPUT_FLAGS.
shader_object_.push_back(0);
// Register space 0.
shader_object_.push_back(0);
// UAV ID U0.
shader_object_.push_back(0);
}
if (IsDxbcPixelShader() && edram_rov_used_) {
@ -3435,16 +3460,15 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(1);
// Not multisampled.
shader_object_.push_back(0xFFFFFFFFu);
// Register u0.
shader_object_.push_back(0);
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
// One binding.
shader_object_.push_back(1);
// No D3D_SHADER_INPUT_FLAGS.
shader_object_.push_back(0);
// Register space 0.
shader_object_.push_back(0);
// UAV ID U0.
shader_object_.push_back(0);
// UAV ID: U1 if the shared memory UAV (U0) is declared, otherwise U0.
shader_object_.push_back(GetEDRAMUAVIndex());
}
// Constant buffers.
@ -3980,7 +4004,6 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(0);
shader_object_.push_back(0);
shader_object_.push_back(0);
// Textures.
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
const TextureSRV& texture_srv = texture_srvs_[i];
@ -4015,8 +4038,21 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
// Unordered access views.
if (!is_depth_only_pixel_shader_) {
// Shared memory RWByteAddressBuffer.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
shader_object_.push_back(0);
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
shader_object_.push_back(0);
}
if (IsDxbcPixelShader() && edram_rov_used_) {
// EDRAM uint32 rasterizer-ordered buffer (U0, at u0, space0).
// EDRAM uint32 rasterizer-ordered buffer.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
@ -4025,9 +4061,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
shader_object_.push_back(0);
shader_object_.push_back(0);
shader_object_.push_back(0);
shader_object_.push_back(GetEDRAMUAVIndex());
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
shader_object_.push_back(
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) |
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) |

View File

@ -47,6 +47,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
};
enum : uint32_t {
kSysFlag_SharedMemoryIsUAV_Shift,
kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal_Shift,
@ -70,6 +71,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kSysFlag_Color2Gamma_Shift,
kSysFlag_Color3Gamma_Shift,
kSysFlag_SharedMemoryIsUAV = 1u << kSysFlag_SharedMemoryIsUAV_Shift,
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
@ -482,6 +484,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
return sampler_bindings_.data();
}
// Shader registers (u#) of the unordered access views bound in register
// space 0. The enumerator values are used directly as register numbers, so
// they are spelled out explicitly.
enum class UAVRegister {
  // Shared memory raw buffer UAV (used instead of the SRV when memexport is
  // used in the pipeline).
  kSharedMemory = 0,
  // EDRAM buffer UAV (bound when ROVs are used for EDRAM).
  kEDRAM = 1,
};
// Returns the bits that need to be added to the RT flags constant - needs to
// be done externally, not in SetColorFormatConstants, because the flags
// contain other state.
@ -829,6 +837,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
// any conditions.
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
void CompletePixelShader_WriteToRTVs();
inline uint32_t GetEDRAMUAVIndex() const {
  // The UAV ID of xe_edram depends on whether xe_shared_memory_uav is declared
  // before it. A depth-only pixel shader has no xe_shared_memory_uav, so
  // xe_edram takes U0 there; otherwise xe_shared_memory_uav is U0 and xe_edram
  // follows it as U1.
  if (is_depth_only_pixel_shader_) {
    return 0;
  }
  return 1;
}
// Performs depth/stencil testing. After the test, coverage_out_temp will
// contain non-zero values for samples that passed the depth/stencil test and
// are included in SV_Coverage, and zeros for those who didn't.

View File

@ -409,6 +409,56 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
++stat_.int_instruction_count;
}
// Select whether shared memory is an SRV or a UAV (depending on whether
// memexport is used in the pipeline) - check the flag.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_Flags_Vec);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(kSysFlag_SharedMemoryIsUAV);
++stat_.instruction_count;
++stat_.uint_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Load the vertex data from the shared memory at U0.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, (1 << load_dword_count) - 1, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW,
kSwizzleXYZW & ((1 << (load_dword_count * 2)) - 1), 2));
shader_code_.push_back(0);
shader_code_.push_back(uint32_t(UAVRegister::kSharedMemory));
++stat_.instruction_count;
++stat_.texture_load_instructions;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Load the vertex data from the shared memory at T0, register t0.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
@ -426,6 +476,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
++stat_.instruction_count;
++stat_.texture_load_instructions;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Byte swap the data.
SwapVertexData(vfetch_index, (1 << load_dword_count) - 1);

View File

@ -1498,8 +1498,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
shader_code_.push_back(edram_dword_offset_temp);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(GetEDRAMUAVIndex());
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
++stat_.instruction_count;
++stat_.texture_load_instructions;
@ -2351,8 +2351,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(GetEDRAMUAVIndex());
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(edram_dword_offset_temp);
@ -5046,8 +5046,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(edram_coord_sample_temp);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(GetEDRAMUAVIndex());
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
++stat_.instruction_count;
++stat_.texture_load_instructions;
@ -5403,8 +5403,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2));
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(GetEDRAMUAVIndex());
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, k, 1));
shader_code_.push_back(edram_coord_sample_temp);