[D3D12] Bind shared memory as UAV with memexport
This commit is contained in:
parent
645f450321
commit
e803ee84d5
|
@ -288,11 +288,11 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shared memory and, if ROVs are used, EDRAM.
|
// Shared memory and, if ROVs are used, EDRAM.
|
||||||
D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[2];
|
D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3];
|
||||||
{
|
{
|
||||||
auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM];
|
auto& parameter = parameters[kRootParameter_SharedMemoryAndEDRAM];
|
||||||
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||||
parameter.DescriptorTable.NumDescriptorRanges = 1;
|
parameter.DescriptorTable.NumDescriptorRanges = 2;
|
||||||
parameter.DescriptorTable.pDescriptorRanges =
|
parameter.DescriptorTable.pDescriptorRanges =
|
||||||
shared_memory_and_edram_ranges;
|
shared_memory_and_edram_ranges;
|
||||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||||
|
@ -302,14 +302,22 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
||||||
shared_memory_and_edram_ranges[0].BaseShaderRegister = 0;
|
shared_memory_and_edram_ranges[0].BaseShaderRegister = 0;
|
||||||
shared_memory_and_edram_ranges[0].RegisterSpace = 0;
|
shared_memory_and_edram_ranges[0].RegisterSpace = 0;
|
||||||
shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0;
|
||||||
if (IsROVUsedForEDRAM()) {
|
|
||||||
++parameter.DescriptorTable.NumDescriptorRanges;
|
|
||||||
shared_memory_and_edram_ranges[1].RangeType =
|
shared_memory_and_edram_ranges[1].RangeType =
|
||||||
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||||
shared_memory_and_edram_ranges[1].NumDescriptors = 1;
|
shared_memory_and_edram_ranges[1].NumDescriptors = 1;
|
||||||
shared_memory_and_edram_ranges[1].BaseShaderRegister = 0;
|
shared_memory_and_edram_ranges[1].BaseShaderRegister =
|
||||||
|
UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory);
|
||||||
shared_memory_and_edram_ranges[1].RegisterSpace = 0;
|
shared_memory_and_edram_ranges[1].RegisterSpace = 0;
|
||||||
shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
|
shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1;
|
||||||
|
if (IsROVUsedForEDRAM()) {
|
||||||
|
++parameter.DescriptorTable.NumDescriptorRanges;
|
||||||
|
shared_memory_and_edram_ranges[2].RangeType =
|
||||||
|
D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||||
|
shared_memory_and_edram_ranges[2].NumDescriptors = 1;
|
||||||
|
shared_memory_and_edram_ranges[2].BaseShaderRegister =
|
||||||
|
UINT(DxbcShaderTranslator::UAVRegister::kEDRAM);
|
||||||
|
shared_memory_and_edram_ranges[2].RegisterSpace = 0;
|
||||||
|
shared_memory_and_edram_ranges[2].OffsetInDescriptorsFromTableStart = 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1335,7 +1343,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
|
|
||||||
// Update system constants before uploading them.
|
// Update system constants before uploading them.
|
||||||
UpdateSystemConstantValues(
|
UpdateSystemConstantValues(
|
||||||
primitive_type,
|
memexport_used, primitive_type,
|
||||||
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
|
indexed ? index_buffer_info->endianness : Endian::kUnspecified,
|
||||||
color_mask, pipeline_render_targets);
|
color_mask, pipeline_render_targets);
|
||||||
|
|
||||||
|
@ -1535,7 +1543,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
shared_memory_->GetGPUAddress() + index_base;
|
shared_memory_->GetGPUAddress() + index_base;
|
||||||
index_buffer_view.SizeInBytes = index_buffer_size;
|
index_buffer_view.SizeInBytes = index_buffer_size;
|
||||||
}
|
}
|
||||||
|
if (memexport_used) {
|
||||||
|
shared_memory_->UseForWriting();
|
||||||
|
} else {
|
||||||
shared_memory_->UseForReading();
|
shared_memory_->UseForReading();
|
||||||
|
}
|
||||||
command_list->IASetIndexBuffer(&index_buffer_view);
|
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||||
SubmitBarriers();
|
SubmitBarriers();
|
||||||
if (adaptive_tessellation) {
|
if (adaptive_tessellation) {
|
||||||
|
@ -1550,7 +1562,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address =
|
D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address =
|
||||||
primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count,
|
primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count,
|
||||||
converted_index_count);
|
converted_index_count);
|
||||||
|
if (memexport_used) {
|
||||||
|
shared_memory_->UseForWriting();
|
||||||
|
} else {
|
||||||
shared_memory_->UseForReading();
|
shared_memory_->UseForReading();
|
||||||
|
}
|
||||||
SubmitBarriers();
|
SubmitBarriers();
|
||||||
if (conversion_gpu_address) {
|
if (conversion_gpu_address) {
|
||||||
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
|
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
|
||||||
|
@ -1564,6 +1580,18 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (memexport_used) {
|
||||||
|
// Commit shared memory writing.
|
||||||
|
PushUAVBarrier(shared_memory_->GetBuffer());
|
||||||
|
// Invalidate textures in memexported memory and watch for changes.
|
||||||
|
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||||
|
const MemExportRange& memexport_range = memexport_ranges[i];
|
||||||
|
shared_memory_->RangeWrittenByGPU(
|
||||||
|
memexport_range.base_address_dwords << 2,
|
||||||
|
memexport_range.size_dwords << 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
|
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1868,7 +1896,8 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
|
||||||
}
|
}
|
||||||
|
|
||||||
void D3D12CommandProcessor::UpdateSystemConstantValues(
|
void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
PrimitiveType primitive_type, Endian index_endian, uint32_t color_mask,
|
bool shared_memory_is_uav, PrimitiveType primitive_type,
|
||||||
|
Endian index_endian, uint32_t color_mask,
|
||||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
|
|
||||||
|
@ -1966,6 +1995,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
|
|
||||||
// Flags.
|
// Flags.
|
||||||
uint32_t flags = 0;
|
uint32_t flags = 0;
|
||||||
|
// Whether shared memory is an SRV or a UAV. Because a resource can't be in a
|
||||||
|
// read-write (UAV) and a read-only (SRV, IBV) state at once, if any shader in
|
||||||
|
// the pipeline uses memexport, the shared memory buffer must be a UAV.
|
||||||
|
if (shared_memory_is_uav) {
|
||||||
|
flags |= DxbcShaderTranslator::kSysFlag_SharedMemoryIsUAV;
|
||||||
|
}
|
||||||
// W0 division control.
|
// W0 division control.
|
||||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||||
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
||||||
|
@ -2730,9 +2765,9 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
if (write_textures_pixel) {
|
if (write_textures_pixel) {
|
||||||
view_count_partial_update += texture_count_pixel;
|
view_count_partial_update += texture_count_pixel;
|
||||||
}
|
}
|
||||||
// All the constants + shared memory + textures.
|
// All the constants + shared memory SRV and UAV + textures.
|
||||||
uint32_t view_count_full_update =
|
uint32_t view_count_full_update =
|
||||||
6 + texture_count_vertex + texture_count_pixel;
|
7 + texture_count_vertex + texture_count_pixel;
|
||||||
if (IsROVUsedForEDRAM()) {
|
if (IsROVUsedForEDRAM()) {
|
||||||
// + EDRAM UAV.
|
// + EDRAM UAV.
|
||||||
++view_count_full_update;
|
++view_count_full_update;
|
||||||
|
@ -2779,10 +2814,13 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
write_textures_pixel = texture_count_pixel != 0;
|
write_textures_pixel = texture_count_pixel != 0;
|
||||||
texture_bindings_written_vertex_ = false;
|
texture_bindings_written_vertex_ = false;
|
||||||
texture_bindings_written_pixel_ = false;
|
texture_bindings_written_pixel_ = false;
|
||||||
// If updating fully, write the shared memory descriptor (t0) and, if
|
// If updating fully, write the shared memory SRV and UAV descriptors and,
|
||||||
// needed, the EDRAM descriptor (u0).
|
// if needed, the EDRAM descriptor.
|
||||||
shared_memory_->CreateSRV(view_cpu_handle);
|
|
||||||
gpu_handle_shared_memory_and_edram_ = view_gpu_handle;
|
gpu_handle_shared_memory_and_edram_ = view_gpu_handle;
|
||||||
|
shared_memory_->CreateSRV(view_cpu_handle);
|
||||||
|
view_cpu_handle.ptr += descriptor_size_view;
|
||||||
|
view_gpu_handle.ptr += descriptor_size_view;
|
||||||
|
shared_memory_->CreateRawUAV(view_cpu_handle);
|
||||||
view_cpu_handle.ptr += descriptor_size_view;
|
view_cpu_handle.ptr += descriptor_size_view;
|
||||||
view_gpu_handle.ptr += descriptor_size_view;
|
view_gpu_handle.ptr += descriptor_size_view;
|
||||||
if (IsROVUsedForEDRAM()) {
|
if (IsROVUsedForEDRAM()) {
|
||||||
|
|
|
@ -202,7 +202,8 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
|
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
|
||||||
void UpdateSystemConstantValues(
|
void UpdateSystemConstantValues(
|
||||||
PrimitiveType primitive_type, Endian index_endian, uint32_t color_mask,
|
bool shared_memory_is_uav, PrimitiveType primitive_type,
|
||||||
|
Endian index_endian, uint32_t color_mask,
|
||||||
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
||||||
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
||||||
const D3D12Shader* vertex_shader,
|
const D3D12Shader* vertex_shader,
|
||||||
|
|
|
@ -3003,10 +3003,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
||||||
// Bound resource count (samplers, SRV, UAV, CBV).
|
// Bound resource count (samplers, SRV, UAV, CBV).
|
||||||
uint32_t resource_count = cbuffer_count_;
|
uint32_t resource_count = cbuffer_count_;
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (!is_depth_only_pixel_shader_) {
|
||||||
// + 1 for shared memory (vfetches can probably appear in pixel shaders too,
|
// + 2 for shared memory SRV and UAV (vfetches can appear in pixel shaders
|
||||||
// they are handled safely there anyway).
|
// too, and the UAV is needed for memexport, however, the choice between
|
||||||
|
// SRV and UAV is per-pipeline, not per-shader - a resource can't be in a
|
||||||
|
// read-only state (SRV, IBV) if it's in a read/write state such as UAV).
|
||||||
resource_count +=
|
resource_count +=
|
||||||
uint32_t(sampler_bindings_.size()) + 1 + uint32_t(texture_srvs_.size());
|
uint32_t(sampler_bindings_.size()) + 2 + uint32_t(texture_srvs_.size());
|
||||||
}
|
}
|
||||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||||
// EDRAM.
|
// EDRAM.
|
||||||
|
@ -3318,20 +3320,23 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
||||||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||||
sizeof(uint32_t);
|
sizeof(uint32_t);
|
||||||
uint32_t sampler_name_offset = 0;
|
uint32_t sampler_name_offset = 0;
|
||||||
uint32_t shared_memory_name_offset = 0;
|
uint32_t shared_memory_srv_name_offset = 0;
|
||||||
uint32_t texture_name_offset = 0;
|
uint32_t texture_name_offset = 0;
|
||||||
|
uint32_t shared_memory_uav_name_offset = 0;
|
||||||
if (!is_depth_only_pixel_shader_) {
|
if (!is_depth_only_pixel_shader_) {
|
||||||
sampler_name_offset = new_offset;
|
sampler_name_offset = new_offset;
|
||||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||||
new_offset +=
|
new_offset +=
|
||||||
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
|
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
|
||||||
}
|
}
|
||||||
shared_memory_name_offset = new_offset;
|
shared_memory_srv_name_offset = new_offset;
|
||||||
new_offset += AppendString(shader_object_, "xe_shared_memory");
|
new_offset += AppendString(shader_object_, "xe_shared_memory_srv");
|
||||||
texture_name_offset = new_offset;
|
texture_name_offset = new_offset;
|
||||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||||
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
|
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
|
||||||
}
|
}
|
||||||
|
shared_memory_uav_name_offset = new_offset;
|
||||||
|
new_offset += AppendString(shader_object_, "xe_shared_memory_uav");
|
||||||
}
|
}
|
||||||
uint32_t edram_name_offset = new_offset;
|
uint32_t edram_name_offset = new_offset;
|
||||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||||
|
@ -3367,8 +3372,8 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
||||||
sampler_name_offset += GetStringLength(sampler_binding.name.c_str());
|
sampler_name_offset += GetStringLength(sampler_binding.name.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shared memory.
|
// Shared memory (when memexport isn't used in the pipeline).
|
||||||
shader_object_.push_back(shared_memory_name_offset);
|
shader_object_.push_back(shared_memory_srv_name_offset);
|
||||||
// D3D_SIT_BYTEADDRESS.
|
// D3D_SIT_BYTEADDRESS.
|
||||||
shader_object_.push_back(7);
|
shader_object_.push_back(7);
|
||||||
// D3D_RETURN_TYPE_MIXED.
|
// D3D_RETURN_TYPE_MIXED.
|
||||||
|
@ -3422,6 +3427,26 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
||||||
shader_object_.push_back(1 + i);
|
shader_object_.push_back(1 + i);
|
||||||
texture_name_offset += GetStringLength(texture_srv.name.c_str());
|
texture_name_offset += GetStringLength(texture_srv.name.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shared memory (when memexport is used in the pipeline).
|
||||||
|
shader_object_.push_back(shared_memory_uav_name_offset);
|
||||||
|
// D3D_SIT_UAV_RWBYTEADDRESS.
|
||||||
|
shader_object_.push_back(8);
|
||||||
|
// D3D_RETURN_TYPE_MIXED.
|
||||||
|
shader_object_.push_back(6);
|
||||||
|
// D3D_UAV_DIMENSION_BUFFER.
|
||||||
|
shader_object_.push_back(1);
|
||||||
|
// Multisampling not applicable.
|
||||||
|
shader_object_.push_back(0);
|
||||||
|
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||||
|
// One binding.
|
||||||
|
shader_object_.push_back(1);
|
||||||
|
// No D3D_SHADER_INPUT_FLAGS.
|
||||||
|
shader_object_.push_back(0);
|
||||||
|
// Register space 0.
|
||||||
|
shader_object_.push_back(0);
|
||||||
|
// UAV ID U0.
|
||||||
|
shader_object_.push_back(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||||
|
@ -3435,16 +3460,15 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
||||||
shader_object_.push_back(1);
|
shader_object_.push_back(1);
|
||||||
// Not multisampled.
|
// Not multisampled.
|
||||||
shader_object_.push_back(0xFFFFFFFFu);
|
shader_object_.push_back(0xFFFFFFFFu);
|
||||||
// Register u0.
|
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
shader_object_.push_back(0);
|
|
||||||
// One binding.
|
// One binding.
|
||||||
shader_object_.push_back(1);
|
shader_object_.push_back(1);
|
||||||
// No D3D_SHADER_INPUT_FLAGS.
|
// No D3D_SHADER_INPUT_FLAGS.
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(0);
|
||||||
// Register space 0.
|
// Register space 0.
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(0);
|
||||||
// UAV ID U0.
|
// UAV ID U1 or U0 depending on whether there's U0.
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(GetEDRAMUAVIndex());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Constant buffers.
|
// Constant buffers.
|
||||||
|
@ -3980,7 +4004,6 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(0);
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(0);
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(0);
|
||||||
|
|
||||||
// Textures.
|
// Textures.
|
||||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||||
const TextureSRV& texture_srv = texture_srvs_[i];
|
const TextureSRV& texture_srv = texture_srvs_[i];
|
||||||
|
@ -4015,8 +4038,21 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unordered access views.
|
// Unordered access views.
|
||||||
|
if (!is_depth_only_pixel_shader_) {
|
||||||
|
// Shared memory RWByteAddressBuffer.
|
||||||
|
shader_object_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
|
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||||
|
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
||||||
|
shader_object_.push_back(0);
|
||||||
|
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||||
|
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||||
|
shader_object_.push_back(0);
|
||||||
|
}
|
||||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||||
// EDRAM uint32 rasterizer-ordered buffer (U0, at u0, space0).
|
// EDRAM uint32 rasterizer-ordered buffer.
|
||||||
shader_object_.push_back(
|
shader_object_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
|
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
|
||||||
|
@ -4025,9 +4061,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(GetEDRAMUAVIndex());
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
shader_object_.push_back(0);
|
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
shader_object_.push_back(
|
shader_object_.push_back(
|
||||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) |
|
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) |
|
||||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) |
|
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) |
|
||||||
|
|
|
@ -47,6 +47,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
};
|
};
|
||||||
|
|
||||||
enum : uint32_t {
|
enum : uint32_t {
|
||||||
|
kSysFlag_SharedMemoryIsUAV_Shift,
|
||||||
kSysFlag_XYDividedByW_Shift,
|
kSysFlag_XYDividedByW_Shift,
|
||||||
kSysFlag_ZDividedByW_Shift,
|
kSysFlag_ZDividedByW_Shift,
|
||||||
kSysFlag_WNotReciprocal_Shift,
|
kSysFlag_WNotReciprocal_Shift,
|
||||||
|
@ -70,6 +71,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_Color2Gamma_Shift,
|
kSysFlag_Color2Gamma_Shift,
|
||||||
kSysFlag_Color3Gamma_Shift,
|
kSysFlag_Color3Gamma_Shift,
|
||||||
|
|
||||||
|
kSysFlag_SharedMemoryIsUAV = 1u << kSysFlag_SharedMemoryIsUAV_Shift,
|
||||||
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
|
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
|
||||||
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
|
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
|
||||||
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
||||||
|
@ -482,6 +484,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
return sampler_bindings_.data();
|
return sampler_bindings_.data();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unordered access view registers (u#) in register space 0.
// The enumerators keep their implicit values, so kSharedMemory is u0 and
// kEDRAM is u1. These are register numbers, used as the register bounds in
// UAV declarations and as BaseShaderRegister in the root signature ranges.
// NOTE(review): the per-shader UAV ID (U#) of EDRAM is a separate value that
// appears to come from GetEDRAMUAVIndex() - confirm before reusing these
// enumerators as IDs.
|
||||||
|
enum class UAVRegister {
|
||||||
|
kSharedMemory,
|
||||||
|
kEDRAM,
|
||||||
|
};
|
||||||
|
|
||||||
// Returns the bits that need to be added to the RT flags constant - needs to
|
// Returns the bits that need to be added to the RT flags constant - needs to
|
||||||
// be done externally, not in SetColorFormatConstants, because the flags
|
// be done externally, not in SetColorFormatConstants, because the flags
|
||||||
// contain other state.
|
// contain other state.
|
||||||
|
@ -829,6 +837,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// any conditions.
|
// any conditions.
|
||||||
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
|
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
|
||||||
void CompletePixelShader_WriteToRTVs();
|
void CompletePixelShader_WriteToRTVs();
|
||||||
|
// Returns the UAV ID (the U# index, not the u# register) to use for xe_edram
// in the shader being translated.
inline uint32_t GetEDRAMUAVIndex() const {
|
||||||
|
// xe_edram is U1 when there's xe_shared_memory_uav, which takes U0 in every
|
||||||
|
// shader that is not a depth-only pixel shader; without it, xe_edram is U0.
|
||||||
|
return is_depth_only_pixel_shader_ ? 0 : 1;
|
||||||
|
}
|
||||||
// Performs depth/stencil testing. After the test, coverage_out_temp will
|
// Performs depth/stencil testing. After the test, coverage_out_temp will
|
||||||
// contain non-zero values for samples that passed the depth/stencil test and
|
// contain non-zero values for samples that passed the depth/stencil test and
|
||||||
// are included in SV_Coverage, and zeros for those who didn't.
|
// are included in SV_Coverage, and zeros for those who didn't.
|
||||||
|
|
|
@ -409,6 +409,56 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
||||||
++stat_.int_instruction_count;
|
++stat_.int_instruction_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Select whether shared memory is an SRV or a UAV (depending on whether
|
||||||
|
// memexport is used in the pipeline) - check the flag.
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_Flags_Vec);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(kSysFlag_SharedMemoryIsUAV);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
|
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
// Load the vertex data from the shared memory at U0.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, (1 << load_dword_count) - 1, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(system_temp_pv_);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW,
|
||||||
|
kSwizzleXYZW & ((1 << (load_dword_count * 2)) - 1), 2));
|
||||||
|
shader_code_.push_back(0);
|
||||||
|
shader_code_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.texture_load_instructions;
|
||||||
|
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
|
||||||
// Load the vertex data from the shared memory at T0, register t0.
|
// Load the vertex data from the shared memory at T0, register t0.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_LD_RAW) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
|
@ -426,6 +476,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.texture_load_instructions;
|
++stat_.texture_load_instructions;
|
||||||
|
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
|
||||||
// Byte swap the data.
|
// Byte swap the data.
|
||||||
SwapVertexData(vfetch_index, (1 << load_dword_count) - 1);
|
SwapVertexData(vfetch_index, (1 << load_dword_count) - 1);
|
||||||
|
|
||||||
|
|
|
@ -1498,8 +1498,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
shader_code_.push_back(edram_dword_offset_temp);
|
shader_code_.push_back(edram_dword_offset_temp);
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2));
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2));
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(GetEDRAMUAVIndex());
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.texture_load_instructions;
|
++stat_.texture_load_instructions;
|
||||||
|
|
||||||
|
@ -2351,8 +2351,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2));
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2));
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(GetEDRAMUAVIndex());
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(edram_dword_offset_temp);
|
shader_code_.push_back(edram_dword_offset_temp);
|
||||||
|
@ -5046,8 +5046,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
shader_code_.push_back(edram_coord_sample_temp);
|
shader_code_.push_back(edram_coord_sample_temp);
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2));
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0, 2));
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(GetEDRAMUAVIndex());
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.texture_load_instructions;
|
++stat_.texture_load_instructions;
|
||||||
|
|
||||||
|
@ -5403,8 +5403,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2));
|
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, 0b1111, 2));
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(GetEDRAMUAVIndex());
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, k, 1));
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, k, 1));
|
||||||
shader_code_.push_back(edram_coord_sample_temp);
|
shader_code_.push_back(edram_coord_sample_temp);
|
||||||
|
|
Loading…
Reference in New Issue