[D3D12] Clean up primitive types and front/back facing

This commit is contained in:
Triang3l 2019-07-13 22:25:03 +03:00
parent a16a746432
commit 4825e69fda
12 changed files with 323 additions and 303 deletions

View File

@ -139,17 +139,13 @@ void D3D12CommandProcessor::SubmitBarriers() {
ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
PrimitiveType primitive_type) { bool tessellated) {
assert_true(vertex_shader->is_translated()); assert_true(vertex_shader->is_translated());
assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); assert_true(pixel_shader == nullptr || pixel_shader->is_translated());
D3D12_SHADER_VISIBILITY vertex_visibility; D3D12_SHADER_VISIBILITY vertex_visibility =
if (primitive_type == PrimitiveType::kTrianglePatch || tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN
primitive_type == PrimitiveType::kQuadPatch) { : D3D12_SHADER_VISIBILITY_VERTEX;
vertex_visibility = D3D12_SHADER_VISIBILITY_DOMAIN;
} else {
vertex_visibility = D3D12_SHADER_VISIBILITY_VERTEX;
}
uint32_t texture_count_vertex, sampler_count_vertex; uint32_t texture_count_vertex, sampler_count_vertex;
vertex_shader->GetTextureSRVs(texture_count_vertex); vertex_shader->GetTextureSRVs(texture_count_vertex);
@ -172,8 +168,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
index_offset += D3D12Shader::kMaxTextureSRVIndexBits; index_offset += D3D12Shader::kMaxTextureSRVIndexBits;
index |= sampler_count_vertex << index_offset; index |= sampler_count_vertex << index_offset;
index_offset += D3D12Shader::kMaxSamplerBindingIndexBits; index_offset += D3D12Shader::kMaxSamplerBindingIndexBits;
index |= uint32_t(vertex_visibility == D3D12_SHADER_VISIBILITY_DOMAIN) index |= (tessellated ? 1 : 0) << index_offset;
<< index_offset;
++index_offset; ++index_offset;
assert_true(index_offset <= 32); assert_true(index_offset <= 32);
@ -1157,6 +1152,15 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
return true; return true;
} }
// Check if using tessellation to get the correct primitive type.
bool tessellated;
if (uint32_t(primitive_type) >=
uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) {
tessellated = (regs[XE_GPU_REG_VGT_OUTPUT_PATH_CNTL].u32 & 0x3) == 0x1;
} else {
tessellated = false;
}
// Shaders will have already been defined by previous loads. // Shaders will have already been defined by previous loads.
// We need them to do just about anything so validate here. // We need them to do just about anything so validate here.
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader()); auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
@ -1176,7 +1180,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// which is needed by the render target cache, to check the possibility of // which is needed by the render target cache, to check the possibility of
// doing early depth/stencil, and also to get used textures and samplers. // doing early depth/stencil, and also to get used textures and samplers.
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader, if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
primitive_type)) { tessellated, primitive_type)) {
return false; return false;
} }
@ -1189,11 +1193,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
!pixel_shader->memexport_stream_constants().empty(); !pixel_shader->memexport_stream_constants().empty();
bool memexport_used = memexport_used_vertex || memexport_used_pixel; bool memexport_used = memexport_used_vertex || memexport_used_pixel;
if (!memexport_used_vertex && bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
primitive_type != PrimitiveType::kPointList && if (!memexport_used_vertex && primitive_two_faced &&
primitive_type != PrimitiveType::kRectangleList) { (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3) {
// Both sides are culled - can't reproduce this with rasterizer state. // Both sides are culled.
return true; return true;
} }
@ -1221,8 +1225,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Adaptive tessellation requires an index buffer, but it contains per-edge // Adaptive tessellation requires an index buffer, but it contains per-edge
// tessellation factors (as floats) instead of control point indices. // tessellation factors (as floats) instead of control point indices.
bool adaptive_tessellation; bool adaptive_tessellation;
if (primitive_type == PrimitiveType::kTrianglePatch || if (tessellated) {
primitive_type == PrimitiveType::kQuadPatch) {
TessellationMode tessellation_mode = TessellationMode tessellation_mode =
TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3); TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3);
adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive; adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive;
@ -1245,44 +1248,54 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} else { } else {
adaptive_tessellation = false; adaptive_tessellation = false;
} }
PrimitiveType primitive_type_converted = PrimitiveType primitive_type_converted;
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
switch (primitive_type_converted) { if (tessellated) {
case PrimitiveType::kPointList: primitive_type_converted = primitive_type;
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; switch (primitive_type_converted) {
break; // TODO(Triang3l): Support line patches.
case PrimitiveType::kLineList: case PrimitiveType::kTrianglePatch:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST;
break; break;
case PrimitiveType::kLineStrip: case PrimitiveType::kQuadPatch:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST;
break; break;
case PrimitiveType::kTriangleList: default:
case PrimitiveType::kRectangleList: return false;
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; }
break; } else {
case PrimitiveType::kTriangleStrip: primitive_type_converted =
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
break; switch (primitive_type_converted) {
case PrimitiveType::kQuadList: case PrimitiveType::kPointList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
break; break;
case PrimitiveType::kTrianglePatch: case PrimitiveType::kLineList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
break; break;
case PrimitiveType::kQuadPatch: case PrimitiveType::kLineStrip:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
break; break;
default: case PrimitiveType::kTriangleList:
return false; case PrimitiveType::kRectangleList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
break;
case PrimitiveType::kTriangleStrip:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
break;
case PrimitiveType::kQuadList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
break;
default:
return false;
}
} }
if (primitive_topology_ != primitive_topology) { if (primitive_topology_ != primitive_topology) {
primitive_topology_ = primitive_topology; primitive_topology_ = primitive_topology;
deferred_command_list_->D3DIASetPrimitiveTopology(primitive_topology); deferred_command_list_->D3DIASetPrimitiveTopology(primitive_topology);
} }
uint32_t line_loop_closing_index; uint32_t line_loop_closing_index;
if (primitive_type == PrimitiveType::kLineLoop && !indexed && if (!tessellated && primitive_type == PrimitiveType::kLineLoop && !indexed &&
index_count >= 3) { index_count >= 3) {
// Add a vertex to close the loop, and make the vertex shader replace its // Add a vertex to close the loop, and make the vertex shader replace its
// index (before adding the offset) with 0 to fetch the first vertex again. // index (before adding the offset) with 0 to fetch the first vertex again.
@ -1318,7 +1331,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
void* pipeline_handle; void* pipeline_handle;
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
if (!pipeline_cache_->ConfigurePipeline( if (!pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type_converted, vertex_shader, pixel_shader, tessellated, primitive_type_converted,
indexed ? index_buffer_info->format : IndexFormat::kInt16, early_z, indexed ? index_buffer_info->format : IndexFormat::kInt16, early_z,
pipeline_render_targets, &pipeline_handle, &root_signature)) { pipeline_render_targets, &pipeline_handle, &root_signature)) {
return false; return false;
@ -1335,7 +1348,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Update system constants before uploading them. // Update system constants before uploading them.
UpdateSystemConstantValues( UpdateSystemConstantValues(
memexport_used, primitive_type, line_loop_closing_index, memexport_used, primitive_two_faced, line_loop_closing_index,
indexed ? index_buffer_info->endianness : Endian::kUnspecified, indexed ? index_buffer_info->endianness : Endian::kUnspecified,
adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0, adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0,
early_z, color_mask, pipeline_render_targets); early_z, color_mask, pipeline_render_targets);
@ -1494,18 +1507,23 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32 index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32
? DXGI_FORMAT_R32_UINT ? DXGI_FORMAT_R32_UINT
: DXGI_FORMAT_R16_UINT; : DXGI_FORMAT_R16_UINT;
PrimitiveConverter::ConversionResult conversion_result;
uint32_t converted_index_count; uint32_t converted_index_count;
PrimitiveConverter::ConversionResult conversion_result = if (tessellated) {
primitive_converter_->ConvertPrimitives( conversion_result =
primitive_type, index_buffer_info->guest_base, index_count, PrimitiveConverter::ConversionResult::kConversionNotNeeded;
index_buffer_info->format, index_buffer_info->endianness, } else {
index_buffer_view.BufferLocation, converted_index_count); conversion_result = primitive_converter_->ConvertPrimitives(
if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) { primitive_type, index_buffer_info->guest_base, index_count,
return false; index_buffer_info->format, index_buffer_info->endianness,
} index_buffer_view.BufferLocation, converted_index_count);
if (conversion_result == if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) {
PrimitiveConverter::ConversionResult::kPrimitiveEmpty) { return false;
return true; }
if (conversion_result ==
PrimitiveConverter::ConversionResult::kPrimitiveEmpty) {
return true;
}
} }
ID3D12Resource* scratch_index_buffer = nullptr; ID3D12Resource* scratch_index_buffer = nullptr;
if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) { if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) {
@ -1564,10 +1582,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} }
} else { } else {
// Check if need to draw using a conversion index buffer. // Check if need to draw using a conversion index buffer.
uint32_t converted_index_count; uint32_t converted_index_count = 0;
D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address = D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address =
primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count, tessellated ? 0
converted_index_count); : primitive_converter_->GetStaticIndexBuffer(
primitive_type, index_count, converted_index_count);
if (memexport_used) { if (memexport_used) {
shared_memory_->UseForWriting(); shared_memory_->UseForWriting();
} else { } else {
@ -1954,7 +1973,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState() {
} }
void D3D12CommandProcessor::UpdateSystemConstantValues( void D3D12CommandProcessor::UpdateSystemConstantValues(
bool shared_memory_is_uav, PrimitiveType primitive_type, bool shared_memory_is_uav, bool primitive_two_faced,
uint32_t line_loop_closing_index, Endian index_endian, uint32_t line_loop_closing_index, Endian index_endian,
uint32_t edge_factor_base, bool early_z, uint32_t color_mask, uint32_t edge_factor_base, bool early_z, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) { const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
@ -2195,18 +2214,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
bool gl_clip_space_def = bool gl_clip_space_def =
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
float ndc_scale_x, ndc_scale_y; float ndc_scale_x, ndc_scale_y, ndc_scale_z;
if (pa_cl_vte_cntl & (1 << 0)) { if (primitive_two_faced && (pa_su_sc_mode_cntl & 0x3) == 0x3) {
ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; // Kill all primitives if both faces are culled, but the vertex shader still
// needs to do memexport (not NaN because of comparison for setting the
// dirty flag).
ndc_scale_x = ndc_scale_y = ndc_scale_z = 0;
} else { } else {
ndc_scale_x = 1.0f / 1280.0f; if (pa_cl_vte_cntl & (1 << 0)) {
ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f;
} else {
ndc_scale_x = 1.0f / 1280.0f;
}
if (pa_cl_vte_cntl & (1 << 2)) {
ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f;
} else {
ndc_scale_y = -1.0f / 1280.0f;
}
ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
} }
if (pa_cl_vte_cntl & (1 << 2)) {
ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f;
} else {
ndc_scale_y = -1.0f / 1280.0f;
}
float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f;
float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f;
float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f;
@ -2376,6 +2402,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format); DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format);
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
system_constants_.edram_rt_format_flags[i] = format_flags; system_constants_.edram_rt_format_flags[i] = format_flags;
// Can't do float comparisons here because NaNs would result in always
// setting the dirty flag.
dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i], dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i],
4 * sizeof(float)) != 0; 4 * sizeof(float)) != 0;
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
@ -2419,25 +2447,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.edram_depth_range_offset != depth_range_offset; dirty |= system_constants_.edram_depth_range_offset != depth_range_offset;
system_constants_.edram_depth_range_offset = depth_range_offset; system_constants_.edram_depth_range_offset = depth_range_offset;
// For points and lines, front polygon offset is used, and it's enabled if // For non-polygons, front polygon offset is used, and it's enabled if
// POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back // POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back
// are used. // are used.
float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f; float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f; float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
if (primitive_type == PrimitiveType::kPointList || if (primitive_two_faced) {
primitive_type == PrimitiveType::kLineList ||
primitive_type == PrimitiveType::kLineStrip ||
primitive_type == PrimitiveType::kLineLoop ||
primitive_type == PrimitiveType::k2DLineStrip) {
if (pa_su_sc_mode_cntl & (1 << 13)) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset;
}
} else {
if (pa_su_sc_mode_cntl & (1 << 11)) { if (pa_su_sc_mode_cntl & (1 << 11)) {
poly_offset_front_scale = poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
@ -2450,6 +2465,15 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
poly_offset_back_offset = poly_offset_back_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
} }
} else {
if (pa_su_sc_mode_cntl & (1 << 13)) {
poly_offset_front_scale =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
poly_offset_front_offset =
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
poly_offset_back_scale = poly_offset_front_scale;
poly_offset_back_offset = poly_offset_front_offset;
}
} }
// "slope computed in subpixels (1/12 or 1/16)" - R5xx Acceleration. Also: // "slope computed in subpixels (1/12 or 1/16)" - R5xx Acceleration. Also:
// https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943 // https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943
@ -2511,7 +2535,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.edram_stencil_front_comparison != stencil_value; system_constants_.edram_stencil_front_comparison != stencil_value;
system_constants_.edram_stencil_front_comparison = stencil_value; system_constants_.edram_stencil_front_comparison = stencil_value;
if (rb_depthcontrol & 0x80) { if (primitive_two_faced && (rb_depthcontrol & 0x80)) {
stencil_value = kStencilOpMap[(rb_depthcontrol >> 23) & 0x7]; stencil_value = kStencilOpMap[(rb_depthcontrol >> 23) & 0x7];
dirty |= system_constants_.edram_stencil_back_fail != stencil_value; dirty |= system_constants_.edram_stencil_back_fail != stencil_value;
system_constants_.edram_stencil_back_fail = stencil_value; system_constants_.edram_stencil_back_fail = stencil_value;

View File

@ -82,7 +82,7 @@ class D3D12CommandProcessor : public CommandProcessor {
// Finds or creates root signature for a pipeline. // Finds or creates root signature for a pipeline.
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
const D3D12Shader* pixel_shader, const D3D12Shader* pixel_shader,
PrimitiveType primitive_type); bool tessellated);
ui::d3d12::UploadBufferPool* GetConstantBufferPool() const { ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
return constant_buffer_pool_.get(); return constant_buffer_pool_.get();
@ -210,7 +210,7 @@ class D3D12CommandProcessor : public CommandProcessor {
void UpdateFixedFunctionState(); void UpdateFixedFunctionState();
void UpdateSystemConstantValues( void UpdateSystemConstantValues(
bool shared_memory_is_uav, PrimitiveType primitive_type, bool shared_memory_is_uav, bool primitive_two_faced,
uint32_t line_loop_closing_index, Endian index_endian, uint32_t line_loop_closing_index, Endian index_endian,
uint32_t edge_factor_base, bool early_z, uint32_t color_mask, uint32_t edge_factor_base, bool early_z, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]); const RenderTargetCache::PipelineRenderTarget render_targets[4]);

View File

@ -25,15 +25,6 @@ class D3D12Shader : public Shader {
D3D12Shader(ShaderType shader_type, uint64_t data_hash, D3D12Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count); const uint32_t* dword_ptr, uint32_t dword_count);
// For checking if it's a domain shader rather than a vertex shader when used
// (since when a shader is used for the first time, it's translated either
// into a vertex shader or a domain shader, depending on the primitive type).
PrimitiveType GetDomainShaderPrimitiveType() const {
return domain_shader_primitive_type_;
}
void SetDomainShaderPrimitiveType(PrimitiveType primitive_type) {
domain_shader_primitive_type_ = primitive_type;
}
void SetTexturesAndSamplers( void SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureSRV* texture_srvs, const DxbcShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count, uint32_t texture_srv_count,
@ -88,8 +79,6 @@ class D3D12Shader : public Shader {
} }
private: private:
PrimitiveType domain_shader_primitive_type_ = PrimitiveType::kNone;
std::vector<TextureSRV> texture_srvs_; std::vector<TextureSRV> texture_srvs_;
uint32_t used_texture_mask_ = 0; uint32_t used_texture_mask_ = 0;
std::vector<SamplerBinding> sampler_bindings_; std::vector<SamplerBinding> sampler_bindings_;

View File

@ -194,6 +194,7 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type,
bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader, D3D12Shader* pixel_shader,
bool tessellated,
PrimitiveType primitive_type) { PrimitiveType primitive_type) {
auto& regs = *register_file_; auto& regs = *register_file_;
@ -207,12 +208,14 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader,
xenos::xe_gpu_program_cntl_t sq_program_cntl; xenos::xe_gpu_program_cntl_t sq_program_cntl;
sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
if (!vertex_shader->is_translated() && if (!vertex_shader->is_translated() &&
!TranslateShader(vertex_shader, sq_program_cntl, primitive_type)) { !TranslateShader(vertex_shader, sq_program_cntl, tessellated,
primitive_type)) {
XELOGE("Failed to translate the vertex shader!"); XELOGE("Failed to translate the vertex shader!");
return false; return false;
} }
if (pixel_shader != nullptr && !pixel_shader->is_translated() && if (pixel_shader != nullptr && !pixel_shader->is_translated() &&
!TranslateShader(pixel_shader, sq_program_cntl, primitive_type)) { !TranslateShader(pixel_shader, sq_program_cntl, tessellated,
primitive_type)) {
XELOGE("Failed to translate the pixel shader!"); XELOGE("Failed to translate the pixel shader!");
return false; return false;
} }
@ -220,7 +223,7 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader,
} }
bool PipelineCache::ConfigurePipeline( bool PipelineCache::ConfigurePipeline(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type, IndexFormat index_format, bool early_z, PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) { void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) {
@ -232,9 +235,9 @@ bool PipelineCache::ConfigurePipeline(
assert_not_null(root_signature_out); assert_not_null(root_signature_out);
PipelineDescription description; PipelineDescription description;
if (!GetCurrentStateDescription(vertex_shader, pixel_shader, primitive_type, if (!GetCurrentStateDescription(vertex_shader, pixel_shader, tessellated,
index_format, early_z, render_targets, primitive_type, index_format, early_z,
description)) { render_targets, description)) {
return false; return false;
} }
@ -260,7 +263,8 @@ bool PipelineCache::ConfigurePipeline(
} }
} }
if (!EnsureShadersTranslated(vertex_shader, pixel_shader, primitive_type)) { if (!EnsureShadersTranslated(vertex_shader, pixel_shader, tessellated,
primitive_type)) {
return false; return false;
} }
@ -289,33 +293,17 @@ bool PipelineCache::ConfigurePipeline(
bool PipelineCache::TranslateShader(D3D12Shader* shader, bool PipelineCache::TranslateShader(D3D12Shader* shader,
xenos::xe_gpu_program_cntl_t cntl, xenos::xe_gpu_program_cntl_t cntl,
bool tessellated,
PrimitiveType primitive_type) { PrimitiveType primitive_type) {
// Set the target for vertex shader translation.
DxbcShaderTranslator::VertexShaderType vertex_shader_type;
if (primitive_type == PrimitiveType::kTrianglePatch) {
vertex_shader_type =
DxbcShaderTranslator::VertexShaderType::kTriangleDomain;
} else if (primitive_type == PrimitiveType::kQuadPatch) {
vertex_shader_type = DxbcShaderTranslator::VertexShaderType::kQuadDomain;
} else {
vertex_shader_type = DxbcShaderTranslator::VertexShaderType::kVertex;
}
shader_translator_->SetVertexShaderType(vertex_shader_type);
// Perform translation. // Perform translation.
// If this fails the shader will be marked as invalid and ignored later. // If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_->Translate(shader, cntl)) { if (!shader_translator_->Translate(
shader, tessellated ? primitive_type : PrimitiveType::kNone, cntl)) {
XELOGE("Shader %.16" PRIX64 " translation failed; marking as ignored", XELOGE("Shader %.16" PRIX64 " translation failed; marking as ignored",
shader->ucode_data_hash()); shader->ucode_data_hash());
return false; return false;
} }
if (vertex_shader_type != DxbcShaderTranslator::VertexShaderType::kVertex) {
// For checking later for safety (so a vertex shader won't be accidentally
// used as a domain shader or vice versa).
shader->SetDomainShaderPrimitiveType(primitive_type);
}
uint32_t texture_srv_count; uint32_t texture_srv_count;
const DxbcShaderTranslator::TextureSRV* texture_srvs = const DxbcShaderTranslator::TextureSRV* texture_srvs =
shader_translator_->GetTextureSRVs(texture_srv_count); shader_translator_->GetTextureSRVs(texture_srv_count);
@ -360,19 +348,20 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader,
} }
bool PipelineCache::GetCurrentStateDescription( bool PipelineCache::GetCurrentStateDescription(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type, IndexFormat index_format, bool early_z, PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineDescription& description_out) { PipelineDescription& description_out) {
auto& regs = *register_file_; auto& regs = *register_file_;
uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32;
bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type);
// Initialize all unused fields to zero for comparison/hashing. // Initialize all unused fields to zero for comparison/hashing.
std::memset(&description_out, 0, sizeof(description_out)); std::memset(&description_out, 0, sizeof(description_out));
// Root signature. // Root signature.
description_out.root_signature = command_processor_->GetRootSignature( description_out.root_signature = command_processor_->GetRootSignature(
vertex_shader, pixel_shader, primitive_type); vertex_shader, pixel_shader, tessellated);
if (description_out.root_signature == nullptr) { if (description_out.root_signature == nullptr) {
return false; return false;
} }
@ -393,93 +382,79 @@ bool PipelineCache::GetCurrentStateDescription(
} }
// Primitive topology type, tessellation mode and geometry shader. // Primitive topology type, tessellation mode and geometry shader.
description_out.tessellation_mode = PipelineTessellationMode::kNone; if (tessellated) {
switch (primitive_type) { switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) {
case PrimitiveType::kPointList: case TessellationMode::kContinuous:
description_out.primitive_topology_type = description_out.tessellation_mode =
PipelinePrimitiveTopologyType::kPoint; PipelineTessellationMode::kContinuous;
break; break;
case PrimitiveType::kLineList: case TessellationMode::kAdaptive:
case PrimitiveType::kLineStrip: description_out.tessellation_mode =
case PrimitiveType::kLineLoop: FLAGS_d3d12_tessellation_adaptive
// Quads are emulated as line lists with adjacency. ? PipelineTessellationMode::kAdaptive
case PrimitiveType::kQuadList: : PipelineTessellationMode::kContinuous;
case PrimitiveType::k2DLineStrip: break;
description_out.primitive_topology_type = default:
PipelinePrimitiveTopologyType::kLine; description_out.tessellation_mode = PipelineTessellationMode::kDiscrete;
break; break;
case PrimitiveType::kTrianglePatch: }
case PrimitiveType::kQuadPatch: description_out.primitive_topology_type =
description_out.primitive_topology_type = PipelinePrimitiveTopologyType::kPatch;
PipelinePrimitiveTopologyType::kPatch; switch (primitive_type) {
switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) { case PrimitiveType::kLinePatch:
case TessellationMode::kContinuous: description_out.patch_type = PipelinePatchType::kLine;
description_out.tessellation_mode = break;
PipelineTessellationMode::kContinuous; case PrimitiveType::kTrianglePatch:
break; description_out.patch_type = PipelinePatchType::kTriangle;
case TessellationMode::kAdaptive: break;
description_out.tessellation_mode = case PrimitiveType::kQuadPatch:
FLAGS_d3d12_tessellation_adaptive description_out.patch_type = PipelinePatchType::kQuad;
? PipelineTessellationMode::kAdaptive break;
: PipelineTessellationMode::kContinuous; default:
break; assert_unhandled_case(primitive_type);
default: return false;
description_out.tessellation_mode = }
PipelineTessellationMode::kDiscrete; description_out.geometry_shader = PipelineGeometryShader::kNone;
break; } else {
} description_out.tessellation_mode = PipelineTessellationMode::kNone;
break; switch (primitive_type) {
default: case PrimitiveType::kPointList:
description_out.primitive_topology_type = description_out.primitive_topology_type =
PipelinePrimitiveTopologyType::kTriangle; PipelinePrimitiveTopologyType::kPoint;
break; break;
} case PrimitiveType::kLineList:
switch (primitive_type) { case PrimitiveType::kLineStrip:
case PrimitiveType::kLinePatch: case PrimitiveType::kLineLoop:
description_out.patch_type = PipelinePatchType::kLine; // Quads are emulated as line lists with adjacency.
break; case PrimitiveType::kQuadList:
case PrimitiveType::kTrianglePatch: case PrimitiveType::k2DLineStrip:
description_out.patch_type = PipelinePatchType::kTriangle; description_out.primitive_topology_type =
break; PipelinePrimitiveTopologyType::kLine;
case PrimitiveType::kQuadPatch: break;
description_out.patch_type = PipelinePatchType::kQuad; default:
break; description_out.primitive_topology_type =
default: PipelinePrimitiveTopologyType::kTriangle;
description_out.patch_type = PipelinePatchType::kNone; break;
break; }
} description_out.patch_type = PipelinePatchType::kNone;
switch (primitive_type) { switch (primitive_type) {
case PrimitiveType::kPointList: case PrimitiveType::kPointList:
description_out.geometry_shader = PipelineGeometryShader::kPointList; description_out.geometry_shader = PipelineGeometryShader::kPointList;
break; break;
case PrimitiveType::kRectangleList: case PrimitiveType::kRectangleList:
description_out.geometry_shader = PipelineGeometryShader::kRectangleList; description_out.geometry_shader =
break; PipelineGeometryShader::kRectangleList;
case PrimitiveType::kQuadList: break;
description_out.geometry_shader = PipelineGeometryShader::kQuadList; case PrimitiveType::kQuadList:
break; description_out.geometry_shader = PipelineGeometryShader::kQuadList;
default: break;
description_out.geometry_shader = PipelineGeometryShader::kNone; default:
break; description_out.geometry_shader = PipelineGeometryShader::kNone;
break;
}
} }
// Rasterizer state. // Rasterizer state.
uint32_t cull_mode;
if (primitive_type == PrimitiveType::kPointList ||
primitive_type == PrimitiveType::kRectangleList) {
cull_mode = 0;
} else {
cull_mode = pa_su_sc_mode_cntl & 0x3;
}
if (cull_mode & 1) {
// More special, so checked first - generally back faces are culled.
description_out.cull_mode = PipelineCullMode::kFront;
} else if (cull_mode & 2) {
description_out.cull_mode = PipelineCullMode::kBack;
} else {
description_out.cull_mode = PipelineCullMode::kNone;
}
description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0;
// Because Direct3D 12 doesn't support per-side fill mode and depth bias, the // Because Direct3D 12 doesn't support per-side fill mode and depth bias, the
// values to use depend on the current culling state. // values to use depend on the current culling state.
// If front faces are culled, use the ones for back faces. // If front faces are culled, use the ones for back faces.
@ -495,33 +470,56 @@ bool PipelineCache::GetCurrentStateDescription(
// Here we also assume that only one side is culled - if two sides are culled, // Here we also assume that only one side is culled - if two sides are culled,
// the D3D12 command processor will drop such draw early. // the D3D12 command processor will drop such draw early.
float poly_offset = 0.0f, poly_offset_scale = 0.0f; float poly_offset = 0.0f, poly_offset_scale = 0.0f;
// With ROV, the depth bias is applied in the pixel shader because per-sample if (primitive_two_faced) {
// depth is needed for MSAA. uint32_t cull_mode = pa_su_sc_mode_cntl & 0x3;
if (!(cull_mode & 1)) { description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0;
// Front faces aren't culled. if (cull_mode == 1) {
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; description_out.cull_mode = PipelineCullMode::kFront;
if (fill_mode == 0 || fill_mode == 1) { } else if (cull_mode == 2) {
description_out.fill_mode_wireframe = 1; description_out.cull_mode = PipelineCullMode::kBack;
} else {
description_out.cull_mode = PipelineCullMode::kNone;
} }
if (!edram_rov_used_ && ((pa_su_sc_mode_cntl >> 11) & 0x1)) { // With ROV, the depth bias is applied in the pixel shader because
// per-sample depth is needed for MSAA.
if (cull_mode != 1) {
// Front faces aren't culled.
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7;
if (fill_mode == 0 || fill_mode == 1) {
description_out.fill_mode_wireframe = 1;
}
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 11))) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
}
}
if (cull_mode != 2) {
// Back faces aren't culled.
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7;
if (fill_mode == 0 || fill_mode == 1) {
description_out.fill_mode_wireframe = 1;
}
// Prefer front depth bias because in general, front faces are the ones
// that are rendered (except for shadow volumes).
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 12)) &&
poly_offset == 0.0f && poly_offset_scale == 0.0f) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
}
}
if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) {
// Fill mode is disabled.
description_out.fill_mode_wireframe = 0;
}
} else {
// Filled front faces only.
// Use front depth bias if POLY_OFFSET_PARA_ENABLED
// (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives).
if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 13))) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
} }
} }
if (!(cull_mode & 2)) {
// Back faces aren't culled.
uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7;
if (fill_mode == 0 || fill_mode == 1) {
description_out.fill_mode_wireframe = 1;
}
// Prefer front depth bias because in general, front faces are the ones
// that are rendered (except for shadow volumes).
if (!edram_rov_used_ && ((pa_su_sc_mode_cntl >> 12) & 0x1) &&
poly_offset == 0.0f && poly_offset_scale == 0.0f) {
poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
}
}
if (!edram_rov_used_) { if (!edram_rov_used_) {
// Conversion based on the calculations in Call of Duty 4 and the values it // Conversion based on the calculations in Call of Duty 4 and the values it
// writes to the registers, and also on: // writes to the registers, and also on:
@ -551,12 +549,9 @@ bool PipelineCache::GetCurrentStateDescription(
description_out.depth_bias_slope_scaled = description_out.depth_bias_slope_scaled =
poly_offset_scale * (1.0f / 16.0f); poly_offset_scale * (1.0f / 16.0f);
} }
if ((pa_su_sc_mode_cntl & (0x3 << 3)) == 0) { if (FLAGS_d3d12_tessellation_wireframe && tessellated &&
// Fill mode is disabled. (primitive_type == PrimitiveType::kTrianglePatch ||
description_out.fill_mode_wireframe = 0; primitive_type == PrimitiveType::kQuadPatch)) {
}
if (FLAGS_d3d12_tessellation_wireframe &&
description_out.tessellation_mode != PipelineTessellationMode::kNone) {
description_out.fill_mode_wireframe = 1; description_out.fill_mode_wireframe = 1;
} }
// CLIP_DISABLE // CLIP_DISABLE
@ -596,7 +591,7 @@ bool PipelineCache::GetCurrentStateDescription(
(rb_depthcontrol >> 17) & 0x7; (rb_depthcontrol >> 17) & 0x7;
description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7; description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7;
description_out.stencil_front_func = (rb_depthcontrol >> 8) & 0x7; description_out.stencil_front_func = (rb_depthcontrol >> 8) & 0x7;
if (rb_depthcontrol & 0x80) { if (primitive_two_faced && (rb_depthcontrol & 0x80)) {
description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7; description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7;
description_out.stencil_back_depth_fail_op = description_out.stencil_back_depth_fail_op =
(rb_depthcontrol >> 29) & 0x7; (rb_depthcontrol >> 29) & 0x7;
@ -768,7 +763,7 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
if (description.tessellation_mode != PipelineTessellationMode::kNone) { if (description.tessellation_mode != PipelineTessellationMode::kNone) {
switch (description.patch_type) { switch (description.patch_type) {
case PipelinePatchType::kTriangle: case PipelinePatchType::kTriangle:
if (description.vertex_shader->GetDomainShaderPrimitiveType() != if (description.vertex_shader->patch_primitive_type() !=
PrimitiveType::kTrianglePatch) { PrimitiveType::kTrianglePatch) {
XELOGE( XELOGE(
"Tried to use vertex shader %.16" PRIX64 "Tried to use vertex shader %.16" PRIX64
@ -794,7 +789,7 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
state_desc.VS.BytecodeLength = sizeof(tessellation_triangle_vs); state_desc.VS.BytecodeLength = sizeof(tessellation_triangle_vs);
break; break;
case PipelinePatchType::kQuad: case PipelinePatchType::kQuad:
if (description.vertex_shader->GetDomainShaderPrimitiveType() != if (description.vertex_shader->patch_primitive_type() !=
PrimitiveType::kQuadPatch) { PrimitiveType::kQuadPatch) {
XELOGE("Tried to use vertex shader %.16" PRIX64 XELOGE("Tried to use vertex shader %.16" PRIX64
" for quad patch tessellation, but it's not a tessellation " " for quad patch tessellation, but it's not a tessellation "
@ -825,7 +820,7 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState(
state_desc.DS.BytecodeLength = state_desc.DS.BytecodeLength =
description.vertex_shader->translated_binary().size(); description.vertex_shader->translated_binary().size();
} else { } else {
if (description.vertex_shader->GetDomainShaderPrimitiveType() != if (description.vertex_shader->patch_primitive_type() !=
PrimitiveType::kNone) { PrimitiveType::kNone) {
XELOGE("Tried to use vertex shader %.16" PRIX64 XELOGE("Tried to use vertex shader %.16" PRIX64
" without tessellation, but it's a tessellation domain shader", " without tessellation, but it's a tessellation domain shader",

View File

@ -49,11 +49,11 @@ class PipelineCache {
// Translates shaders if needed, also making shader info up to date. // Translates shaders if needed, also making shader info up to date.
bool EnsureShadersTranslated(D3D12Shader* vertex_shader, bool EnsureShadersTranslated(D3D12Shader* vertex_shader,
D3D12Shader* pixel_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type); PrimitiveType primitive_type);
bool ConfigurePipeline( bool ConfigurePipeline(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type, IndexFormat index_format, bool early_z, PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
void** pipeline_handle_out, ID3D12RootSignature** root_signature_out); void** pipeline_handle_out, ID3D12RootSignature** root_signature_out);
@ -172,10 +172,10 @@ class PipelineCache {
}; };
bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl, bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl,
PrimitiveType primitive_type); bool tessellated, PrimitiveType primitive_type);
bool GetCurrentStateDescription( bool GetCurrentStateDescription(
D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated,
PrimitiveType primitive_type, IndexFormat index_format, bool early_z, PrimitiveType primitive_type, IndexFormat index_format, bool early_z,
const RenderTargetCache::PipelineRenderTarget render_targets[5], const RenderTargetCache::PipelineRenderTarget render_targets[5],
PipelineDescription& description_out); PipelineDescription& description_out);

View File

@ -761,14 +761,15 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// Copy the domain location to r0.yz (for quad patches) or r0.xyz (for // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for
// triangle patches), and also set the domain in STAT. // triangle patches), and also set the domain in STAT.
uint32_t domain_location_mask, domain_location_swizzle; uint32_t domain_location_mask, domain_location_swizzle;
if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
domain_location_mask = 0b0111; domain_location_mask = 0b0111;
// ZYX swizzle with r1.y == 0, according to the water shader in // ZYX swizzle with r1.y == 0, according to the water shader in
// Banjo-Kazooie: Nuts & Bolts. // Banjo-Kazooie: Nuts & Bolts.
domain_location_swizzle = 0b00000110; domain_location_swizzle = 0b00000110;
stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI;
} else { } else {
assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); // TODO(Triang3l): Support line patches.
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
// According to the ground shader in Viva Pinata, though it's impossible // According to the ground shader in Viva Pinata, though it's impossible
// (as of December 12th, 2018) to test there since it possibly requires // (as of December 12th, 2018) to test there since it possibly requires
// memexport for ground control points (the memory region with them is // memexport for ground control points (the memory region with them is
@ -807,8 +808,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// TODO(Triang3l): Investigate what should be written for primitives (or // TODO(Triang3l): Investigate what should be written for primitives (or
// even control points) for non-adaptive tessellation modes (they may // even control points) for non-adaptive tessellation modes (they may
// possibly have an index buffer). // possibly have an index buffer).
// TODO(Triang3l): Support line patches.
uint32_t primitive_id_gpr_index = uint32_t primitive_id_gpr_index =
vertex_shader_type_ == VertexShaderType::kTriangleDomain ? 1 : 0; patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0;
if (register_count() > primitive_id_gpr_index) { if (register_count() > primitive_id_gpr_index) {
uint32_t primitive_id_temp = uses_register_dynamic_addressing() uint32_t primitive_id_temp = uses_register_dynamic_addressing()
@ -877,9 +879,11 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
// //
// Direct3D 12 appears to be passing the coordinates in a consistent // Direct3D 12 appears to be passing the coordinates in a consistent
// order, so we can just use ZYX for triangle patches. // order, so we can just use ZYX for triangle patches.
//
// TODO(Triang3l): Support line patches.
uint32_t domain_location_swizzle_mask = uint32_t domain_location_swizzle_mask =
vertex_shader_type_ == VertexShaderType::kTriangleDomain ? 0b0010 patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010
: 0b0001; : 0b0001;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(
3 + temp_register_operand_length)); 3 + temp_register_operand_length));
@ -4152,11 +4156,12 @@ void DxbcShaderTranslator::WritePatchConstantSignature() {
// FXC refuses to compile without SV_TessFactor and SV_InsideTessFactor input, // FXC refuses to compile without SV_TessFactor and SV_InsideTessFactor input,
// so this is required. // so this is required.
uint32_t tess_factor_count_edge, tess_factor_count_inside; uint32_t tess_factor_count_edge, tess_factor_count_inside;
if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
tess_factor_count_edge = 3; tess_factor_count_edge = 3;
tess_factor_count_inside = 1; tess_factor_count_inside = 1;
} else { } else {
assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); // TODO(Triang3l): Support line patches.
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
tess_factor_count_edge = 4; tess_factor_count_edge = 4;
tess_factor_count_inside = 2; tess_factor_count_inside = 2;
} }
@ -4172,7 +4177,7 @@ void DxbcShaderTranslator::WritePatchConstantSignature() {
shader_object_.push_back(0); shader_object_.push_back(0);
shader_object_.push_back( shader_object_.push_back(
i < tess_factor_count_edge ? i : (i - tess_factor_count_edge)); i < tess_factor_count_edge ? i : (i - tess_factor_count_edge));
if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
if (i < tess_factor_count_edge) { if (i < tess_factor_count_edge) {
// D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR. // D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR.
shader_object_.push_back(13); shader_object_.push_back(13);
@ -4181,7 +4186,8 @@ void DxbcShaderTranslator::WritePatchConstantSignature() {
shader_object_.push_back(14); shader_object_.push_back(14);
} }
} else { } else {
assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); // TODO(Triang3l): Support line patches.
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
if (i < tess_factor_count_edge) { if (i < tess_factor_count_edge) {
// D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR. // D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR.
shader_object_.push_back(11); shader_object_.push_back(11);
@ -4401,11 +4407,12 @@ void DxbcShaderTranslator::WriteShaderCode() {
// as both vertex shader and domain shader. // as both vertex shader and domain shader.
uint32_t control_point_count; uint32_t control_point_count;
D3D11_SB_TESSELLATOR_DOMAIN domain; D3D11_SB_TESSELLATOR_DOMAIN domain;
if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
control_point_count = 3; control_point_count = 3;
domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI;
} else { } else {
assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); // TODO(Triang3l): Support line patches.
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
control_point_count = 4; control_point_count = 4;
domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD; domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD;
} }
@ -4597,10 +4604,11 @@ void DxbcShaderTranslator::WriteShaderCode() {
if (IsDxbcDomainShader()) { if (IsDxbcDomainShader()) {
// Domain location input (barycentric for triangles, UV for quads). // Domain location input (barycentric for triangles, UV for quads).
uint32_t domain_location_mask; uint32_t domain_location_mask;
if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { if (patch_primitive_type() == PrimitiveType::kTrianglePatch) {
domain_location_mask = 0b0111; domain_location_mask = 0b0111;
} else { } else {
assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); // TODO(Triang3l): Support line patches.
assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch);
domain_location_mask = 0b0011; domain_location_mask = 0b0011;
} }
shader_object_.push_back( shader_object_.push_back(

View File

@ -31,13 +31,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used); DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used);
~DxbcShaderTranslator() override; ~DxbcShaderTranslator() override;
enum class VertexShaderType { kVertex, kTriangleDomain, kQuadDomain };
// Sets the type (shader model and input layout) of the next vertex shader
// that will be converted.
void SetVertexShaderType(VertexShaderType type) {
vertex_shader_type_ = type;
}
// Constant buffer bindings in space 0. // Constant buffer bindings in space 0.
enum class CbufferRegister { enum class CbufferRegister {
kSystemConstants, kSystemConstants,
@ -646,12 +639,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
} }
inline bool IsDxbcVertexShader() const { inline bool IsDxbcVertexShader() const {
return IsDxbcVertexOrDomainShader() && return IsDxbcVertexOrDomainShader() &&
vertex_shader_type_ == VertexShaderType::kVertex; patch_primitive_type() == PrimitiveType::kNone;
} }
inline bool IsDxbcDomainShader() const { inline bool IsDxbcDomainShader() const {
return IsDxbcVertexOrDomainShader() && return IsDxbcVertexOrDomainShader() &&
(vertex_shader_type_ == VertexShaderType::kTriangleDomain || patch_primitive_type() != PrimitiveType::kNone;
vertex_shader_type_ == VertexShaderType::kQuadDomain);
} }
inline bool IsDxbcPixelShader() const { inline bool IsDxbcPixelShader() const {
return is_depth_only_pixel_shader_ || is_pixel_shader(); return is_depth_only_pixel_shader_ || is_pixel_shader();
@ -957,7 +949,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Whether the output merger should be emulated in pixel shaders. // Whether the output merger should be emulated in pixel shaders.
bool edram_rov_used_; bool edram_rov_used_;
VertexShaderType vertex_shader_type_ = VertexShaderType::kVertex;
// Is currently writing the empty depth-only pixel shader, for // Is currently writing the empty depth-only pixel shader, for
// CompleteTranslation. // CompleteTranslation.
bool is_depth_only_pixel_shader_; bool is_depth_only_pixel_shader_;

View File

@ -587,6 +587,10 @@ class Shader {
// Whether the shader is identified as a vertex or pixel shader. // Whether the shader is identified as a vertex or pixel shader.
ShaderType type() const { return shader_type_; } ShaderType type() const { return shader_type_; }
// Tessellation patch primitive type for a vertex shader translated into a
// domain shader, or PrimitiveType::kNone for a normal vertex shader.
// The value is PrimitiveType::kNone until ShaderTranslator writes it back
// together with the rest of the translation results.
PrimitiveType patch_primitive_type() const { return patch_primitive_type_; }
// Microcode dwords in host endianness. // Microcode dwords in host endianness.
const std::vector<uint32_t>& ucode_data() const { return ucode_data_; } const std::vector<uint32_t>& ucode_data() const { return ucode_data_; }
uint64_t ucode_data_hash() const { return ucode_data_hash_; } uint64_t ucode_data_hash() const { return ucode_data_hash_; }
@ -667,6 +671,7 @@ class Shader {
friend class ShaderTranslator; friend class ShaderTranslator;
ShaderType shader_type_; ShaderType shader_type_;
PrimitiveType patch_primitive_type_ = PrimitiveType::kNone;
std::vector<uint32_t> ucode_data_; std::vector<uint32_t> ucode_data_;
uint64_t ucode_data_hash_; uint64_t ucode_data_hash_;

View File

@ -35,10 +35,10 @@ DEFINE_string(shader_input_type, "",
DEFINE_string(shader_output, "", "Output shader file path."); DEFINE_string(shader_output, "", "Output shader file path.");
DEFINE_string(shader_output_type, "ucode", DEFINE_string(shader_output_type, "ucode",
"Translator to use: [ucode, glsl45, spirv, spirvtext, dxbc]."); "Translator to use: [ucode, glsl45, spirv, spirvtext, dxbc].");
DEFINE_string(shader_output_domain, "", DEFINE_string(shader_output_patch, "",
"Abstract patch type in the generated tessellation evaluation " "Tessellation patch type in the generated tessellation "
"(domain) shader, or unspecified to produce a vertex shader: " "evaluation (domain) shader, or unspecified to produce a vertex "
"[triangle, quad]."); "shader: [line, triangle, quad].");
DEFINE_bool(shader_output_dxbc_rov, false, DEFINE_bool(shader_output_dxbc_rov, false,
"Output ROV-based output-merger code in DXBC pixel shaders."); "Output ROV-based output-merger code in DXBC pixel shaders.");
@ -108,23 +108,22 @@ int shader_compiler_main(const std::vector<std::wstring>& args) {
} else if (FLAGS_shader_output_type == "dxbc") { } else if (FLAGS_shader_output_type == "dxbc") {
translator = translator =
std::make_unique<DxbcShaderTranslator>(0, FLAGS_shader_output_dxbc_rov); std::make_unique<DxbcShaderTranslator>(0, FLAGS_shader_output_dxbc_rov);
DxbcShaderTranslator::VertexShaderType dxbc_vertex_shader_type;
if (FLAGS_shader_output_domain == "triangle") {
dxbc_vertex_shader_type =
DxbcShaderTranslator::VertexShaderType::kTriangleDomain;
} else if (FLAGS_shader_output_domain == "quad") {
dxbc_vertex_shader_type =
DxbcShaderTranslator::VertexShaderType::kQuadDomain;
} else {
dxbc_vertex_shader_type = DxbcShaderTranslator::VertexShaderType::kVertex;
}
static_cast<DxbcShaderTranslator*>(translator.get())
->SetVertexShaderType(dxbc_vertex_shader_type);
} else { } else {
translator = std::make_unique<UcodeShaderTranslator>(); translator = std::make_unique<UcodeShaderTranslator>();
} }
translator->Translate(shader.get()); PrimitiveType patch_primitive_type = PrimitiveType::kNone;
// Select the tessellation patch type requested with --shader_output_patch.
// Only vertex shaders can be translated into domain shaders, so the flag is
// ignored for other shader types and patch_primitive_type keeps its
// PrimitiveType::kNone initial value.
if (shader_type == ShaderType::kVertex) {
  // NOTE: these must be assignments. A comparison (`==`) here compiles but
  // discards its result, silently leaving the patch type at kNone.
  if (FLAGS_shader_output_patch == "line") {
    patch_primitive_type = PrimitiveType::kLinePatch;
  } else if (FLAGS_shader_output_patch == "triangle") {
    patch_primitive_type = PrimitiveType::kTrianglePatch;
  } else if (FLAGS_shader_output_patch == "quad") {
    patch_primitive_type = PrimitiveType::kQuadPatch;
  }
  // Any other value (including the empty default) produces a regular vertex
  // shader.
}
translator->Translate(shader.get(), patch_primitive_type);
const void* source_data = shader->translated_binary().data(); const void* source_data = shader->translated_binary().data();
size_t source_data_size = shader->translated_binary().size(); size_t source_data_size = shader->translated_binary().size();

View File

@ -107,22 +107,25 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
return true; return true;
} }
bool ShaderTranslator::Translate(Shader* shader, bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type,
xenos::xe_gpu_program_cntl_t cntl) { xenos::xe_gpu_program_cntl_t cntl) {
Reset(); Reset();
register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1 register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1
: cntl.ps_regs + 1; : cntl.ps_regs + 1;
return TranslateInternal(shader); return TranslateInternal(shader, patch_type);
} }
bool ShaderTranslator::Translate(Shader* shader) { bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type) {
Reset(); Reset();
return TranslateInternal(shader); return TranslateInternal(shader, patch_type);
} }
bool ShaderTranslator::TranslateInternal(Shader* shader) { bool ShaderTranslator::TranslateInternal(Shader* shader,
PrimitiveType patch_type) {
shader_type_ = shader->type(); shader_type_ = shader->type();
patch_primitive_type_ =
shader_type_ == ShaderType::kVertex ? patch_type : PrimitiveType::kNone;
ucode_dwords_ = shader->ucode_dwords(); ucode_dwords_ = shader->ucode_dwords();
ucode_dword_count_ = shader->ucode_dword_count(); ucode_dword_count_ = shader->ucode_dword_count();
@ -190,6 +193,7 @@ bool ShaderTranslator::TranslateInternal(Shader* shader) {
shader->errors_ = std::move(errors_); shader->errors_ = std::move(errors_);
shader->translated_binary_ = CompleteTranslation(); shader->translated_binary_ = CompleteTranslation();
shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string(); shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string();
shader->patch_primitive_type_ = patch_primitive_type_;
shader->vertex_bindings_ = std::move(vertex_bindings_); shader->vertex_bindings_ = std::move(vertex_bindings_);
shader->texture_bindings_ = std::move(texture_bindings_); shader->texture_bindings_ = std::move(texture_bindings_);
shader->constant_register_map_ = std::move(constant_register_map_); shader->constant_register_map_ = std::move(constant_register_map_);

View File

@ -32,8 +32,9 @@ class ShaderTranslator {
// DEPRECATED(benvanik): remove this when shader cache is removed. // DEPRECATED(benvanik): remove this when shader cache is removed.
bool GatherAllBindingInformation(Shader* shader); bool GatherAllBindingInformation(Shader* shader);
bool Translate(Shader* shader, xenos::xe_gpu_program_cntl_t cntl); bool Translate(Shader* shader, PrimitiveType patch_type,
bool Translate(Shader* shader); xenos::xe_gpu_program_cntl_t cntl);
bool Translate(Shader* shader, PrimitiveType patch_type);
protected: protected:
ShaderTranslator(); ShaderTranslator();
@ -45,6 +46,9 @@ class ShaderTranslator {
uint32_t register_count() const { return register_count_; } uint32_t register_count() const { return register_count_; }
// True if the current shader is a vertex shader. // True if the current shader is a vertex shader.
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
// Tessellation patch primitive type for a vertex shader translated into a
// domain shader, or PrimitiveType::kNone for a normal vertex shader.
// Refers to the shader currently being translated; TranslateInternal forces
// it to PrimitiveType::kNone for anything that is not a vertex shader.
PrimitiveType patch_primitive_type() const { return patch_primitive_type_; }
// True if the current shader is a pixel shader. // True if the current shader is a pixel shader.
bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; } bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; }
const Shader::ConstantRegisterMap& constant_register_map() const { const Shader::ConstantRegisterMap& constant_register_map() const {
@ -176,7 +180,7 @@ class ShaderTranslator {
bool disable_implicit_early_z; bool disable_implicit_early_z;
}; };
bool TranslateInternal(Shader* shader); bool TranslateInternal(Shader* shader, PrimitiveType patch_type);
void MarkUcodeInstruction(uint32_t dword_offset); void MarkUcodeInstruction(uint32_t dword_offset);
void AppendUcodeDisasm(char c); void AppendUcodeDisasm(char c);
@ -225,6 +229,7 @@ class ShaderTranslator {
// Input shader metadata and microcode. // Input shader metadata and microcode.
ShaderType shader_type_; ShaderType shader_type_;
PrimitiveType patch_primitive_type_;
const uint32_t* ucode_dwords_; const uint32_t* ucode_dwords_;
size_t ucode_dword_count_; size_t ucode_dword_count_;
xenos::xe_gpu_program_cntl_t program_cntl_; xenos::xe_gpu_program_cntl_t program_cntl_;

View File

@ -367,7 +367,7 @@ bool PipelineCache::TranslateShader(VulkanShader* shader,
xenos::xe_gpu_program_cntl_t cntl) { xenos::xe_gpu_program_cntl_t cntl) {
// Perform translation. // Perform translation.
// If this fails the shader will be marked as invalid and ignored later. // If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_->Translate(shader, cntl)) { if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) {
XELOGE("Shader translation failed; marking shader as ignored"); XELOGE("Shader translation failed; marking shader as ignored");
return false; return false;
} }