From 4825e69fda11d43c2a5fa514aedf3f1ceb8d89d3 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 13 Jul 2019 22:25:03 +0300 Subject: [PATCH] [D3D12] Cleanup primitive types and front/back facing --- .../gpu/d3d12/d3d12_command_processor.cc | 208 +++++++------ src/xenia/gpu/d3d12/d3d12_command_processor.h | 4 +- src/xenia/gpu/d3d12/d3d12_shader.h | 11 - src/xenia/gpu/d3d12/pipeline_cache.cc | 283 +++++++++--------- src/xenia/gpu/d3d12/pipeline_cache.h | 8 +- src/xenia/gpu/dxbc_shader_translator.cc | 34 ++- src/xenia/gpu/dxbc_shader_translator.h | 13 +- src/xenia/gpu/shader.h | 5 + src/xenia/gpu/shader_compiler_main.cc | 33 +- src/xenia/gpu/shader_translator.cc | 14 +- src/xenia/gpu/shader_translator.h | 11 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 2 +- 12 files changed, 323 insertions(+), 303 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index b994e1a3d..96bfebca7 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -139,17 +139,13 @@ void D3D12CommandProcessor::SubmitBarriers() { ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - PrimitiveType primitive_type) { + bool tessellated) { assert_true(vertex_shader->is_translated()); assert_true(pixel_shader == nullptr || pixel_shader->is_translated()); - D3D12_SHADER_VISIBILITY vertex_visibility; - if (primitive_type == PrimitiveType::kTrianglePatch || - primitive_type == PrimitiveType::kQuadPatch) { - vertex_visibility = D3D12_SHADER_VISIBILITY_DOMAIN; - } else { - vertex_visibility = D3D12_SHADER_VISIBILITY_VERTEX; - } + D3D12_SHADER_VISIBILITY vertex_visibility = + tessellated ? 
D3D12_SHADER_VISIBILITY_DOMAIN + : D3D12_SHADER_VISIBILITY_VERTEX; uint32_t texture_count_vertex, sampler_count_vertex; vertex_shader->GetTextureSRVs(texture_count_vertex); @@ -172,8 +168,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( index_offset += D3D12Shader::kMaxTextureSRVIndexBits; index |= sampler_count_vertex << index_offset; index_offset += D3D12Shader::kMaxSamplerBindingIndexBits; - index |= uint32_t(vertex_visibility == D3D12_SHADER_VISIBILITY_DOMAIN) - << index_offset; + index |= (tessellated ? 1 : 0) << index_offset; ++index_offset; assert_true(index_offset <= 32); @@ -1157,6 +1152,15 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } + // Check if using tessellation to get the correct primitive type. + bool tessellated; + if (uint32_t(primitive_type) >= + uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) { + tessellated = (regs[XE_GPU_REG_VGT_OUTPUT_PATH_CNTL].u32 & 0x3) == 0x1; + } else { + tessellated = false; + } + // Shaders will have already been defined by previous loads. // We need them to do just about anything so validate here. auto vertex_shader = static_cast(active_vertex_shader()); @@ -1176,7 +1180,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // which is needed by the render target cache, to check the possibility of // doing early depth/stencil, and also to get used textures and samplers. 
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader, - primitive_type)) { + tessellated, primitive_type)) { return false; } @@ -1189,11 +1193,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, !pixel_shader->memexport_stream_constants().empty(); bool memexport_used = memexport_used_vertex || memexport_used_pixel; - if (!memexport_used_vertex && - (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 && - primitive_type != PrimitiveType::kPointList && - primitive_type != PrimitiveType::kRectangleList) { - // Both sides are culled - can't reproduce this with rasterizer state. + bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); + + if (!memexport_used_vertex && primitive_two_faced && + (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3) { + // Both sides are culled. return true; } @@ -1221,8 +1225,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Adaptive tessellation requires an index buffer, but it contains per-edge // tessellation factors (as floats) instead of control point indices. 
bool adaptive_tessellation; - if (primitive_type == PrimitiveType::kTrianglePatch || - primitive_type == PrimitiveType::kQuadPatch) { + if (tessellated) { TessellationMode tessellation_mode = TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3); adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive; @@ -1245,44 +1248,54 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, } else { adaptive_tessellation = false; } - PrimitiveType primitive_type_converted = - PrimitiveConverter::GetReplacementPrimitiveType(primitive_type); + PrimitiveType primitive_type_converted; D3D_PRIMITIVE_TOPOLOGY primitive_topology; - switch (primitive_type_converted) { - case PrimitiveType::kPointList: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; - break; - case PrimitiveType::kLineList: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; - break; - case PrimitiveType::kLineStrip: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; - break; - case PrimitiveType::kTriangleList: - case PrimitiveType::kRectangleList: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - break; - case PrimitiveType::kTriangleStrip: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; - break; - case PrimitiveType::kQuadList: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; - break; - case PrimitiveType::kTrianglePatch: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; - break; - case PrimitiveType::kQuadPatch: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST; - break; - default: - return false; + if (tessellated) { + primitive_type_converted = primitive_type; + switch (primitive_type_converted) { + // TODO(Triang3l): Support line patches. 
+ case PrimitiveType::kTrianglePatch: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; + break; + case PrimitiveType::kQuadPatch: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST; + break; + default: + return false; + } + } else { + primitive_type_converted = + PrimitiveConverter::GetReplacementPrimitiveType(primitive_type); + switch (primitive_type_converted) { + case PrimitiveType::kPointList: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + break; + case PrimitiveType::kLineList: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; + break; + case PrimitiveType::kLineStrip: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + break; + case PrimitiveType::kTriangleList: + case PrimitiveType::kRectangleList: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + break; + case PrimitiveType::kTriangleStrip: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + break; + case PrimitiveType::kQuadList: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; + break; + default: + return false; + } } if (primitive_topology_ != primitive_topology) { primitive_topology_ = primitive_topology; deferred_command_list_->D3DIASetPrimitiveTopology(primitive_topology); } uint32_t line_loop_closing_index; - if (primitive_type == PrimitiveType::kLineLoop && !indexed && + if (!tessellated && primitive_type == PrimitiveType::kLineLoop && !indexed && index_count >= 3) { // Add a vertex to close the loop, and make the vertex shader replace its // index (before adding the offset) with 0 to fetch the first vertex again. @@ -1318,7 +1331,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, void* pipeline_handle; ID3D12RootSignature* root_signature; if (!pipeline_cache_->ConfigurePipeline( - vertex_shader, pixel_shader, primitive_type_converted, + vertex_shader, pixel_shader, tessellated, primitive_type_converted, indexed ? 
index_buffer_info->format : IndexFormat::kInt16, early_z, pipeline_render_targets, &pipeline_handle, &root_signature)) { return false; @@ -1335,7 +1348,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Update system constants before uploading them. UpdateSystemConstantValues( - memexport_used, primitive_type, line_loop_closing_index, + memexport_used, primitive_two_faced, line_loop_closing_index, indexed ? index_buffer_info->endianness : Endian::kUnspecified, adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0, early_z, color_mask, pipeline_render_targets); @@ -1494,18 +1507,23 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32 ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; + PrimitiveConverter::ConversionResult conversion_result; uint32_t converted_index_count; - PrimitiveConverter::ConversionResult conversion_result = - primitive_converter_->ConvertPrimitives( - primitive_type, index_buffer_info->guest_base, index_count, - index_buffer_info->format, index_buffer_info->endianness, - index_buffer_view.BufferLocation, converted_index_count); - if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) { - return false; - } - if (conversion_result == - PrimitiveConverter::ConversionResult::kPrimitiveEmpty) { - return true; + if (tessellated) { + conversion_result = + PrimitiveConverter::ConversionResult::kConversionNotNeeded; + } else { + conversion_result = primitive_converter_->ConvertPrimitives( + primitive_type, index_buffer_info->guest_base, index_count, + index_buffer_info->format, index_buffer_info->endianness, + index_buffer_view.BufferLocation, converted_index_count); + if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) { + return false; + } + if (conversion_result == + PrimitiveConverter::ConversionResult::kPrimitiveEmpty) { + return true; + } } ID3D12Resource* scratch_index_buffer = 
nullptr; if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) { @@ -1564,10 +1582,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, } } else { // Check if need to draw using a conversion index buffer. - uint32_t converted_index_count; + uint32_t converted_index_count = 0; D3D12_GPU_VIRTUAL_ADDRESS conversion_gpu_address = - primitive_converter_->GetStaticIndexBuffer(primitive_type, index_count, - converted_index_count); + tessellated ? 0 + : primitive_converter_->GetStaticIndexBuffer( + primitive_type, index_count, converted_index_count); if (memexport_used) { shared_memory_->UseForWriting(); } else { @@ -1954,7 +1973,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState() { } void D3D12CommandProcessor::UpdateSystemConstantValues( - bool shared_memory_is_uav, PrimitiveType primitive_type, + bool shared_memory_is_uav, bool primitive_two_faced, uint32_t line_loop_closing_index, Endian index_endian, uint32_t edge_factor_base, bool early_z, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]) { @@ -2195,18 +2214,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; bool gl_clip_space_def = !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); - float ndc_scale_x, ndc_scale_y; - if (pa_cl_vte_cntl & (1 << 0)) { - ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; + float ndc_scale_x, ndc_scale_y, ndc_scale_z; + if (primitive_two_faced && (pa_su_sc_mode_cntl & 0x3) == 0x3) { + // Kill all primitives if both faces are culled, but the vertex shader still + // needs to do memexport (not NaN because of comparison for setting the + // dirty flag). + ndc_scale_x = ndc_scale_y = ndc_scale_z = 0; } else { - ndc_scale_x = 1.0f / 1280.0f; + if (pa_cl_vte_cntl & (1 << 0)) { + ndc_scale_x = viewport_scale_x >= 0.0f ? 
1.0f : -1.0f; + } else { + ndc_scale_x = 1.0f / 1280.0f; + } + if (pa_cl_vte_cntl & (1 << 2)) { + ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; + } else { + ndc_scale_y = -1.0f / 1280.0f; + } + ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { - ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; - } else { - ndc_scale_y = -1.0f / 1280.0f; - } - float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; @@ -2376,6 +2402,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format); dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; system_constants_.edram_rt_format_flags[i] = format_flags; + // Can't do float comparisons here because NaNs would result in always + // setting the dirty flag. dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i], 4 * sizeof(float)) != 0; std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], @@ -2419,25 +2447,12 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( dirty |= system_constants_.edram_depth_range_offset != depth_range_offset; system_constants_.edram_depth_range_offset = depth_range_offset; - // For points and lines, front polygon offset is used, and it's enabled if + // For non-polygons, front polygon offset is used, and it's enabled if // POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back // are used. 
float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f; float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f; - if (primitive_type == PrimitiveType::kPointList || - primitive_type == PrimitiveType::kLineList || - primitive_type == PrimitiveType::kLineStrip || - primitive_type == PrimitiveType::kLineLoop || - primitive_type == PrimitiveType::k2DLineStrip) { - if (pa_su_sc_mode_cntl & (1 << 13)) { - poly_offset_front_scale = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; - poly_offset_front_offset = - regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; - poly_offset_back_scale = poly_offset_front_scale; - poly_offset_back_offset = poly_offset_front_offset; - } - } else { + if (primitive_two_faced) { if (pa_su_sc_mode_cntl & (1 << 11)) { poly_offset_front_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; @@ -2450,6 +2465,15 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( poly_offset_back_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; } + } else { + if (pa_su_sc_mode_cntl & (1 << 13)) { + poly_offset_front_scale = + regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + poly_offset_front_offset = + regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + poly_offset_back_scale = poly_offset_front_scale; + poly_offset_back_offset = poly_offset_front_offset; + } } // "slope computed in subpixels (1/12 or 1/16)" - R5xx Acceleration. 
Also: // https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L943 @@ -2511,7 +2535,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.edram_stencil_front_comparison != stencil_value; system_constants_.edram_stencil_front_comparison = stencil_value; - if (rb_depthcontrol & 0x80) { + if (primitive_two_faced && (rb_depthcontrol & 0x80)) { stencil_value = kStencilOpMap[(rb_depthcontrol >> 23) & 0x7]; dirty |= system_constants_.edram_stencil_back_fail != stencil_value; system_constants_.edram_stencil_back_fail = stencil_value; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index c5df90f25..e170ac153 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -82,7 +82,7 @@ class D3D12CommandProcessor : public CommandProcessor { // Finds or creates root signature for a pipeline. ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, - PrimitiveType primitive_type); + bool tessellated); ui::d3d12::UploadBufferPool* GetConstantBufferPool() const { return constant_buffer_pool_.get(); @@ -210,7 +210,7 @@ class D3D12CommandProcessor : public CommandProcessor { void UpdateFixedFunctionState(); void UpdateSystemConstantValues( - bool shared_memory_is_uav, PrimitiveType primitive_type, + bool shared_memory_is_uav, bool primitive_two_faced, uint32_t line_loop_closing_index, Endian index_endian, uint32_t edge_factor_base, bool early_z, uint32_t color_mask, const RenderTargetCache::PipelineRenderTarget render_targets[4]); diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index cbed15f9e..cf0380959 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -25,15 +25,6 @@ class D3D12Shader : public Shader { D3D12Shader(ShaderType shader_type, uint64_t data_hash, 
const uint32_t* dword_ptr, uint32_t dword_count); - // For checking if it's a domain shader rather than a vertex shader when used - // (since when a shader is used for the first time, it's translated either - // into a vertex shader or a domain shader, depending on the primitive type). - PrimitiveType GetDomainShaderPrimitiveType() const { - return domain_shader_primitive_type_; - } - void SetDomainShaderPrimitiveType(PrimitiveType primitive_type) { - domain_shader_primitive_type_ = primitive_type; - } void SetTexturesAndSamplers( const DxbcShaderTranslator::TextureSRV* texture_srvs, uint32_t texture_srv_count, @@ -88,8 +79,6 @@ class D3D12Shader : public Shader { } private: - PrimitiveType domain_shader_primitive_type_ = PrimitiveType::kNone; - std::vector texture_srvs_; uint32_t used_texture_mask_ = 0; std::vector sampler_bindings_; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 54817a51b..6742608b0 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -194,6 +194,7 @@ D3D12Shader* PipelineCache::LoadShader(ShaderType shader_type, bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + bool tessellated, PrimitiveType primitive_type) { auto& regs = *register_file_; @@ -207,12 +208,14 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, xenos::xe_gpu_program_cntl_t sq_program_cntl; sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; if (!vertex_shader->is_translated() && - !TranslateShader(vertex_shader, sq_program_cntl, primitive_type)) { + !TranslateShader(vertex_shader, sq_program_cntl, tessellated, + primitive_type)) { XELOGE("Failed to translate the vertex shader!"); return false; } if (pixel_shader != nullptr && !pixel_shader->is_translated() && - !TranslateShader(pixel_shader, sq_program_cntl, primitive_type)) { + !TranslateShader(pixel_shader, sq_program_cntl, tessellated, + 
primitive_type)) { XELOGE("Failed to translate the pixel shader!"); return false; } @@ -220,7 +223,7 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, } bool PipelineCache::ConfigurePipeline( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated, PrimitiveType primitive_type, IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], void** pipeline_handle_out, ID3D12RootSignature** root_signature_out) { @@ -232,9 +235,9 @@ bool PipelineCache::ConfigurePipeline( assert_not_null(root_signature_out); PipelineDescription description; - if (!GetCurrentStateDescription(vertex_shader, pixel_shader, primitive_type, - index_format, early_z, render_targets, - description)) { + if (!GetCurrentStateDescription(vertex_shader, pixel_shader, tessellated, + primitive_type, index_format, early_z, + render_targets, description)) { return false; } @@ -260,7 +263,8 @@ bool PipelineCache::ConfigurePipeline( } } - if (!EnsureShadersTranslated(vertex_shader, pixel_shader, primitive_type)) { + if (!EnsureShadersTranslated(vertex_shader, pixel_shader, tessellated, + primitive_type)) { return false; } @@ -289,33 +293,17 @@ bool PipelineCache::ConfigurePipeline( bool PipelineCache::TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl, + bool tessellated, PrimitiveType primitive_type) { - // Set the target for vertex shader translation. 
- DxbcShaderTranslator::VertexShaderType vertex_shader_type; - if (primitive_type == PrimitiveType::kTrianglePatch) { - vertex_shader_type = - DxbcShaderTranslator::VertexShaderType::kTriangleDomain; - } else if (primitive_type == PrimitiveType::kQuadPatch) { - vertex_shader_type = DxbcShaderTranslator::VertexShaderType::kQuadDomain; - } else { - vertex_shader_type = DxbcShaderTranslator::VertexShaderType::kVertex; - } - shader_translator_->SetVertexShaderType(vertex_shader_type); - // Perform translation. // If this fails the shader will be marked as invalid and ignored later. - if (!shader_translator_->Translate(shader, cntl)) { + if (!shader_translator_->Translate( + shader, tessellated ? primitive_type : PrimitiveType::kNone, cntl)) { XELOGE("Shader %.16" PRIX64 " translation failed; marking as ignored", shader->ucode_data_hash()); return false; } - if (vertex_shader_type != DxbcShaderTranslator::VertexShaderType::kVertex) { - // For checking later for safety (so a vertex shader won't be accidentally - // used as a domain shader or vice versa). - shader->SetDomainShaderPrimitiveType(primitive_type); - } - uint32_t texture_srv_count; const DxbcShaderTranslator::TextureSRV* texture_srvs = shader_translator_->GetTextureSRVs(texture_srv_count); @@ -360,19 +348,20 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, } bool PipelineCache::GetCurrentStateDescription( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated, PrimitiveType primitive_type, IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineDescription& description_out) { auto& regs = *register_file_; uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); // Initialize all unused fields to zero for comparison/hashing. 
std::memset(&description_out, 0, sizeof(description_out)); // Root signature. description_out.root_signature = command_processor_->GetRootSignature( - vertex_shader, pixel_shader, primitive_type); + vertex_shader, pixel_shader, tessellated); if (description_out.root_signature == nullptr) { return false; } @@ -393,93 +382,79 @@ bool PipelineCache::GetCurrentStateDescription( } // Primitive topology type, tessellation mode and geometry shader. - description_out.tessellation_mode = PipelineTessellationMode::kNone; - switch (primitive_type) { - case PrimitiveType::kPointList: - description_out.primitive_topology_type = - PipelinePrimitiveTopologyType::kPoint; - break; - case PrimitiveType::kLineList: - case PrimitiveType::kLineStrip: - case PrimitiveType::kLineLoop: - // Quads are emulated as line lists with adjacency. - case PrimitiveType::kQuadList: - case PrimitiveType::k2DLineStrip: - description_out.primitive_topology_type = - PipelinePrimitiveTopologyType::kLine; - break; - case PrimitiveType::kTrianglePatch: - case PrimitiveType::kQuadPatch: - description_out.primitive_topology_type = - PipelinePrimitiveTopologyType::kPatch; - switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) { - case TessellationMode::kContinuous: - description_out.tessellation_mode = - PipelineTessellationMode::kContinuous; - break; - case TessellationMode::kAdaptive: - description_out.tessellation_mode = - FLAGS_d3d12_tessellation_adaptive - ? 
PipelineTessellationMode::kAdaptive - : PipelineTessellationMode::kContinuous; - break; - default: - description_out.tessellation_mode = - PipelineTessellationMode::kDiscrete; - break; - } - break; - default: - description_out.primitive_topology_type = - PipelinePrimitiveTopologyType::kTriangle; - break; - } - switch (primitive_type) { - case PrimitiveType::kLinePatch: - description_out.patch_type = PipelinePatchType::kLine; - break; - case PrimitiveType::kTrianglePatch: - description_out.patch_type = PipelinePatchType::kTriangle; - break; - case PrimitiveType::kQuadPatch: - description_out.patch_type = PipelinePatchType::kQuad; - break; - default: - description_out.patch_type = PipelinePatchType::kNone; - break; - } - switch (primitive_type) { - case PrimitiveType::kPointList: - description_out.geometry_shader = PipelineGeometryShader::kPointList; - break; - case PrimitiveType::kRectangleList: - description_out.geometry_shader = PipelineGeometryShader::kRectangleList; - break; - case PrimitiveType::kQuadList: - description_out.geometry_shader = PipelineGeometryShader::kQuadList; - break; - default: - description_out.geometry_shader = PipelineGeometryShader::kNone; - break; + if (tessellated) { + switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) { + case TessellationMode::kContinuous: + description_out.tessellation_mode = + PipelineTessellationMode::kContinuous; + break; + case TessellationMode::kAdaptive: + description_out.tessellation_mode = + FLAGS_d3d12_tessellation_adaptive + ? 
PipelineTessellationMode::kAdaptive + : PipelineTessellationMode::kContinuous; + break; + default: + description_out.tessellation_mode = PipelineTessellationMode::kDiscrete; + break; + } + description_out.primitive_topology_type = + PipelinePrimitiveTopologyType::kPatch; + switch (primitive_type) { + case PrimitiveType::kLinePatch: + description_out.patch_type = PipelinePatchType::kLine; + break; + case PrimitiveType::kTrianglePatch: + description_out.patch_type = PipelinePatchType::kTriangle; + break; + case PrimitiveType::kQuadPatch: + description_out.patch_type = PipelinePatchType::kQuad; + break; + default: + assert_unhandled_case(primitive_type); + return false; + } + description_out.geometry_shader = PipelineGeometryShader::kNone; + } else { + description_out.tessellation_mode = PipelineTessellationMode::kNone; + switch (primitive_type) { + case PrimitiveType::kPointList: + description_out.primitive_topology_type = + PipelinePrimitiveTopologyType::kPoint; + break; + case PrimitiveType::kLineList: + case PrimitiveType::kLineStrip: + case PrimitiveType::kLineLoop: + // Quads are emulated as line lists with adjacency. + case PrimitiveType::kQuadList: + case PrimitiveType::k2DLineStrip: + description_out.primitive_topology_type = + PipelinePrimitiveTopologyType::kLine; + break; + default: + description_out.primitive_topology_type = + PipelinePrimitiveTopologyType::kTriangle; + break; + } + description_out.patch_type = PipelinePatchType::kNone; + switch (primitive_type) { + case PrimitiveType::kPointList: + description_out.geometry_shader = PipelineGeometryShader::kPointList; + break; + case PrimitiveType::kRectangleList: + description_out.geometry_shader = + PipelineGeometryShader::kRectangleList; + break; + case PrimitiveType::kQuadList: + description_out.geometry_shader = PipelineGeometryShader::kQuadList; + break; + default: + description_out.geometry_shader = PipelineGeometryShader::kNone; + break; + } } // Rasterizer state. 
- uint32_t cull_mode; - if (primitive_type == PrimitiveType::kPointList || - primitive_type == PrimitiveType::kRectangleList) { - cull_mode = 0; - } else { - cull_mode = pa_su_sc_mode_cntl & 0x3; - } - if (cull_mode & 1) { - // More special, so checked first - generally back faces are culled. - description_out.cull_mode = PipelineCullMode::kFront; - } else if (cull_mode & 2) { - description_out.cull_mode = PipelineCullMode::kBack; - } else { - description_out.cull_mode = PipelineCullMode::kNone; - } - description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0; // Because Direct3D 12 doesn't support per-side fill mode and depth bias, the // values to use depends on the current culling state. // If front faces are culled, use the ones for back faces. @@ -495,33 +470,56 @@ bool PipelineCache::GetCurrentStateDescription( // Here we also assume that only one side is culled - if two sides are culled, // the D3D12 command processor will drop such draw early. float poly_offset = 0.0f, poly_offset_scale = 0.0f; - // With ROV, the depth bias is applied in the pixel shader because per-sample - // depth is needed for MSAA. - if (!(cull_mode & 1)) { - // Front faces aren't culled. - uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; - if (fill_mode == 0 || fill_mode == 1) { - description_out.fill_mode_wireframe = 1; + if (primitive_two_faced) { + uint32_t cull_mode = pa_su_sc_mode_cntl & 0x3; + description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0; + if (cull_mode == 1) { + description_out.cull_mode = PipelineCullMode::kFront; + } else if (cull_mode == 2) { + description_out.cull_mode = PipelineCullMode::kBack; + } else { + description_out.cull_mode = PipelineCullMode::kNone; } - if (!edram_rov_used_ && ((pa_su_sc_mode_cntl >> 11) & 0x1)) { + // With ROV, the depth bias is applied in the pixel shader because + // per-sample depth is needed for MSAA. + if (cull_mode != 1) { + // Front faces aren't culled. 
+ uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; + if (fill_mode == 0 || fill_mode == 1) { + description_out.fill_mode_wireframe = 1; + } + if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 11))) { + poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; + poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; + } + } + if (cull_mode != 2) { + // Back faces aren't culled. + uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7; + if (fill_mode == 0 || fill_mode == 1) { + description_out.fill_mode_wireframe = 1; + } + // Prefer front depth bias because in general, front faces are the ones + // that are rendered (except for shadow volumes). + if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 12)) && + poly_offset == 0.0f && poly_offset_scale == 0.0f) { + poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; + poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; + } + } + if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) { + // Fill mode is disabled. + description_out.fill_mode_wireframe = 0; + } + } else { + // Filled front faces only. + // Use front depth bias if POLY_OFFSET_PARA_ENABLED + // (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives). + if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 13))) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; } } - if (!(cull_mode & 2)) { - // Back faces aren't culled. - uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7; - if (fill_mode == 0 || fill_mode == 1) { - description_out.fill_mode_wireframe = 1; - } - // Prefer front depth bias because in general, front faces are the ones - // that are rendered (except for shadow volumes). 
- if (!edram_rov_used_ && ((pa_su_sc_mode_cntl >> 12) & 0x1) && - poly_offset == 0.0f && poly_offset_scale == 0.0f) { - poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; - poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; - } - } if (!edram_rov_used_) { // Conversion based on the calculations in Call of Duty 4 and the values it // writes to the registers, and also on: @@ -551,12 +549,9 @@ bool PipelineCache::GetCurrentStateDescription( description_out.depth_bias_slope_scaled = poly_offset_scale * (1.0f / 16.0f); } - if ((pa_su_sc_mode_cntl & (0x3 << 3)) == 0) { - // Fill mode is disabled. - description_out.fill_mode_wireframe = 0; - } - if (FLAGS_d3d12_tessellation_wireframe && - description_out.tessellation_mode != PipelineTessellationMode::kNone) { + if (FLAGS_d3d12_tessellation_wireframe && tessellated && + (primitive_type == PrimitiveType::kTrianglePatch || + primitive_type == PrimitiveType::kQuadPatch)) { description_out.fill_mode_wireframe = 1; } // CLIP_DISABLE @@ -596,7 +591,7 @@ bool PipelineCache::GetCurrentStateDescription( (rb_depthcontrol >> 17) & 0x7; description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7; description_out.stencil_front_func = (rb_depthcontrol >> 8) & 0x7; - if (rb_depthcontrol & 0x80) { + if (primitive_two_faced && (rb_depthcontrol & 0x80)) { description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7; description_out.stencil_back_depth_fail_op = (rb_depthcontrol >> 29) & 0x7; @@ -768,7 +763,7 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( if (description.tessellation_mode != PipelineTessellationMode::kNone) { switch (description.patch_type) { case PipelinePatchType::kTriangle: - if (description.vertex_shader->GetDomainShaderPrimitiveType() != + if (description.vertex_shader->patch_primitive_type() != PrimitiveType::kTrianglePatch) { XELOGE( "Tried to use vertex shader %.16" PRIX64 @@ -794,7 +789,7 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( 
state_desc.VS.BytecodeLength = sizeof(tessellation_triangle_vs); break; case PipelinePatchType::kQuad: - if (description.vertex_shader->GetDomainShaderPrimitiveType() != + if (description.vertex_shader->patch_primitive_type() != PrimitiveType::kQuadPatch) { XELOGE("Tried to use vertex shader %.16" PRIX64 " for quad patch tessellation, but it's not a tessellation " @@ -825,7 +820,7 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( state_desc.DS.BytecodeLength = description.vertex_shader->translated_binary().size(); } else { - if (description.vertex_shader->GetDomainShaderPrimitiveType() != + if (description.vertex_shader->patch_primitive_type() != PrimitiveType::kNone) { XELOGE("Tried to use vertex shader %.16" PRIX64 " without tessellation, but it's a tessellation domain shader", diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index daa16caf7..179b851d0 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -49,11 +49,11 @@ class PipelineCache { // Translates shaders if needed, also making shader info up to date. 
bool EnsureShadersTranslated(D3D12Shader* vertex_shader, - D3D12Shader* pixel_shader, + D3D12Shader* pixel_shader, bool tessellated, PrimitiveType primitive_type); bool ConfigurePipeline( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated, PrimitiveType primitive_type, IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], void** pipeline_handle_out, ID3D12RootSignature** root_signature_out); @@ -172,10 +172,10 @@ class PipelineCache { }; bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl, - PrimitiveType primitive_type); + bool tessellated, PrimitiveType primitive_type); bool GetCurrentStateDescription( - D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, + D3D12Shader* vertex_shader, D3D12Shader* pixel_shader, bool tessellated, PrimitiveType primitive_type, IndexFormat index_format, bool early_z, const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineDescription& description_out); diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 84effdaae..1e10b496f 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -761,14 +761,15 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for // triangle patches), and also set the domain in STAT. uint32_t domain_location_mask, domain_location_swizzle; - if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { domain_location_mask = 0b0111; // ZYX swizzle with r1.y == 0, according to the water shader in // Banjo-Kazooie: Nuts & Bolts. 
domain_location_swizzle = 0b00000110; stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; } else { - assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); + // TODO(Triang3l): Support line patches. + assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); // According to the ground shader in Viva Pinata, though it's impossible // (as of December 12th, 2018) to test there since it possibly requires // memexport for ground control points (the memory region with them is @@ -807,8 +808,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // TODO(Triang3l): Investigate what should be written for primitives (or // even control points) for non-adaptive tessellation modes (they may // possibly have an index buffer). + // TODO(Triang3l): Support line patches. uint32_t primitive_id_gpr_index = - vertex_shader_type_ == VertexShaderType::kTriangleDomain ? 1 : 0; + patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0; if (register_count() > primitive_id_gpr_index) { uint32_t primitive_id_temp = uses_register_dynamic_addressing() @@ -877,9 +879,11 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // // Direct3D 12 appears to be passing the coordinates in a consistent // order, so we can just use ZYX for triangle patches. + // + // TODO(Triang3l): Support line patches. uint32_t domain_location_swizzle_mask = - vertex_shader_type_ == VertexShaderType::kTriangleDomain ? 0b0010 - : 0b0001; + patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010 + : 0b0001; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( 3 + temp_register_operand_length)); @@ -4152,11 +4156,12 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { // FXC refuses to compile without SV_TessFactor and SV_InsideTessFactor input, // so this is required. 
uint32_t tess_factor_count_edge, tess_factor_count_inside; - if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { tess_factor_count_edge = 3; tess_factor_count_inside = 1; } else { - assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); + // TODO(Triang3l): Support line patches. + assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); tess_factor_count_edge = 4; tess_factor_count_inside = 2; } @@ -4172,7 +4177,7 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { shader_object_.push_back(0); shader_object_.push_back( i < tess_factor_count_edge ? i : (i - tess_factor_count_edge)); - if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { if (i < tess_factor_count_edge) { // D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR. shader_object_.push_back(13); @@ -4181,7 +4186,8 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { shader_object_.push_back(14); } } else { - assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); + // TODO(Triang3l): Support line patches. + assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); if (i < tess_factor_count_edge) { // D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR. shader_object_.push_back(11); @@ -4401,11 +4407,12 @@ void DxbcShaderTranslator::WriteShaderCode() { // as both vertex shader and domain shader. uint32_t control_point_count; D3D11_SB_TESSELLATOR_DOMAIN domain; - if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { control_point_count = 3; domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; } else { - assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); + // TODO(Triang3l): Support line patches. 
+ assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); control_point_count = 4; domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD; } @@ -4597,10 +4604,11 @@ void DxbcShaderTranslator::WriteShaderCode() { if (IsDxbcDomainShader()) { // Domain location input (barycentric for triangles, UV for quads). uint32_t domain_location_mask; - if (vertex_shader_type_ == VertexShaderType::kTriangleDomain) { + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { domain_location_mask = 0b0111; } else { - assert_true(vertex_shader_type_ == VertexShaderType::kQuadDomain); + // TODO(Triang3l): Support line patches. + assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); domain_location_mask = 0b0011; } shader_object_.push_back( diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 0639ca19e..3d5aa824f 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -31,13 +31,6 @@ class DxbcShaderTranslator : public ShaderTranslator { DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used); ~DxbcShaderTranslator() override; - enum class VertexShaderType { kVertex, kTriangleDomain, kQuadDomain }; - // Sets the type (shader model and input layout) of the next vertex shader - // that will be converted. - void SetVertexShaderType(VertexShaderType type) { - vertex_shader_type_ = type; - } - // Constant buffer bindings in space 0. 
enum class CbufferRegister { kSystemConstants, @@ -646,12 +639,11 @@ class DxbcShaderTranslator : public ShaderTranslator { } inline bool IsDxbcVertexShader() const { return IsDxbcVertexOrDomainShader() && - vertex_shader_type_ == VertexShaderType::kVertex; + patch_primitive_type() == PrimitiveType::kNone; } inline bool IsDxbcDomainShader() const { return IsDxbcVertexOrDomainShader() && - (vertex_shader_type_ == VertexShaderType::kTriangleDomain || - vertex_shader_type_ == VertexShaderType::kQuadDomain); + patch_primitive_type() != PrimitiveType::kNone; } inline bool IsDxbcPixelShader() const { return is_depth_only_pixel_shader_ || is_pixel_shader(); @@ -957,7 +949,6 @@ class DxbcShaderTranslator : public ShaderTranslator { // Whether the output merger should be emulated in pixel shaders. bool edram_rov_used_; - VertexShaderType vertex_shader_type_ = VertexShaderType::kVertex; // Is currently writing the empty depth-only pixel shader, for // CompleteTranslation. bool is_depth_only_pixel_shader_; diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 8dce339e1..92e8f6bfd 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -587,6 +587,10 @@ class Shader { // Whether the shader is identified as a vertex or pixel shader. ShaderType type() const { return shader_type_; } + // Tessellation patch primitive type for a vertex shader translated into a + // domain shader, or PrimitiveType::kNone for a normal vertex shader. + PrimitiveType patch_primitive_type() const { return patch_primitive_type_; } + // Microcode dwords in host endianness. 
const std::vector& ucode_data() const { return ucode_data_; } uint64_t ucode_data_hash() const { return ucode_data_hash_; } @@ -667,6 +671,7 @@ class Shader { friend class ShaderTranslator; ShaderType shader_type_; + PrimitiveType patch_primitive_type_ = PrimitiveType::kNone; std::vector ucode_data_; uint64_t ucode_data_hash_; diff --git a/src/xenia/gpu/shader_compiler_main.cc b/src/xenia/gpu/shader_compiler_main.cc index 7fe20750e..3b6310f29 100644 --- a/src/xenia/gpu/shader_compiler_main.cc +++ b/src/xenia/gpu/shader_compiler_main.cc @@ -35,10 +35,10 @@ DEFINE_string(shader_input_type, "", DEFINE_string(shader_output, "", "Output shader file path."); DEFINE_string(shader_output_type, "ucode", "Translator to use: [ucode, glsl45, spirv, spirvtext, dxbc]."); -DEFINE_string(shader_output_domain, "", - "Abstract patch type in the generated tessellation evaluation " - "(domain) shader, or unspecified to produce a vertex shader: " - "[triangle, quad]."); +DEFINE_string(shader_output_patch, "", + "Tessellation patch type in the generated tessellation " + "evaluation (domain) shader, or unspecified to produce a vertex " + "shader: [line, triangle, quad]."); DEFINE_bool(shader_output_dxbc_rov, false, "Output ROV-based output-merger code in DXBC pixel shaders."); @@ -108,23 +108,22 @@ int shader_compiler_main(const std::vector& args) { } else if (FLAGS_shader_output_type == "dxbc") { translator = std::make_unique(0, FLAGS_shader_output_dxbc_rov); - DxbcShaderTranslator::VertexShaderType dxbc_vertex_shader_type; - if (FLAGS_shader_output_domain == "triangle") { - dxbc_vertex_shader_type = - DxbcShaderTranslator::VertexShaderType::kTriangleDomain; - } else if (FLAGS_shader_output_domain == "quad") { - dxbc_vertex_shader_type = - DxbcShaderTranslator::VertexShaderType::kQuadDomain; - } else { - dxbc_vertex_shader_type = DxbcShaderTranslator::VertexShaderType::kVertex; - } - static_cast(translator.get()) - ->SetVertexShaderType(dxbc_vertex_shader_type); } else { translator = 
std::make_unique(); } - translator->Translate(shader.get()); + PrimitiveType patch_primitive_type = PrimitiveType::kNone; + if (shader_type == ShaderType::kVertex) { + if (FLAGS_shader_output_patch == "line") { + patch_primitive_type = PrimitiveType::kLinePatch; + } else if (FLAGS_shader_output_patch == "triangle") { + patch_primitive_type = PrimitiveType::kTrianglePatch; + } else if (FLAGS_shader_output_patch == "quad") { + patch_primitive_type = PrimitiveType::kQuadPatch; + } + } + + translator->Translate(shader.get(), patch_primitive_type); const void* source_data = shader->translated_binary().data(); size_t source_data_size = shader->translated_binary().size(); diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index c3cd78ad6..ba3a3cbcf 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -107,22 +107,25 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { return true; } -bool ShaderTranslator::Translate(Shader* shader, +bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, xenos::xe_gpu_program_cntl_t cntl) { Reset(); register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1 : cntl.ps_regs + 1; - return TranslateInternal(shader); + return TranslateInternal(shader, patch_type); } -bool ShaderTranslator::Translate(Shader* shader) { +bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type) { Reset(); - return TranslateInternal(shader); + return TranslateInternal(shader, patch_type); } -bool ShaderTranslator::TranslateInternal(Shader* shader, + PrimitiveType patch_type) { shader_type_ = shader->type(); + patch_primitive_type_ = + shader_type_ == ShaderType::kVertex ?
patch_type : PrimitiveType::kNone; ucode_dwords_ = shader->ucode_dwords(); ucode_dword_count_ = shader->ucode_dword_count(); @@ -190,6 +193,7 @@ bool ShaderTranslator::TranslateInternal(Shader* shader) { shader->errors_ = std::move(errors_); shader->translated_binary_ = CompleteTranslation(); shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string(); + shader->patch_primitive_type_ = patch_primitive_type_; shader->vertex_bindings_ = std::move(vertex_bindings_); shader->texture_bindings_ = std::move(texture_bindings_); shader->constant_register_map_ = std::move(constant_register_map_); diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index dac02fc0c..7a12abc10 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -32,8 +32,9 @@ class ShaderTranslator { // DEPRECATED(benvanik): remove this when shader cache is removed. bool GatherAllBindingInformation(Shader* shader); - bool Translate(Shader* shader, xenos::xe_gpu_program_cntl_t cntl); - bool Translate(Shader* shader); + bool Translate(Shader* shader, PrimitiveType patch_type, + xenos::xe_gpu_program_cntl_t cntl); + bool Translate(Shader* shader, PrimitiveType patch_type); protected: ShaderTranslator(); @@ -45,6 +46,9 @@ class ShaderTranslator { uint32_t register_count() const { return register_count_; } // True if the current shader is a vertex shader. bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } + // Tessellation patch primitive type for a vertex shader translated into a + // domain shader, or PrimitiveType::kNone for a normal vertex shader. + PrimitiveType patch_primitive_type() const { return patch_primitive_type_; } // True if the current shader is a pixel shader. 
bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; } const Shader::ConstantRegisterMap& constant_register_map() const { @@ -176,7 +180,7 @@ class ShaderTranslator { bool disable_implicit_early_z; }; - bool TranslateInternal(Shader* shader); + bool TranslateInternal(Shader* shader, PrimitiveType patch_type); void MarkUcodeInstruction(uint32_t dword_offset); void AppendUcodeDisasm(char c); @@ -225,6 +229,7 @@ class ShaderTranslator { // Input shader metadata and microcode. ShaderType shader_type_; + PrimitiveType patch_primitive_type_; const uint32_t* ucode_dwords_; size_t ucode_dword_count_; xenos::xe_gpu_program_cntl_t program_cntl_; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 6ef67ad40..e9b6a40bd 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -367,7 +367,7 @@ bool PipelineCache::TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl) { // Perform translation. // If this fails the shader will be marked as invalid and ignored later. - if (!shader_translator_->Translate(shader, cntl)) { + if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) { XELOGE("Shader translation failed; marking shader as ignored"); return false; }