diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index aee9ec161..ce940da49 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1216,10 +1216,11 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { "xe_out_fragment_data_2", "xe_out_fragment_data_3", }; - uint32_t shader_writes_color_targets = - current_shader().writes_color_targets(); + uint32_t fragment_data_outputs_written = + current_shader().writes_color_targets() & + ~GetSpirvShaderModification().pixel.color_outputs_disabled; for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (!(shader_writes_color_targets & (uint32_t(1) << i))) { + if (!(fragment_data_outputs_written & (uint32_t(1) << i))) { continue; } spv::Id output_fragment_data_rt = @@ -1252,11 +1253,10 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { } // Initialize the colors for safety. - uint32_t shader_writes_color_targets = - current_shader().writes_color_targets(); for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (shader_writes_color_targets & (uint32_t(1) << i)) { - builder_->createStore(const_float4_0_, output_fragment_data_[i]); + spv::Id output_fragment_data_rt = output_fragment_data_[i]; + if (output_fragment_data_rt != spv::NoResult) { + builder_->createStore(const_float4_0_, output_fragment_data_rt); } } } @@ -1618,7 +1618,10 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, assert_not_zero(used_write_mask); assert_true(current_shader().writes_color_target(result.storage_index)); target_pointer = output_fragment_data_[result.storage_index]; - assert_true(target_pointer != spv::NoResult); + // May be spv::NoResult if the color output is explicitly removed due to + // an empty write mask without independent blending. + // TODO(Triang3l): Store the alpha of the first output in this case for + // alpha test and alpha to coverage. 
break; default: // TODO(Triang3l): All storage targets. diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index c5f41df09..932bd608f 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -34,7 +34,7 @@ class SpirvShaderTranslator : public ShaderTranslator { // TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid // prototyping stage (easier to do small granular updates with an // incremental counter). - static constexpr uint32_t kVersion = 2; + static constexpr uint32_t kVersion = 3; struct { // Dynamically indexable register count from SQ_PROGRAM_CNTL. @@ -46,6 +46,11 @@ class SpirvShaderTranslator : public ShaderTranslator { struct PixelShaderModification { // Dynamically indexable register count from SQ_PROGRAM_CNTL. uint32_t dynamic_addressable_register_count : 8; + // Color outputs removed from the shader to implement a zero color write + // mask when independent blending (and thus independent write masks) is + // not supported without switching to a render pass with some attachments + // actually excluded. 
+ uint32_t color_outputs_disabled : 4; } pixel; uint64_t value = 0; diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.cc b/src/xenia/gpu/vulkan/deferred_command_buffer.cc index efb34d252..470d8adde 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.cc +++ b/src/xenia/gpu/vulkan/deferred_command_buffer.cc @@ -168,6 +168,18 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { args.image_memory_barrier_count, image_memory_barriers); } break; + case Command::kVkSetBlendConstants: { + auto& args = *reinterpret_cast<const ArgsVkSetBlendConstants*>(stream); + dfn.vkCmdSetBlendConstants(command_buffer, args.blend_constants); + } break; + + case Command::kVkSetDepthBias: { + auto& args = *reinterpret_cast<const ArgsVkSetDepthBias*>(stream); + dfn.vkCmdSetDepthBias(command_buffer, args.depth_bias_constant_factor, + args.depth_bias_clamp, + args.depth_bias_slope_factor); + } break; + case Command::kVkSetScissor: { auto& args = *reinterpret_cast<const ArgsVkSetScissor*>(stream); dfn.vkCmdSetScissor( @@ -177,6 +189,27 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) { xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D)))); } break; + case Command::kVkSetStencilCompareMask: { + auto& args = + *reinterpret_cast<const ArgsSetStencilMaskReference*>(stream); + dfn.vkCmdSetStencilCompareMask(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetStencilReference: { + auto& args = + *reinterpret_cast<const ArgsSetStencilMaskReference*>(stream); + dfn.vkCmdSetStencilReference(command_buffer, args.face_mask, + args.mask_reference); + } break; + + case Command::kVkSetStencilWriteMask: { + auto& args = + *reinterpret_cast<const ArgsSetStencilMaskReference*>(stream); + dfn.vkCmdSetStencilWriteMask(command_buffer, args.face_mask, + args.mask_reference); + } break; + case Command::kVkSetViewport: { auto& args = *reinterpret_cast<const ArgsVkSetViewport*>(stream); dfn.vkCmdSetViewport( diff --git a/src/xenia/gpu/vulkan/deferred_command_buffer.h b/src/xenia/gpu/vulkan/deferred_command_buffer.h index 9ed39557b..ac4c88f85 100644 --- a/src/xenia/gpu/vulkan/deferred_command_buffer.h +++ 
b/src/xenia/gpu/vulkan/deferred_command_buffer.h @@ -162,6 +162,22 @@ class DeferredCommandBuffer { uint32_t image_memory_barrier_count, const VkImageMemoryBarrier* image_memory_barriers); + void CmdVkSetBlendConstants(const float* blend_constants) { + auto& args = *reinterpret_cast<ArgsVkSetBlendConstants*>(WriteCommand( + Command::kVkSetBlendConstants, sizeof(ArgsVkSetBlendConstants))); + std::memcpy(args.blend_constants, blend_constants, sizeof(float) * 4); + } + + void CmdVkSetDepthBias(float depth_bias_constant_factor, + float depth_bias_clamp, + float depth_bias_slope_factor) { + auto& args = *reinterpret_cast<ArgsVkSetDepthBias*>( + WriteCommand(Command::kVkSetDepthBias, sizeof(ArgsVkSetDepthBias))); + args.depth_bias_constant_factor = depth_bias_constant_factor; + args.depth_bias_clamp = depth_bias_clamp; + args.depth_bias_slope_factor = depth_bias_slope_factor; + } + void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count, const VkRect2D* scissors) { const size_t header_size = @@ -176,6 +192,31 @@ class DeferredCommandBuffer { sizeof(VkRect2D) * scissor_count); } + void CmdVkSetStencilCompareMask(VkStencilFaceFlags face_mask, + uint32_t compare_mask) { + auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>( + WriteCommand(Command::kVkSetStencilCompareMask, + sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = compare_mask; + } + + void CmdVkSetStencilReference(VkStencilFaceFlags face_mask, + uint32_t reference) { + auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(WriteCommand( + Command::kVkSetStencilReference, sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = reference; + } + + void CmdVkSetStencilWriteMask(VkStencilFaceFlags face_mask, + uint32_t write_mask) { + auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(WriteCommand( + Command::kVkSetStencilWriteMask, sizeof(ArgsSetStencilMaskReference))); + args.face_mask = face_mask; + args.mask_reference = write_mask; + } + void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count, const VkViewport* 
viewports) { const size_t header_size = @@ -201,7 +242,12 @@ class DeferredCommandBuffer { kVkDrawIndexed, kVkEndRenderPass, kVkPipelineBarrier, + kVkSetBlendConstants, + kVkSetDepthBias, kVkSetScissor, + kVkSetStencilCompareMask, + kVkSetStencilReference, + kVkSetStencilWriteMask, kVkSetViewport, }; @@ -280,6 +326,16 @@ class DeferredCommandBuffer { static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t)); }; + struct ArgsVkSetBlendConstants { + float blend_constants[4]; + }; + + struct ArgsVkSetDepthBias { + float depth_bias_constant_factor; + float depth_bias_clamp; + float depth_bias_slope_factor; + }; + struct ArgsVkSetScissor { uint32_t first_scissor; uint32_t scissor_count; @@ -287,6 +343,11 @@ class DeferredCommandBuffer { static_assert(alignof(VkRect2D) <= alignof(uintmax_t)); }; + struct ArgsSetStencilMaskReference { + VkStencilFaceFlags face_mask; + uint32_t mask_reference; + }; + struct ArgsVkSetViewport { uint32_t first_viewport; uint32_t viewport_count; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 820bcc7eb..afdb32b03 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "xenia/base/assert.h" @@ -530,7 +531,7 @@ void VulkanCommandProcessor::ShutdownContext() { for (const auto& pipeline_layout_pair : pipeline_layouts_) { dfn.vkDestroyPipelineLayout( - device, pipeline_layout_pair.second.pipeline_layout, nullptr); + device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); } pipeline_layouts_.clear(); for (const auto& descriptor_set_layout_pair : @@ -824,8 +825,8 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, deferred_command_buffer_.CmdVkBeginRenderPass( &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - ff_viewport_update_needed_ = true; - ff_scissor_update_needed_ = true; + 
dynamic_viewport_update_needed_ = true; + dynamic_scissor_update_needed_ = true; VkViewport viewport; viewport.x = 0.0f; viewport.y = 0.0f; @@ -841,11 +842,7 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, scissor_rect.extent.height = scaled_height; deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor_rect); - // Bind a non-emulation graphics pipeline and invalidate the bindings. - current_graphics_pipeline_ = VK_NULL_HANDLE; - current_graphics_pipeline_layout_ = nullptr; - deferred_command_buffer_.CmdVkBindPipeline( - VK_PIPELINE_BIND_POINT_GRAPHICS, swap_pipeline_); + BindExternalGraphicsPipeline(swap_pipeline_); deferred_command_buffer_.CmdVkDraw(3, 1, 0, 0); @@ -1043,18 +1040,42 @@ VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel, texture_count_pixel, texture_count_vertex); return nullptr; } - PipelineLayout pipeline_layout_entry; - pipeline_layout_entry.pipeline_layout = pipeline_layout; - pipeline_layout_entry.descriptor_set_layout_textures_pixel_ref = - descriptor_set_layout_textures_pixel; - pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref = - descriptor_set_layout_textures_vertex; - auto emplaced_pair = - pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry); + auto emplaced_pair = pipeline_layouts_.emplace( + std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key.key), + std::forward_as_tuple(pipeline_layout, + descriptor_set_layout_textures_vertex, + descriptor_set_layout_textures_pixel)); // unordered_map insertion doesn't invalidate element references. 
return &emplaced_pair.first->second; } +void VulkanCommandProcessor::BindExternalGraphicsPipeline( + VkPipeline pipeline, bool keep_dynamic_depth_bias, + bool keep_dynamic_blend_constants, bool keep_dynamic_stencil_mask_ref) { + if (!keep_dynamic_depth_bias) { + dynamic_depth_bias_update_needed_ = true; + } + if (!keep_dynamic_blend_constants) { + dynamic_blend_constants_update_needed_ = true; + } + if (!keep_dynamic_stencil_mask_ref) { + dynamic_stencil_compare_mask_front_update_needed_ = true; + dynamic_stencil_compare_mask_back_update_needed_ = true; + dynamic_stencil_write_mask_front_update_needed_ = true; + dynamic_stencil_write_mask_back_update_needed_ = true; + dynamic_stencil_reference_front_update_needed_ = true; + dynamic_stencil_reference_back_update_needed_ = true; + } + if (current_external_graphics_pipeline_ == pipeline) { + return; + } + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + current_external_graphics_pipeline_ = pipeline; + current_guest_graphics_pipeline_ = VK_NULL_HANDLE; + current_guest_graphics_pipeline_layout_ = VK_NULL_HANDLE; +} + Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type, uint32_t guest_address, const uint32_t* host_address, @@ -1134,20 +1155,23 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } + uint32_t normalized_color_mask = + pixel_shader ? draw_util::GetNormalizedColorMask( + regs, pixel_shader->writes_color_targets()) + : 0; + // Shader modifications. SpirvShaderTranslator::Modification vertex_shader_modification = pipeline_cache_->GetCurrentVertexShaderModification( *vertex_shader, primitive_processing_result.host_vertex_shader_type); SpirvShaderTranslator::Modification pixel_shader_modification = - pixel_shader - ? pipeline_cache_->GetCurrentPixelShaderModification(*pixel_shader) - : SpirvShaderTranslator::Modification(0); + pixel_shader ? 
pipeline_cache_->GetCurrentPixelShaderModification( + *pixel_shader, normalized_color_mask) + : SpirvShaderTranslator::Modification(0); // Set up the render targets - this may perform dispatches and draws. - uint32_t pixel_shader_writes_color_targets = - pixel_shader ? pixel_shader->writes_color_targets() : 0; if (!render_target_cache_->Update(is_rasterization_done, - pixel_shader_writes_color_targets)) { + normalized_color_mask)) { return false; } @@ -1164,37 +1188,41 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // Update the graphics pipeline, and if the new graphics pipeline has a // different layout, invalidate incompatible descriptor sets before updating - // current_graphics_pipeline_layout_. + // current_guest_graphics_pipeline_layout_. VkPipeline pipeline; const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( vertex_shader_translation, pixel_shader_translation, - primitive_processing_result, + primitive_processing_result, normalized_color_mask, render_target_cache_->last_update_render_pass_key(), pipeline, pipeline_layout_provider)) { return false; } - deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); + if (current_guest_graphics_pipeline_ != pipeline) { + deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + current_guest_graphics_pipeline_ = pipeline; + current_external_graphics_pipeline_ = VK_NULL_HANDLE; + } auto pipeline_layout = static_cast<const PipelineLayout*>(pipeline_layout_provider); - if (current_graphics_pipeline_layout_ != pipeline_layout) { - if (current_graphics_pipeline_layout_) { // Keep descriptor set layouts for which the new pipeline layout is // compatible with the previous one (pipeline layouts are compatible for // set N if set layouts 0 through N are compatible). 
uint32_t descriptor_sets_kept = uint32_t(SpirvShaderTranslator::kDescriptorSetCount); - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_vertex_ref != - pipeline_layout->descriptor_set_layout_textures_vertex_ref) { + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref() != + pipeline_layout->descriptor_set_layout_textures_vertex_ref()) { descriptor_sets_kept = std::min( descriptor_sets_kept, uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); } - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_pixel_ref != - pipeline_layout->descriptor_set_layout_textures_pixel_ref) { + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref() != + pipeline_layout->descriptor_set_layout_textures_pixel_ref()) { descriptor_sets_kept = std::min( descriptor_sets_kept, uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel)); @@ -1204,7 +1232,7 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // indeterminate state. current_graphics_descriptor_sets_bound_up_to_date_ = 0; } - current_graphics_pipeline_layout_ = pipeline_layout; + current_guest_graphics_pipeline_layout_ = pipeline_layout; } const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); @@ -1234,8 +1262,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, device_properties.limits.maxViewportDimensions[1], true, false, false, false, viewport_info); - // Update fixed-function dynamic state. - UpdateFixedFunctionState(viewport_info); + // Update dynamic graphics pipeline state. + UpdateDynamicState(viewport_info, primitive_polygonal); // Update system constants before uploading them. UpdateSystemConstantValues(primitive_processing_result.host_index_endian, @@ -1550,12 +1578,21 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { deferred_command_buffer_.Reset(); // Reset cached state of the command buffer. 
- ff_viewport_update_needed_ = true; - ff_scissor_update_needed_ = true; + dynamic_viewport_update_needed_ = true; + dynamic_scissor_update_needed_ = true; + dynamic_depth_bias_update_needed_ = true; + dynamic_blend_constants_update_needed_ = true; + dynamic_stencil_compare_mask_front_update_needed_ = true; + dynamic_stencil_compare_mask_back_update_needed_ = true; + dynamic_stencil_write_mask_front_update_needed_ = true; + dynamic_stencil_write_mask_back_update_needed_ = true; + dynamic_stencil_reference_front_update_needed_ = true; + dynamic_stencil_reference_back_update_needed_ = true; current_render_pass_ = VK_NULL_HANDLE; current_framebuffer_ = VK_NULL_HANDLE; - current_graphics_pipeline_ = VK_NULL_HANDLE; - current_graphics_pipeline_layout_ = nullptr; + current_guest_graphics_pipeline_ = VK_NULL_HANDLE; + current_external_graphics_pipeline_ = VK_NULL_HANDLE; + current_guest_graphics_pipeline_layout_ = nullptr; current_graphics_descriptor_sets_bound_up_to_date_ = 0; primitive_processor_->BeginSubmission(); @@ -1825,7 +1862,7 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { for (const auto& pipeline_layout_pair : pipeline_layouts_) { dfn.vkDestroyPipelineLayout( - device, pipeline_layout_pair.second.pipeline_layout, nullptr); + device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr); } pipeline_layouts_.clear(); for (const auto& descriptor_set_layout_pair : @@ -1859,8 +1896,8 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags() return stages; } -void VulkanCommandProcessor::UpdateFixedFunctionState( - const draw_util::ViewportInfo& viewport_info) { +void VulkanCommandProcessor::UpdateDynamicState( + const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES @@ -1891,16 +1928,19 @@ void VulkanCommandProcessor::UpdateFixedFunctionState( } viewport.minDepth = viewport_info.z_min; 
viewport.maxDepth = viewport_info.z_max; - ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x; - ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y; - ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width; - ff_viewport_update_needed_ |= ff_viewport_.height != viewport.height; - ff_viewport_update_needed_ |= ff_viewport_.minDepth != viewport.minDepth; - ff_viewport_update_needed_ |= ff_viewport_.maxDepth != viewport.maxDepth; - if (ff_viewport_update_needed_) { - ff_viewport_ = viewport; - deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport); - ff_viewport_update_needed_ = false; + dynamic_viewport_update_needed_ |= dynamic_viewport_.x != viewport.x; + dynamic_viewport_update_needed_ |= dynamic_viewport_.y != viewport.y; + dynamic_viewport_update_needed_ |= dynamic_viewport_.width != viewport.width; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.height != viewport.height; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.minDepth != viewport.minDepth; + dynamic_viewport_update_needed_ |= + dynamic_viewport_.maxDepth != viewport.maxDepth; + if (dynamic_viewport_update_needed_) { + dynamic_viewport_ = viewport; + deferred_command_buffer_.CmdVkSetViewport(0, 1, &dynamic_viewport_); + dynamic_viewport_update_needed_ = false; } // Scissor. 
@@ -1911,17 +1951,191 @@ void VulkanCommandProcessor::UpdateFixedFunctionState( scissor_rect.offset.y = int32_t(scissor.offset[1]); scissor_rect.extent.width = scissor.extent[0]; scissor_rect.extent.height = scissor.extent[1]; - ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor_rect.offset.x; - ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor_rect.offset.y; - ff_scissor_update_needed_ |= - ff_scissor_.extent.width != scissor_rect.extent.width; - ff_scissor_update_needed_ |= - ff_scissor_.extent.height != scissor_rect.extent.height; - if (ff_scissor_update_needed_) { - ff_scissor_ = scissor_rect; - deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor_rect); - ff_scissor_update_needed_ = false; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.x != scissor_rect.offset.x; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.offset.y != scissor_rect.offset.y; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.width != scissor_rect.extent.width; + dynamic_scissor_update_needed_ |= + dynamic_scissor_.extent.height != scissor_rect.extent.height; + if (dynamic_scissor_update_needed_) { + dynamic_scissor_ = scissor_rect; + deferred_command_buffer_.CmdVkSetScissor(0, 1, &dynamic_scissor_); + dynamic_scissor_update_needed_ = false; } + + // Depth bias. + // TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB + // implementation. + float depth_bias_constant_factor, depth_bias_slope_factor; + draw_util::GetPreferredFacePolygonOffset(regs, primitive_polygonal, + depth_bias_slope_factor, + depth_bias_constant_factor); + depth_bias_constant_factor *= draw_util::GetD3D10PolygonOffsetFactor( + regs.Get<reg::RB_DEPTH_INFO>().depth_format, true); + // With non-square resolution scaling, make sure the worst-case impact is + // reverted (slope only along the scaled axis), thus max. More bias is better + // than less bias, because less bias means Z fighting with the background is + // more likely. 
+ depth_bias_slope_factor *= + xenos::kPolygonOffsetScaleSubpixelUnit * + float(std::max(render_target_cache_->GetResolutionScaleX(), + render_target_cache_->GetResolutionScaleY())); + // std::memcmp instead of != so in case of NaN, every draw won't be + // invalidating it. + dynamic_depth_bias_update_needed_ |= + std::memcmp(&dynamic_depth_bias_constant_factor_, + &depth_bias_constant_factor, sizeof(float)) != 0; + dynamic_depth_bias_update_needed_ |= + std::memcmp(&dynamic_depth_bias_slope_factor_, &depth_bias_slope_factor, + sizeof(float)) != 0; + if (dynamic_depth_bias_update_needed_) { + dynamic_depth_bias_constant_factor_ = depth_bias_constant_factor; + dynamic_depth_bias_slope_factor_ = depth_bias_slope_factor; + deferred_command_buffer_.CmdVkSetDepthBias( + dynamic_depth_bias_constant_factor_, 0.0f, + dynamic_depth_bias_slope_factor_); + dynamic_depth_bias_update_needed_ = false; + } + + // Blend constants. + float blend_constants[] = { + regs[XE_GPU_REG_RB_BLEND_RED].f32, + regs[XE_GPU_REG_RB_BLEND_GREEN].f32, + regs[XE_GPU_REG_RB_BLEND_BLUE].f32, + regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, + }; + dynamic_blend_constants_update_needed_ |= + std::memcmp(dynamic_blend_constants_, blend_constants, + sizeof(float) * 4) != 0; + if (dynamic_blend_constants_update_needed_) { + std::memcpy(dynamic_blend_constants_, blend_constants, sizeof(float) * 4); + deferred_command_buffer_.CmdVkSetBlendConstants(dynamic_blend_constants_); + dynamic_blend_constants_update_needed_ = false; + } + + // Stencil masks and references. + // Due to pretty complex conditions involving registers not directly related + // to stencil (primitive type, culling), changing the values only when stencil + // is actually needed. 
However, due to the way dynamic state needs to be set + // in Vulkan, which doesn't take into account whether the state actually has + // effect on drawing, and because the masks and the references are always + // dynamic in Xenia guest pipelines, they must be set in the command buffer + // before any draw. + auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs); + if (rb_depthcontrol.stencil_enable) { + Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg; + if (primitive_polygonal && rb_depthcontrol.backface_enable) { + const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (!device_portability_subset_features || + device_portability_subset_features->separateStencilMaskRef) { + // Choose the back face values only if drawing only back faces. + stencil_ref_mask_front_reg = + regs.Get<reg::PA_SU_SC_MODE_CNTL>().cull_front + ? XE_GPU_REG_RB_STENCILREFMASK_BF + : XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = stencil_ref_mask_front_reg; + } else { + stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; + } + } else { + stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK; + stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK; + } + auto stencil_ref_mask_front = + regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_front_reg); + auto stencil_ref_mask_back = + regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_back_reg); + // Compare mask. + dynamic_stencil_compare_mask_front_update_needed_ |= + dynamic_stencil_compare_mask_front_ != + stencil_ref_mask_front.stencilmask; + dynamic_stencil_compare_mask_front_ = stencil_ref_mask_front.stencilmask; + dynamic_stencil_compare_mask_back_update_needed_ |= + dynamic_stencil_compare_mask_back_ != stencil_ref_mask_back.stencilmask; + dynamic_stencil_compare_mask_back_ = stencil_ref_mask_back.stencilmask; + // Write mask. 
+ dynamic_stencil_write_mask_front_update_needed_ |= + dynamic_stencil_write_mask_front_ != + stencil_ref_mask_front.stencilwritemask; + dynamic_stencil_write_mask_front_ = stencil_ref_mask_front.stencilwritemask; + dynamic_stencil_write_mask_back_update_needed_ |= + dynamic_stencil_write_mask_back_ != + stencil_ref_mask_back.stencilwritemask; + dynamic_stencil_write_mask_back_ = stencil_ref_mask_back.stencilwritemask; + // Reference. + dynamic_stencil_reference_front_update_needed_ |= + dynamic_stencil_reference_front_ != stencil_ref_mask_front.stencilref; + dynamic_stencil_reference_front_ = stencil_ref_mask_front.stencilref; + dynamic_stencil_reference_back_update_needed_ |= + dynamic_stencil_reference_back_ != stencil_ref_mask_back.stencilref; + dynamic_stencil_reference_back_ = stencil_ref_mask_back.stencilref; + } + // Using VK_STENCIL_FACE_FRONT_AND_BACK for higher safety when running on the + // Vulkan portability subset without separateStencilMaskRef. + if (dynamic_stencil_compare_mask_front_update_needed_ || + dynamic_stencil_compare_mask_back_update_needed_) { + if (dynamic_stencil_compare_mask_front_ == + dynamic_stencil_compare_mask_back_) { + deferred_command_buffer_.CmdVkSetStencilCompareMask( + VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_compare_mask_front_); + } else { + if (dynamic_stencil_compare_mask_front_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilCompareMask( + VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_compare_mask_front_); + } + if (dynamic_stencil_compare_mask_back_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilCompareMask( + VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_compare_mask_back_); + } + } + dynamic_stencil_compare_mask_front_update_needed_ = false; + dynamic_stencil_compare_mask_back_update_needed_ = false; + } + if (dynamic_stencil_write_mask_front_update_needed_ || + dynamic_stencil_write_mask_back_update_needed_) { + if (dynamic_stencil_write_mask_front_ == dynamic_stencil_write_mask_back_) { + 
deferred_command_buffer_.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_write_mask_front_); + } else { + if (dynamic_stencil_write_mask_front_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_write_mask_front_); + } + if (dynamic_stencil_write_mask_back_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilWriteMask( + VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_write_mask_back_); + } + } + dynamic_stencil_write_mask_front_update_needed_ = false; + dynamic_stencil_write_mask_back_update_needed_ = false; + } + if (dynamic_stencil_reference_front_update_needed_ || + dynamic_stencil_reference_back_update_needed_) { + if (dynamic_stencil_reference_front_ == dynamic_stencil_reference_back_) { + deferred_command_buffer_.CmdVkSetStencilReference( + VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_reference_front_); + } else { + if (dynamic_stencil_reference_front_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilReference( + VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_reference_front_); + } + if (dynamic_stencil_reference_back_update_needed_) { + deferred_command_buffer_.CmdVkSetStencilReference( + VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_reference_back_); + } + } + dynamic_stencil_reference_front_update_needed_ = false; + dynamic_stencil_reference_back_update_needed_ = false; + } + + // TODO(Triang3l): VK_EXT_extended_dynamic_state and + // VK_EXT_extended_dynamic_state2. } void VulkanCommandProcessor::UpdateSystemConstantValues( @@ -2201,14 +2415,14 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // Bind the new descriptor sets. 
uint32_t descriptor_sets_needed = (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1; - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_vertex_ref == + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref() == descriptor_set_layout_empty_) { descriptor_sets_needed &= ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex); } - if (current_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_pixel_ref == + if (current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref() == descriptor_set_layout_empty_) { descriptor_sets_needed &= ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel); @@ -2226,7 +2440,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // geometry shaders. deferred_command_buffer_.CmdVkBindDescriptorSets( VK_PIPELINE_BIND_POINT_GRAPHICS, - current_graphics_pipeline_layout_->pipeline_layout, + current_guest_graphics_pipeline_layout_->GetPipelineLayout(), descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index, current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr); if (descriptor_set_mask_tzcnt >= 32) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index bc43d9ad6..a01f14feb 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -90,6 +90,17 @@ class VulkanCommandProcessor : public CommandProcessor { const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( uint32_t texture_count_pixel, uint32_t texture_count_vertex); + // Binds a graphics pipeline for host-specific purposes, invalidating the + // affected state. 
keep_dynamic_* must be false (to invalidate the dynamic + // state after binding the pipeline with the same state being static, or if + // the caller changes the dynamic state bypassing the VulkanCommandProcessor) + // unless the caller has these state variables as dynamic and uses the + // tracking in VulkanCommandProcessor to modify them. + void BindExternalGraphicsPipeline(VkPipeline pipeline, + bool keep_dynamic_depth_bias = false, + bool keep_dynamic_blend_constants = false, + bool keep_dynamic_stencil_mask_ref = false); + protected: bool SetupContext() override; void ShutdownContext() override; @@ -146,12 +157,29 @@ class VulkanCommandProcessor : public CommandProcessor { class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider { public: + PipelineLayout( + VkPipelineLayout pipeline_layout, + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref, + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref) + : pipeline_layout_(pipeline_layout), + descriptor_set_layout_textures_vertex_ref_( + descriptor_set_layout_textures_vertex_ref), + descriptor_set_layout_textures_pixel_ref_( + descriptor_set_layout_textures_pixel_ref) {} VkPipelineLayout GetPipelineLayout() const override { - return pipeline_layout; + return pipeline_layout_; } - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref; - VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const { + return descriptor_set_layout_textures_vertex_ref_; + } + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const { + return descriptor_set_layout_textures_pixel_ref_; + } + + private: + VkPipelineLayout pipeline_layout_; + VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_; + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_; }; // BeginSubmission and EndSubmission may be called at any time. 
If there's an @@ -179,7 +207,8 @@ class VulkanCommandProcessor : public CommandProcessor { VkShaderStageFlags GetGuestVertexShaderStageFlags() const; - void UpdateFixedFunctionState(const draw_util::ViewportInfo& viewport_info); + void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, + bool primitive_polygonal); void UpdateSystemConstantValues(xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info); bool UpdateBindings(const VulkanShader* vertex_shader, @@ -285,22 +314,52 @@ class VulkanCommandProcessor : public CommandProcessor { swap_framebuffers_; std::deque> swap_framebuffers_outdated_; - // The current fixed-function drawing state. - VkViewport ff_viewport_; - VkRect2D ff_scissor_; - bool ff_viewport_update_needed_; - bool ff_scissor_update_needed_; + // The current dynamic state of the graphics pipeline bind point. Note that + // binding any pipeline to the bind point with static state (even if it's + // unused, like depth bias being disabled, but the values themselves still not + // declared as dynamic in the pipeline) invalidates such dynamic state. + VkViewport dynamic_viewport_; + VkRect2D dynamic_scissor_; + float dynamic_depth_bias_constant_factor_; + float dynamic_depth_bias_slope_factor_; + float dynamic_blend_constants_[4]; + // The stencil values are pre-initialized (to D3D11_DEFAULT_STENCIL_*, and the + // initial values for front and back are the same for portability subset + // safety) because they're updated conditionally to avoid changing the back + // face values when stencil is disabled and the primitive type is changed + // between polygonal and non-polygonal. 
+ uint32_t dynamic_stencil_compare_mask_front_ = UINT8_MAX; + uint32_t dynamic_stencil_compare_mask_back_ = UINT8_MAX; + uint32_t dynamic_stencil_write_mask_front_ = UINT8_MAX; + uint32_t dynamic_stencil_write_mask_back_ = UINT8_MAX; + uint32_t dynamic_stencil_reference_front_ = 0; + uint32_t dynamic_stencil_reference_back_ = 0; + bool dynamic_viewport_update_needed_; + bool dynamic_scissor_update_needed_; + bool dynamic_depth_bias_update_needed_; + bool dynamic_blend_constants_update_needed_; + bool dynamic_stencil_compare_mask_front_update_needed_; + bool dynamic_stencil_compare_mask_back_update_needed_; + bool dynamic_stencil_write_mask_front_update_needed_; + bool dynamic_stencil_write_mask_back_update_needed_; + bool dynamic_stencil_reference_front_update_needed_; + bool dynamic_stencil_reference_back_update_needed_; // Cache render pass currently started in the command buffer with the // framebuffer. VkRenderPass current_render_pass_; VkFramebuffer current_framebuffer_; - // Cache graphics pipeline currently bound to the command buffer. - VkPipeline current_graphics_pipeline_; + // Currently bound graphics pipeline, either from the pipeline cache (with + // potentially deferred creation - current_external_graphics_pipeline_ is + // VK_NULL_HANDLE in this case) or a non-Xenos one + // (current_guest_graphics_pipeline_ is VK_NULL_HANDLE in this case). + // TODO(Triang3l): Change to a deferred compilation handle. + VkPipeline current_guest_graphics_pipeline_; + VkPipeline current_external_graphics_pipeline_; - // Pipeline layout of the current graphics pipeline. - const PipelineLayout* current_graphics_pipeline_layout_; + // Pipeline layout of the current guest graphics pipeline. 
+ const PipelineLayout* current_guest_graphics_pipeline_layout_; VkDescriptorSet current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetCount]; // Whether descriptor sets in current_graphics_descriptor_sets_ point to diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 33d94ad4f..2e3c32d8c 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,6 +10,7 @@ #include "xenia/gpu/vulkan/vulkan_pipeline_cache.h" #include +#include #include #include @@ -45,11 +46,6 @@ bool VulkanPipelineCache::Initialize() { const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanProvider(); - device_pipeline_features_.features = 0; - // TODO(Triang3l): Support the portability subset. 
- device_pipeline_features_.point_polygons = 1; - device_pipeline_features_.triangle_fans = 1; - shader_translator_ = std::make_unique( SpirvShaderTranslator::Features(provider)); @@ -119,21 +115,52 @@ VulkanPipelineCache::GetCurrentVertexShaderModification( SpirvShaderTranslator::Modification VulkanPipelineCache::GetCurrentPixelShaderModification( - const Shader& shader) const { + const Shader& shader, uint32_t normalized_color_mask) const { assert_true(shader.type() == xenos::ShaderType::kPixel); assert_true(shader.is_ucode_analyzed()); const auto& regs = register_file_; + auto sq_program_cntl = regs.Get(); - return SpirvShaderTranslator::Modification( + SpirvShaderTranslator::Modification modification( shader_translator_->GetDefaultPixelShaderModification( shader.GetDynamicAddressableRegisterCount( sq_program_cntl.ps_num_reg))); + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + if (!device_features.independentBlend) { + // Since without independent blending, the write mask is common for all + // attachments, but the render pass may still include the attachments from + // previous draws (to prevent excessive render pass changes potentially + // doing stores and loads), disable writing to render targets with a + // completely empty write mask by removing the output from the shader. + // Only explicitly excluding render targets that the shader actually writes + // to, for better pipeline storage compatibility between devices with and + // without independent blending (so in the usual situation - the shader + // doesn't write to any render targets disabled via the color mask - no + // explicit disabling of shader outputs will be needed, and the disabled + // output mask will be 0). 
+ uint32_t color_targets_remaining = shader.writes_color_targets(); + uint32_t color_target_index; + while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { + color_targets_remaining &= ~(uint32_t(1) << color_target_index); + if (!(normalized_color_mask & + (uint32_t(0b1111) << (4 * color_target_index)))) { + modification.pixel.color_outputs_disabled |= uint32_t(1) + << color_target_index; + } + } + } + + return modification; } bool VulkanPipelineCache::ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out) { @@ -174,9 +201,9 @@ bool VulkanPipelineCache::ConfigurePipeline( } PipelineDescription description; - if (!GetCurrentStateDescription(vertex_shader, pixel_shader, - primitive_processing_result, render_pass_key, - description)) { + if (!GetCurrentStateDescription( + vertex_shader, pixel_shader, primitive_processing_result, + normalized_color_mask, render_pass_key, description)) { return false; } if (last_pipeline_ && last_pipeline_->first == description) { @@ -231,14 +258,92 @@ bool VulkanPipelineCache::TranslateAnalyzedShader( return translation.GetOrCreateShaderModule() != VK_NULL_HANDLE; } +void VulkanPipelineCache::WritePipelineRenderTargetDescription( + reg::RB_BLENDCONTROL blend_control, uint32_t write_mask, + PipelineRenderTarget& render_target_out) const { + if (write_mask) { + assert_zero(write_mask & ~uint32_t(0b1111)); + // 32 because of 0x1F mask, for safety (all unknown to zero). + static const PipelineBlendFactor kBlendFactorMap[32] = { + /* 0 */ PipelineBlendFactor::kZero, + /* 1 */ PipelineBlendFactor::kOne, + /* 2 */ PipelineBlendFactor::kZero, // ? + /* 3 */ PipelineBlendFactor::kZero, // ? 
+ /* 4 */ PipelineBlendFactor::kSrcColor, + /* 5 */ PipelineBlendFactor::kOneMinusSrcColor, + /* 6 */ PipelineBlendFactor::kSrcAlpha, + /* 7 */ PipelineBlendFactor::kOneMinusSrcAlpha, + /* 8 */ PipelineBlendFactor::kDstColor, + /* 9 */ PipelineBlendFactor::kOneMinusDstColor, + /* 10 */ PipelineBlendFactor::kDstAlpha, + /* 11 */ PipelineBlendFactor::kOneMinusDstAlpha, + /* 12 */ PipelineBlendFactor::kConstantColor, + /* 13 */ PipelineBlendFactor::kOneMinusConstantColor, + /* 14 */ PipelineBlendFactor::kConstantAlpha, + /* 15 */ PipelineBlendFactor::kOneMinusConstantAlpha, + /* 16 */ PipelineBlendFactor::kSrcAlphaSaturate, + }; + render_target_out.src_color_blend_factor = + kBlendFactorMap[uint32_t(blend_control.color_srcblend)]; + render_target_out.dst_color_blend_factor = + kBlendFactorMap[uint32_t(blend_control.color_destblend)]; + render_target_out.color_blend_op = blend_control.color_comb_fcn; + render_target_out.src_alpha_blend_factor = + kBlendFactorMap[uint32_t(blend_control.alpha_srcblend)]; + render_target_out.dst_alpha_blend_factor = + kBlendFactorMap[uint32_t(blend_control.alpha_destblend)]; + render_target_out.alpha_blend_op = blend_control.alpha_comb_fcn; + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features && + !device_portability_subset_features->constantAlphaColorBlendFactors) { + if (blend_control.color_srcblend == xenos::BlendFactor::kConstantAlpha) { + render_target_out.src_color_blend_factor = + PipelineBlendFactor::kConstantColor; + } else if (blend_control.color_srcblend == + xenos::BlendFactor::kOneMinusConstantAlpha) { + render_target_out.src_color_blend_factor = + PipelineBlendFactor::kOneMinusConstantColor; + } + if (blend_control.color_destblend == xenos::BlendFactor::kConstantAlpha) { + 
render_target_out.dst_color_blend_factor = + PipelineBlendFactor::kConstantColor; + } else if (blend_control.color_destblend == + xenos::BlendFactor::kOneMinusConstantAlpha) { + render_target_out.dst_color_blend_factor = + PipelineBlendFactor::kOneMinusConstantColor; + } + } + } else { + render_target_out.src_color_blend_factor = PipelineBlendFactor::kOne; + render_target_out.dst_color_blend_factor = PipelineBlendFactor::kZero; + render_target_out.color_blend_op = xenos::BlendOp::kAdd; + render_target_out.src_alpha_blend_factor = PipelineBlendFactor::kOne; + render_target_out.dst_alpha_blend_factor = PipelineBlendFactor::kZero; + render_target_out.alpha_blend_op = xenos::BlendOp::kAdd; + } + render_target_out.color_write_mask = write_mask; +} + bool VulkanPipelineCache::GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const { description_out.Reset(); + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + const RegisterFile& regs = register_file_; auto pa_su_sc_mode_cntl = regs.Get(); @@ -268,6 +373,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( primitive_topology = PipelinePrimitiveTopology::kTriangleList; break; case xenos::PrimitiveType::kTriangleFan: + // The check should be performed at primitive processing time. 
+ assert_true(!device_portability_subset_features || + device_portability_subset_features->triangleFans); primitive_topology = PipelinePrimitiveTopology::kTriangleFan; break; case xenos::PrimitiveType::kTriangleStrip: @@ -284,6 +392,9 @@ bool VulkanPipelineCache::GetCurrentStateDescription( description_out.primitive_restart = primitive_processing_result.host_primitive_reset_enabled; + description_out.depth_clamp_enable = + regs.Get().clip_disable; + // TODO(Triang3l): Tessellation. bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); if (primitive_polygonal) { @@ -313,9 +424,11 @@ bool VulkanPipelineCache::GetCurrentStateDescription( case xenos::PolygonType::kPoints: // When points are not supported, use lines instead, preserving // debug-like purpose. - description_out.polygon_mode = device_pipeline_features_.point_polygons - ? PipelinePolygonMode::kPoint - : PipelinePolygonMode::kLine; + description_out.polygon_mode = + (!device_portability_subset_features || + device_portability_subset_features->pointPolygons) + ? PipelinePolygonMode::kPoint + : PipelinePolygonMode::kLine; break; case xenos::PolygonType::kLines: description_out.polygon_mode = PipelinePolygonMode::kLine; @@ -332,6 +445,196 @@ bool VulkanPipelineCache::GetCurrentStateDescription( description_out.polygon_mode = PipelinePolygonMode::kFill; } + // TODO(Triang3l): Skip depth / stencil and color state for the fragment + // shader interlock RB implementation. 
+ + if (render_pass_key.depth_and_color_used & 1) { + auto rb_depthcontrol = draw_util::GetDepthControlForCurrentEdramMode(regs); + if (rb_depthcontrol.z_enable) { + description_out.depth_write_enable = rb_depthcontrol.z_write_enable; + description_out.depth_compare_op = rb_depthcontrol.zfunc; + } else { + description_out.depth_compare_op = xenos::CompareFunction::kAlways; + } + if (rb_depthcontrol.stencil_enable) { + description_out.stencil_test_enable = 1; + description_out.stencil_front_fail_op = rb_depthcontrol.stencilfail; + description_out.stencil_front_pass_op = rb_depthcontrol.stencilzpass; + description_out.stencil_front_depth_fail_op = + rb_depthcontrol.stencilzfail; + description_out.stencil_front_compare_op = rb_depthcontrol.stencilfunc; + if (primitive_polygonal && rb_depthcontrol.backface_enable) { + description_out.stencil_back_fail_op = rb_depthcontrol.stencilfail_bf; + description_out.stencil_back_pass_op = rb_depthcontrol.stencilzpass_bf; + description_out.stencil_back_depth_fail_op = + rb_depthcontrol.stencilzfail_bf; + description_out.stencil_back_compare_op = + rb_depthcontrol.stencilfunc_bf; + } else { + description_out.stencil_back_fail_op = + description_out.stencil_front_fail_op; + description_out.stencil_back_pass_op = + description_out.stencil_front_pass_op; + description_out.stencil_back_depth_fail_op = + description_out.stencil_front_depth_fail_op; + description_out.stencil_back_compare_op = + description_out.stencil_front_compare_op; + } + } + } + + // Color blending and write masks (filled only for the attachments present in + // the render pass object). 
+ uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1; + if (device_features.independentBlend) { + uint32_t render_pass_color_rts_remaining = render_pass_color_rts; + uint32_t color_rt_index; + while (xe::bit_scan_forward(render_pass_color_rts_remaining, + &color_rt_index)) { + render_pass_color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + WritePipelineRenderTargetDescription( + regs.Get( + reg::RB_BLENDCONTROL::rt_register_indices[color_rt_index]), + (normalized_color_mask >> (color_rt_index * 4)) & 0b1111, + description_out.render_targets[color_rt_index]); + } + } else { + // Take the blend control for the first render target that the guest wants + // to write to (consider it the most important) and use it for all render + // targets, if any. + // TODO(Triang3l): Implement an option for independent blending via multiple + // draw calls with different pipelines maybe? Though independent blending + // support is pretty wide, with a quite prominent exception of Adreno 4xx + // apparently. + uint32_t render_pass_color_rts_remaining = render_pass_color_rts; + uint32_t render_pass_first_color_rt_index; + if (xe::bit_scan_forward(render_pass_color_rts_remaining, + &render_pass_first_color_rt_index)) { + render_pass_color_rts_remaining &= + ~(uint32_t(1) << render_pass_first_color_rt_index); + PipelineRenderTarget& render_pass_first_color_rt = + description_out.render_targets[render_pass_first_color_rt_index]; + uint32_t common_blend_rt_index; + if (xe::bit_scan_forward(normalized_color_mask, &common_blend_rt_index)) { + common_blend_rt_index >>= 2; + // If a common write mask will be used for multiple render targets, use + // the original RB_COLOR_MASK instead of the normalized color mask as + // the normalized color mask has non-existent components forced to + // written (don't need reading to be preserved), while the number of + // components may vary between render targets. 
The attachments in the + // pass that must not be written to at all will be excluded via a shader + // modification. + WritePipelineRenderTargetDescription( + regs.Get( + reg::RB_BLENDCONTROL::rt_register_indices + [common_blend_rt_index]), + (((normalized_color_mask & + ~(uint32_t(0b1111) << (4 * common_blend_rt_index))) + ? regs[XE_GPU_REG_RB_COLOR_MASK].u32 + : normalized_color_mask) >> + (4 * common_blend_rt_index)) & + 0b1111, + render_pass_first_color_rt); + } else { + // No render targets are written to, though the render pass still may + // contain color attachments - set them to not written and not blending. + render_pass_first_color_rt.src_color_blend_factor = + PipelineBlendFactor::kOne; + render_pass_first_color_rt.dst_color_blend_factor = + PipelineBlendFactor::kZero; + render_pass_first_color_rt.color_blend_op = xenos::BlendOp::kAdd; + render_pass_first_color_rt.src_alpha_blend_factor = + PipelineBlendFactor::kOne; + render_pass_first_color_rt.dst_alpha_blend_factor = + PipelineBlendFactor::kZero; + render_pass_first_color_rt.alpha_blend_op = xenos::BlendOp::kAdd; + } + // Reuse the same blending settings for all render targets in the pass, + // for description consistency. 
+ uint32_t color_rt_index; + while (xe::bit_scan_forward(render_pass_color_rts_remaining, + &color_rt_index)) { + render_pass_color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + description_out.render_targets[color_rt_index] = + render_pass_first_color_rt; + } + } + } + + return true; +} + +bool VulkanPipelineCache::ArePipelineRequirementsMet( + const PipelineDescription& description) const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); + + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (device_portability_subset_features) { + if (description.primitive_topology == + PipelinePrimitiveTopology::kTriangleFan && + device_portability_subset_features->triangleFans) { + return false; + } + if (description.polygon_mode == PipelinePolygonMode::kPoint && + device_portability_subset_features->pointPolygons) { + return false; + } + if (!device_portability_subset_features->constantAlphaColorBlendFactors) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.src_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kConstantAlpha || + color_rt.dst_color_blend_factor == + PipelineBlendFactor::kOneMinusConstantAlpha) { + return false; + } + } + } + } + + if (!device_features.independentBlend) { + uint32_t color_rts_remaining = + description.render_pass_key.depth_and_color_used >> 1; + uint32_t 
first_color_rt_index; + if (xe::bit_scan_forward(color_rts_remaining, &first_color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << first_color_rt_index); + const PipelineRenderTarget& first_color_rt = + description.render_targets[first_color_rt_index]; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor != + first_color_rt.src_color_blend_factor || + color_rt.dst_color_blend_factor != + first_color_rt.dst_color_blend_factor || + color_rt.color_blend_op != first_color_rt.color_blend_op || + color_rt.src_alpha_blend_factor != + first_color_rt.src_alpha_blend_factor || + color_rt.dst_alpha_blend_factor != + first_color_rt.dst_alpha_blend_factor || + color_rt.alpha_blend_op != first_color_rt.alpha_blend_op || + color_rt.color_write_mask != first_color_rt.color_write_mask) { + return false; + } + } + } + } + return true; } @@ -355,6 +658,17 @@ bool VulkanPipelineCache::EnsurePipelineCreated( } const PipelineDescription& description = creation_arguments.pipeline->first; + if (!ArePipelineRequirementsMet(description)) { + assert_always( + "When creating a new pipeline, the description must not require " + "unsupported features, and when loading the pipeline storage, " + "unsupported supported must be filtered out"); + return false; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDeviceFeatures& device_features = provider.device_features(); VkPipelineShaderStageCreateInfo shader_stages[2]; uint32_t shader_stage_count = 0; @@ -434,10 +748,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated( input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; break; case PipelinePrimitiveTopology::kTriangleFan: - assert_true(device_pipeline_features_.triangle_fans); - if 
(!device_pipeline_features_.triangle_fans) { - return false; - } input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; break; case PipelinePrimitiveTopology::kLineListWithAdjacency: @@ -474,6 +784,8 @@ bool VulkanPipelineCache::EnsurePipelineCreated( VkPipelineRasterizationStateCreateInfo rasterization_state = {}; rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterization_state.depthClampEnable = + description.depth_clamp_enable ? VK_TRUE : VK_FALSE; switch (description.polygon_mode) { case PipelinePolygonMode::kFill: rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; @@ -482,10 +794,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated( rasterization_state.polygonMode = VK_POLYGON_MODE_LINE; break; case PipelinePolygonMode::kPoint: - assert_true(device_pipeline_features_.point_polygons); - if (!device_pipeline_features_.point_polygons) { - return false; - } rasterization_state.polygonMode = VK_POLYGON_MODE_POINT; break; default: @@ -502,6 +810,17 @@ bool VulkanPipelineCache::EnsurePipelineCreated( rasterization_state.frontFace = description.front_face_clockwise ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE; + // Depth bias is dynamic (even toggling - pipeline creation is expensive). + // "If no depth attachment is present, r is undefined" in the depth bias + // formula, though Z has no effect on anything if a depth attachment is not + // used (the guest shader can't access Z), enabling only when there's a + // depth / stencil attachment for correctness. + // TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB + // implementation. + rasterization_state.depthBiasEnable = + (description.render_pass_key.depth_and_color_used & 0b1) ? VK_TRUE + : VK_FALSE; + // TODO(Triang3l): Wide lines. 
rasterization_state.lineWidth = 1.0f; VkPipelineMultisampleStateCreateInfo multisample_state = {}; @@ -510,42 +829,156 @@ bool VulkanPipelineCache::EnsurePipelineCreated( multisample_state.rasterizationSamples = VkSampleCountFlagBits( uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); - // TODO(Triang3l): Depth / stencil state. VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; depth_stencil_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; depth_stencil_state.pNext = nullptr; + if (description.depth_write_enable || + description.depth_compare_op != xenos::CompareFunction::kAlways) { + depth_stencil_state.depthTestEnable = VK_TRUE; + depth_stencil_state.depthWriteEnable = + description.depth_write_enable ? VK_TRUE : VK_FALSE; + depth_stencil_state.depthCompareOp = VkCompareOp( + uint32_t(VK_COMPARE_OP_NEVER) + uint32_t(description.depth_compare_op)); + } + if (description.stencil_test_enable) { + depth_stencil_state.stencilTestEnable = VK_TRUE; + depth_stencil_state.front.failOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_fail_op)); + depth_stencil_state.front.passOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_pass_op)); + depth_stencil_state.front.depthFailOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_depth_fail_op)); + depth_stencil_state.front.compareOp = + VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) + + uint32_t(description.stencil_front_compare_op)); + depth_stencil_state.back.failOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_fail_op)); + depth_stencil_state.back.passOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_pass_op)); + depth_stencil_state.back.depthFailOp = + VkStencilOp(uint32_t(VK_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_depth_fail_op)); + depth_stencil_state.back.compareOp = + 
VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) + + uint32_t(description.stencil_back_compare_op)); + } - // TODO(Triang3l): Color blend state. - // TODO(Triang3l): Handle disabled separate blending. VkPipelineColorBlendAttachmentState color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; - for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { - if (!(description.render_pass_key.depth_and_color_used & (1 << (1 + i)))) { - continue; + uint32_t color_rts_used = + description.render_pass_key.depth_and_color_used >> 1; + { + static const VkBlendFactor kBlendFactorMap[] = { + VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + VK_BLEND_FACTOR_CONSTANT_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + VK_BLEND_FACTOR_CONSTANT_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + }; + // 8 entries for safety since 3 bits from the guest are passed directly. 
+ static const VkBlendOp kBlendOpMap[] = {VK_BLEND_OP_ADD, + VK_BLEND_OP_SUBTRACT, + VK_BLEND_OP_MIN, + VK_BLEND_OP_MAX, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, + VK_BLEND_OP_ADD, + VK_BLEND_OP_ADD}; + uint32_t color_rts_remaining = color_rts_used; + uint32_t color_rt_index; + while (xe::bit_scan_forward(color_rts_remaining, &color_rt_index)) { + color_rts_remaining &= ~(uint32_t(1) << color_rt_index); + VkPipelineColorBlendAttachmentState& color_blend_attachment = + color_blend_attachments[color_rt_index]; + const PipelineRenderTarget& color_rt = + description.render_targets[color_rt_index]; + if (color_rt.src_color_blend_factor != PipelineBlendFactor::kOne || + color_rt.dst_color_blend_factor != PipelineBlendFactor::kZero || + color_rt.color_blend_op != xenos::BlendOp::kAdd || + color_rt.src_alpha_blend_factor != PipelineBlendFactor::kOne || + color_rt.dst_alpha_blend_factor != PipelineBlendFactor::kZero || + color_rt.alpha_blend_op != xenos::BlendOp::kAdd) { + color_blend_attachment.blendEnable = VK_TRUE; + color_blend_attachment.srcColorBlendFactor = + kBlendFactorMap[uint32_t(color_rt.src_color_blend_factor)]; + color_blend_attachment.dstColorBlendFactor = + kBlendFactorMap[uint32_t(color_rt.dst_color_blend_factor)]; + color_blend_attachment.colorBlendOp = + kBlendOpMap[uint32_t(color_rt.color_blend_op)]; + color_blend_attachment.srcAlphaBlendFactor = + kBlendFactorMap[uint32_t(color_rt.src_alpha_blend_factor)]; + color_blend_attachment.dstAlphaBlendFactor = + kBlendFactorMap[uint32_t(color_rt.dst_alpha_blend_factor)]; + color_blend_attachment.alphaBlendOp = + kBlendOpMap[uint32_t(color_rt.alpha_blend_op)]; + } + color_blend_attachment.colorWriteMask = + VkColorComponentFlags(color_rt.color_write_mask); + if (!device_features.independentBlend) { + // For non-independent blend, the pAttachments element for the first + // actually used color will be replicated into all. 
+ break; + } } - color_blend_attachments[i].colorWriteMask = - VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; } VkPipelineColorBlendStateCreateInfo color_blend_state = {}; color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - color_blend_state.attachmentCount = - 32 - xe::lzcnt( - uint32_t(description.render_pass_key.depth_and_color_used >> 1)); + color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used); color_blend_state.pAttachments = color_blend_attachments; + if (color_rts_used && !device_features.independentBlend) { + // "If the independent blending feature is not enabled, all elements of + // pAttachments must be identical." + uint32_t first_color_rt_index; + xe::bit_scan_forward(color_rts_used, &first_color_rt_index); + for (uint32_t i = 0; i < color_blend_state.attachmentCount; ++i) { + if (i == first_color_rt_index) { + continue; + } + color_blend_attachments[i] = + color_blend_attachments[first_color_rt_index]; + } + } - static const VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; + std::array dynamic_states; VkPipelineDynamicStateCreateInfo dynamic_state; dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; dynamic_state.pNext = nullptr; dynamic_state.flags = 0; - dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states)); - dynamic_state.pDynamicStates = dynamic_states; + dynamic_state.dynamicStateCount = 0; + dynamic_state.pDynamicStates = dynamic_states.data(); + // Regardless of whether some of this state actually has any effect on the + // pipeline, marking all as dynamic because otherwise, binding any pipeline + // with such state not marked as dynamic will cause the dynamic state to be + // invalidated (again, even if it has no effect). 
+ dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT; + dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_DEPTH_BIAS; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_BLEND_CONSTANTS; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK; + dynamic_states[dynamic_state.dynamicStateCount++] = + VK_DYNAMIC_STATE_STENCIL_REFERENCE; VkGraphicsPipelineCreateInfo pipeline_create_info; pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; @@ -569,8 +1002,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated( pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE; pipeline_create_info.basePipelineIndex = UINT32_MAX; - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); VkPipeline pipeline; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index 7ca83a751..c753e4dfa 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -21,6 +21,7 @@ #include "xenia/base/xxhash.h" #include "xenia/gpu/primitive_processor.h" #include "xenia/gpu/register_file.h" +#include "xenia/gpu/registers.h" #include "xenia/gpu/spirv_shader_translator.h" #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" @@ -41,6 +42,9 @@ class VulkanPipelineCache { public: virtual ~PipelineLayoutProvider() {} virtual VkPipelineLayout GetPipelineLayout() const = 0; + + protected: + PipelineLayoutProvider() = default; }; VulkanPipelineCache(VulkanCommandProcessor& command_processor, @@ -65,37 +69,25 @@ class VulkanPipelineCache { const Shader& shader, Shader::HostVertexShaderType host_vertex_shader_type) const; SpirvShaderTranslator::Modification GetCurrentPixelShaderModification( - const Shader& shader) const; + const Shader& shader, uint32_t normalized_color_mask) const; // TODO(Triang3l): Return a deferred creation handle. bool ConfigurePipeline( VulkanShader::VulkanTranslation* vertex_shader, VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, VkPipeline& pipeline_out, const PipelineLayoutProvider*& pipeline_layout_out); private: - // Can only load pipeline storage if features of the device it was created on - // and the current device match because descriptions may requires features not - // supported on the device. Very radical differences (such as RB emulation - // method) should result in a different storage file being used. - union DevicePipelineFeatures { - struct { - uint32_t point_polygons : 1; - uint32_t triangle_fans : 1; - }; - uint32_t features = 0; - }; - enum class PipelinePrimitiveTopology : uint32_t { kPointList, kLineList, kLineStrip, kTriangleList, kTriangleStrip, - // Requires DevicePipelineFeatures::triangle_fans. 
kTriangleFan, kLineListWithAdjacency, kPatchList, @@ -107,6 +99,35 @@ class VulkanPipelineCache { kPoint, }; + enum class PipelineBlendFactor : uint32_t { + kZero, + kOne, + kSrcColor, + kOneMinusSrcColor, + kDstColor, + kOneMinusDstColor, + kSrcAlpha, + kOneMinusSrcAlpha, + kDstAlpha, + kOneMinusDstAlpha, + kConstantColor, + kOneMinusConstantColor, + kConstantAlpha, + kOneMinusConstantAlpha, + kSrcAlphaSaturate, + }; + + // Update PipelineDescription::kVersion if anything is changed! + XEPACKEDSTRUCT(PipelineRenderTarget, { + PipelineBlendFactor src_color_blend_factor : 4; // 4 + PipelineBlendFactor dst_color_blend_factor : 4; // 8 + xenos::BlendOp color_blend_op : 3; // 11 + PipelineBlendFactor src_alpha_blend_factor : 4; // 15 + PipelineBlendFactor dst_alpha_blend_factor : 4; // 19 + xenos::BlendOp alpha_blend_op : 3; // 22 + uint32_t color_write_mask : 4; // 26 + }); + XEPACKEDSTRUCT(PipelineDescription, { uint64_t vertex_shader_hash; uint64_t vertex_shader_modification; @@ -119,10 +140,27 @@ class VulkanPipelineCache { PipelinePrimitiveTopology primitive_topology : 3; // 3 uint32_t primitive_restart : 1; // 4 // Rasterization. - PipelinePolygonMode polygon_mode : 2; // 6 - uint32_t cull_front : 1; // 7 - uint32_t cull_back : 1; // 8 - uint32_t front_face_clockwise : 1; // 9 + uint32_t depth_clamp_enable : 1; // 5 + PipelinePolygonMode polygon_mode : 2; // 7 + uint32_t cull_front : 1; // 8 + uint32_t cull_back : 1; // 9 + uint32_t front_face_clockwise : 1; // 10 + // Depth / stencil. 
+ uint32_t depth_write_enable : 1; // 11 + xenos::CompareFunction depth_compare_op : 3; // 14 + uint32_t stencil_test_enable : 1; // 15 + xenos::StencilOp stencil_front_fail_op : 3; // 18 + xenos::StencilOp stencil_front_pass_op : 3; // 21 + xenos::StencilOp stencil_front_depth_fail_op : 3; // 24 + xenos::CompareFunction stencil_front_compare_op : 3; // 27 + xenos::StencilOp stencil_back_fail_op : 3; // 30 + + xenos::StencilOp stencil_back_pass_op : 3; // 3 + xenos::StencilOp stencil_back_depth_fail_op : 3; // 6 + xenos::CompareFunction stencil_back_compare_op : 3; // 9 + + // Filled only for the attachments present in the render pass object. + PipelineRenderTarget render_targets[xenos::kMaxColorRenderTargets]; // Including all the padding, for a stable hash. PipelineDescription() { Reset(); } @@ -166,13 +204,20 @@ class VulkanPipelineCache { bool TranslateAnalyzedShader(SpirvShaderTranslator& translator, VulkanShader::VulkanTranslation& translation); + void WritePipelineRenderTargetDescription( + reg::RB_BLENDCONTROL blend_control, uint32_t write_mask, + PipelineRenderTarget& render_target_out) const; bool GetCurrentStateDescription( const VulkanShader::VulkanTranslation* vertex_shader, const VulkanShader::VulkanTranslation* pixel_shader, const PrimitiveProcessor::ProcessingResult& primitive_processing_result, + uint32_t normalized_color_mask, VulkanRenderTargetCache::RenderPassKey render_pass_key, PipelineDescription& description_out) const; + // Whether the pipeline for the given description is supported by the device. + bool ArePipelineRequirementsMet(const PipelineDescription& description) const; + // Can be called from creation threads - all needed data must be fully set up // at the point of the call: shaders must be translated, pipeline layout and // render pass objects must be available. 
@@ -183,8 +228,6 @@ class VulkanPipelineCache { const RegisterFile& register_file_; VulkanRenderTargetCache& render_target_cache_; - DevicePipelineFeatures device_pipeline_features_; - // Temporary storage for AnalyzeUcode calls on the processor thread. StringBuffer ucode_disasm_buffer_; // Reusable shader translator on the command processor thread. diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index 5c1dd4168..7b628037b 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -28,11 +28,17 @@ VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); } bool VulkanPrimitiveProcessor::Initialize() { // TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch. - // TODO(Triang3l): Portability subset triangleFans check when portability - // subset support is added. // TODO(Triang3l): geometryShader check for quads when geometry shaders are // added. 
- if (!InitializeCommon(true, true, false, false)) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanProvider(); + const VkPhysicalDevicePortabilitySubsetFeaturesKHR* + device_portability_subset_features = + provider.device_portability_subset_features(); + if (!InitializeCommon(true, + !device_portability_subset_features || + device_portability_subset_features->triangleFans, + false, false)) { Shutdown(); return false; } diff --git a/src/xenia/ui/vulkan/functions/device_1_0.inc b/src/xenia/ui/vulkan/functions/device_1_0.inc index f0811fcea..2a979f55f 100644 --- a/src/xenia/ui/vulkan/functions/device_1_0.inc +++ b/src/xenia/ui/vulkan/functions/device_1_0.inc @@ -20,7 +20,12 @@ XE_UI_VULKAN_FUNCTION(vkCmdDrawIndexed) XE_UI_VULKAN_FUNCTION(vkCmdEndRenderPass) XE_UI_VULKAN_FUNCTION(vkCmdPipelineBarrier) XE_UI_VULKAN_FUNCTION(vkCmdPushConstants) +XE_UI_VULKAN_FUNCTION(vkCmdSetBlendConstants) +XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBias) XE_UI_VULKAN_FUNCTION(vkCmdSetScissor) +XE_UI_VULKAN_FUNCTION(vkCmdSetStencilCompareMask) +XE_UI_VULKAN_FUNCTION(vkCmdSetStencilReference) +XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask) XE_UI_VULKAN_FUNCTION(vkCmdSetViewport) XE_UI_VULKAN_FUNCTION(vkCreateBuffer) XE_UI_VULKAN_FUNCTION(vkCreateCommandPool)