diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 617cc76b4..4ee35cf11 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,9 @@ SpirvShaderTranslator::Features::Features( } } +const std::string SpirvShaderTranslator::kInterpolatorNamePrefix = + "xe_interpolator_"; + SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) : features_(features) {} @@ -363,6 +367,8 @@ void SpirvShaderTranslator::StartTranslation() { if (is_vertex_shader()) { StartVertexOrTessEvalShaderBeforeMain(); + } else if (is_pixel_shader()) { + StartFragmentShaderBeforeMain(); } // Begin the main function. @@ -394,8 +400,9 @@ void SpirvShaderTranslator::StartTranslation() { if (register_array_size) { id_vector_temp_.clear(); id_vector_temp_.reserve(register_array_size); - // TODO(Triang3l): In PS, only initialize starting from the interpolators, - // probably manually. But not very important. + // TODO(Triang3l): In PS, only need to initialize starting from the + // interpolators, probably manually. But likely not very important - the + // compiler in the driver will likely eliminate that write. for (uint32_t i = 0; i < register_array_size; ++i) { id_vector_temp_.push_back(const_float4_0_); } @@ -411,6 +418,8 @@ void SpirvShaderTranslator::StartTranslation() { // main function. 
if (is_vertex_shader()) { StartVertexOrTessEvalShaderInMain(); + } else if (is_pixel_shader()) { + StartFragmentShaderInMain(); } // Open the main loop. @@ -921,6 +930,16 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { main_interface_.push_back(input_vertex_index_); } + // Create the Xenia-specific outputs. + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { + spv::Id interpolator = builder_->createVariable( + spv::NoPrecision, spv::StorageClassOutput, type_float4_, + (kInterpolatorNamePrefix + std::to_string(i)).c_str()); + input_output_interpolators_[i] = interpolator; + builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + main_interface_.push_back(interpolator); + } + // Create the entire GLSL 4.50 gl_PerVertex output similar to what glslang // does. Members (like gl_PointSize) don't need to be used, and also // ClipDistance and CullDistance may exist even if the device doesn't support @@ -978,6 +997,11 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { spv::NoPrecision, spv::StorageClassFunction, type_float3_, "xe_var_point_size_edge_flag_kill_vertex"); + // Zero the interpolators. + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { + builder_->createStore(const_float4_0_, input_output_interpolators_[i]); + } + // Load the vertex index or the tessellation parameters. if (register_count()) { // TODO(Triang3l): Barycentric coordinates and patch index. @@ -1167,6 +1191,73 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { id_vector_temp_)); } +void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { + // Interpolator inputs. 
+ uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < interpolator_count; ++i) { + spv::Id interpolator = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4_, + (kInterpolatorNamePrefix + std::to_string(i)).c_str()); + input_output_interpolators_[i] = interpolator; + builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); + main_interface_.push_back(interpolator); + } + + // Framebuffer attachment outputs. + std::fill(output_fragment_data_.begin(), output_fragment_data_.end(), + spv::NoResult); + static const char* const kFragmentDataNames[] = { + "xe_out_fragment_data_0", + "xe_out_fragment_data_1", + "xe_out_fragment_data_2", + "xe_out_fragment_data_3", + }; + uint32_t shader_writes_color_targets = + current_shader().writes_color_targets(); + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (!(shader_writes_color_targets & (uint32_t(1) << i))) { + continue; + } + spv::Id output_fragment_data_rt = + builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_float4_, kFragmentDataNames[i]); + output_fragment_data_[i] = output_fragment_data_rt; + builder_->addDecoration(output_fragment_data_rt, spv::DecorationLocation, + int(i)); + // Make invariant as pixel shaders may be used for various precise + // computations. + builder_->addDecoration(output_fragment_data_rt, spv::DecorationInvariant); + main_interface_.push_back(output_fragment_data_rt); + } +} + +void SpirvShaderTranslator::StartFragmentShaderInMain() { + // Copy the interpolators to general-purpose registers. + // TODO(Triang3l): Centroid. + // TODO(Triang3l): ps_param_gen. + uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); + for (uint32_t i = 0; i < interpolator_count; ++i) { + id_vector_temp_.clear(); + // Register array element. 
+ id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); + builder_->createStore( + builder_->createLoad(input_output_interpolators_[i], spv::NoPrecision), + builder_->createAccessChain(spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + + // Initialize the colors for safety. + uint32_t shader_writes_color_targets = + current_shader().writes_color_targets(); + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (shader_writes_color_targets & (uint32_t(1) << i)) { + builder_->createStore(const_float4_0_, output_fragment_data_[i]); + } + } +} + void SpirvShaderTranslator::UpdateExecConditionals( ParsedExecInstruction::Type type, uint32_t bool_constant_index, bool condition) { @@ -1507,6 +1598,10 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, target_pointer = builder_->createAccessChain( spv::StorageClassFunction, var_main_registers_, id_vector_temp_util_); } break; + case InstructionStorageTarget::kInterpolator: + assert_true(is_vertex_shader()); + target_pointer = input_output_interpolators_[result.storage_index]; + break; case InstructionStorageTarget::kPosition: assert_true(is_vertex_shader()); id_vector_temp_util_.clear(); @@ -1515,6 +1610,13 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, target_pointer = builder_->createAccessChain( spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_); break; + case InstructionStorageTarget::kColor: + assert_true(is_pixel_shader()); + assert_not_zero(used_write_mask); + assert_true(current_shader().writes_color_target(result.storage_index)); + target_pointer = output_fragment_data_[result.storage_index]; + assert_true(target_pointer != spv::NoResult); + break; default: // TODO(Triang3l): All storage targets. 
break; diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index ef350b85e..94c58a976 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,8 +10,10 @@ #ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ #define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_ +#include #include #include +#include #include #include @@ -225,6 +227,9 @@ class SpirvShaderTranslator : public ShaderTranslator { void StartVertexOrTessEvalShaderInMain(); void CompleteVertexOrTessEvalShaderInMain(); + void StartFragmentShaderBeforeMain(); + void StartFragmentShaderInMain(); + // Updates the current flow control condition (to be called in the beginning // of exec and in jumps), closing the previous conditionals if needed. // However, if the condition is not different, the instruction-level predicate @@ -405,6 +410,12 @@ class SpirvShaderTranslator : public ShaderTranslator { // VS as TES only - int. spv::Id input_primitive_id_; + // In vertex or tessellation evaluation shaders - outputs, always + // xenos::kMaxInterpolators. + // In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()). 
+ spv::Id input_output_interpolators_[xenos::kMaxInterpolators]; + static const std::string kInterpolatorNamePrefix; + enum OutputPerVertexMember : unsigned int { kOutputPerVertexMemberPosition, kOutputPerVertexMemberPointSize, @@ -414,6 +425,8 @@ class SpirvShaderTranslator : public ShaderTranslator { }; spv::Id output_per_vertex_; + std::array output_fragment_data_; + std::vector main_interface_; spv::Function* function_main_; // bool. diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 14edfe4ab..6313254d0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -645,6 +645,35 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } pipeline_cache_->AnalyzeShaderUcode(*vertex_shader); + bool memexport_used_vertex = vertex_shader->is_valid_memexport_used(); + + // Pixel shader analysis. + bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs); + bool is_rasterization_done = + draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal); + VulkanShader* pixel_shader = nullptr; + if (is_rasterization_done) { + // See xenos::ModeControl for explanation why the pixel shader is only used + // when it's kColorDepth here. + if (edram_mode == xenos::ModeControl::kColorDepth) { + pixel_shader = static_cast(active_pixel_shader()); + if (pixel_shader) { + pipeline_cache_->AnalyzeShaderUcode(*pixel_shader); + if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader, + regs)) { + pixel_shader = nullptr; + } + } + } + } else { + // Disabling pixel shader for this case is also required by the pipeline + // cache. + if (!memexport_used_vertex) { + // This draw has no effect. + return true; + } + } + // TODO(Triang3l): Memory export. 
BeginSubmission(true); @@ -663,28 +692,20 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, return false; } - // TODO(Triang3l): Get a pixel shader. - VulkanShader* pixel_shader = nullptr; - // Shader modifications. SpirvShaderTranslator::Modification vertex_shader_modification = pipeline_cache_->GetCurrentVertexShaderModification( *vertex_shader, primitive_processing_result.host_vertex_shader_type); SpirvShaderTranslator::Modification pixel_shader_modification = - SpirvShaderTranslator::Modification(0); + pixel_shader + ? pipeline_cache_->GetCurrentPixelShaderModification(*pixel_shader) + : SpirvShaderTranslator::Modification(0); - VulkanRenderTargetCache::FramebufferKey framebuffer_key; - if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) { - return false; - } - VkFramebuffer framebuffer = - render_target_cache_->GetFramebuffer(framebuffer_key); - if (framebuffer == VK_NULL_HANDLE) { - return false; - } - VkRenderPass render_pass = - render_target_cache_->GetRenderPass(framebuffer_key.render_pass_key); - if (render_pass == VK_NULL_HANDLE) { + // Set up the render targets - this may perform dispatches and draws. + uint32_t pixel_shader_writes_color_targets = + pixel_shader ? pixel_shader->writes_color_targets() : 0; + if (!render_target_cache_->Update(is_rasterization_done, + pixel_shader_writes_color_targets)) { return false; } @@ -693,7 +714,11 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, static_cast( vertex_shader->GetOrCreateTranslation( vertex_shader_modification.value)); - VulkanShader::VulkanTranslation* pixel_shader_translation = nullptr; + VulkanShader::VulkanTranslation* pixel_shader_translation = + pixel_shader ? 
static_cast( + pixel_shader->GetOrCreateTranslation( + pixel_shader_modification.value)) + : nullptr; // Update the graphics pipeline, and if the new graphics pipeline has a // different layout, invalidate incompatible descriptor sets before updating @@ -702,8 +727,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider; if (!pipeline_cache_->ConfigurePipeline( vertex_shader_translation, pixel_shader_translation, - primitive_processing_result, framebuffer_key.render_pass_key, - pipeline, pipeline_layout_provider)) { + primitive_processing_result, + render_target_cache_->last_update_render_pass_key(), pipeline, + pipeline_layout_provider)) { return false; } deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -829,24 +855,28 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // TODO(Triang3l): Memory export. shared_memory_->Use(VulkanSharedMemory::Usage::kRead); - // After all commands that may dispatch or copy, enter the render pass before - // drawing. + // After all commands that may dispatch, copy or insert barriers, enter the + // render pass before drawing. 
+ VkRenderPass render_pass = render_target_cache_->last_update_render_pass(); + const VulkanRenderTargetCache::Framebuffer* framebuffer = + render_target_cache_->last_update_framebuffer(); if (current_render_pass_ != render_pass || - current_framebuffer_ != framebuffer) { + current_framebuffer_ != framebuffer->framebuffer) { if (current_render_pass_ != VK_NULL_HANDLE) { deferred_command_buffer_.CmdVkEndRenderPass(); } current_render_pass_ = render_pass; - current_framebuffer_ = framebuffer; + current_framebuffer_ = framebuffer->framebuffer; VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; render_pass_begin_info.renderPass = render_pass; - render_pass_begin_info.framebuffer = framebuffer; + render_pass_begin_info.framebuffer = framebuffer->framebuffer; render_pass_begin_info.renderArea.offset.x = 0; render_pass_begin_info.renderArea.offset.y = 0; - render_pass_begin_info.renderArea.extent.width = 1280; - render_pass_begin_info.renderArea.extent.height = 720; + // TODO(Triang3l): Actual dirty width / height in the deferred command + // buffer. 
+ render_pass_begin_info.renderArea.extent = framebuffer->host_extent; render_pass_begin_info.clearValueCount = 0; render_pass_begin_info.pClearValues = nullptr; deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info, diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 5ce43edc7..30892d079 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -513,7 +513,34 @@ bool VulkanPipelineCache::EnsurePipelineCreated( VkPipelineMultisampleStateCreateInfo multisample_state = {}; multisample_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + multisample_state.rasterizationSamples = VkSampleCountFlagBits( + uint32_t(1) << uint32_t(description.render_pass_key.msaa_samples)); + + // TODO(Triang3l): Depth / stencil state. + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {}; + depth_stencil_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depth_stencil_state.pNext = nullptr; + + // TODO(Triang3l): Color blend state. + // TODO(Triang3l): Handle disabled separate blending. 
+ VkPipelineColorBlendAttachmentState + color_blend_attachments[xenos::kMaxColorRenderTargets] = {}; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (!(description.render_pass_key.depth_and_color_used & (1 << (1 + i)))) { + continue; + } + color_blend_attachments[i].colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } + VkPipelineColorBlendStateCreateInfo color_blend_state = {}; + color_blend_state.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blend_state.attachmentCount = + 32 - xe::lzcnt( + uint32_t(description.render_pass_key.depth_and_color_used >> 1)); + color_blend_state.pAttachments = color_blend_attachments; static const VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, @@ -538,8 +565,8 @@ bool VulkanPipelineCache::EnsurePipelineCreated( pipeline_create_info.pViewportState = &viewport_state; pipeline_create_info.pRasterizationState = &rasterization_state; pipeline_create_info.pMultisampleState = &multisample_state; - pipeline_create_info.pDepthStencilState = nullptr; - pipeline_create_info.pColorBlendState = nullptr; + pipeline_create_info.pDepthStencilState = &depth_stencil_state; + pipeline_create_info.pColorBlendState = &color_blend_state; pipeline_create_info.pDynamicState = &dynamic_state; pipeline_create_info.layout = creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout(); diff --git a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc index deeef270f..f78a65d7c 100644 --- a/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_primitive_processor.cc @@ -113,6 +113,7 @@ void VulkanPrimitiveProcessor::BeginSubmission() { builtin_index_buffer_memory_barrier.buffer = builtin_index_buffer_; builtin_index_buffer_memory_barrier.offset = 0; builtin_index_buffer_memory_barrier.size = VK_WHOLE_SIZE; + 
command_processor_.EndRenderPass(); command_buffer.CmdVkPipelineBarrier( VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1, &builtin_index_buffer_memory_barrier, 0, nullptr); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index e85b6ea8b..bb9058a82 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -2,15 +2,26 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ #include "xenia/gpu/vulkan/vulkan_render_target_cache.h" +#include +#include +#include +#include +#include +#include + +#include "xenia/base/assert.h" #include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/gpu/registers.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" +#include "xenia/ui/vulkan/vulkan_util.h" namespace xe { namespace gpu { @@ -19,13 +30,38 @@ namespace vulkan { VulkanRenderTargetCache::VulkanRenderTargetCache( VulkanCommandProcessor& command_processor, const RegisterFile& register_file) - : command_processor_(command_processor), register_file_(register_file) {} + : RenderTargetCache(register_file), command_processor_(command_processor) {} -VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(); } +VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); } -bool VulkanRenderTargetCache::Initialize() { return true; } +bool VulkanRenderTargetCache::Initialize() { + InitializeCommon(); + return true; +} -void VulkanRenderTargetCache::Shutdown() { 
ClearCache(); } +void VulkanRenderTargetCache::Shutdown(bool from_destructor) { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + last_update_framebuffer_ = VK_NULL_HANDLE; + for (const auto& framebuffer_pair : framebuffers_) { + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, + nullptr); + } + framebuffers_.clear(); + + last_update_render_pass_ = VK_NULL_HANDLE; + for (const auto& render_pass_pair : render_passes_) { + dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); + } + render_passes_.clear(); + + if (!from_destructor) { + ShutdownCommon(); + } +} void VulkanRenderTargetCache::ClearCache() { const ui::vulkan::VulkanProvider& provider = @@ -33,15 +69,190 @@ void VulkanRenderTargetCache::ClearCache() { const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); + // Framebuffer objects must be destroyed because they reference views of + // attachment images, which may be removed by the common ClearCache. 
+ last_update_framebuffer_ = VK_NULL_HANDLE; for (const auto& framebuffer_pair : framebuffers_) { - dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr); + dfn.vkDestroyFramebuffer(device, framebuffer_pair.second.framebuffer, + nullptr); } framebuffers_.clear(); + last_update_render_pass_ = VK_NULL_HANDLE; for (const auto& render_pass_pair : render_passes_) { dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr); } render_passes_.clear(); + + RenderTargetCache::ClearCache(); +} + +bool VulkanRenderTargetCache::Update(bool is_rasterization_done, + uint32_t shader_writes_color_targets) { + if (!RenderTargetCache::Update(is_rasterization_done, + shader_writes_color_targets)) { + return false; + } + + auto rb_surface_info = register_file().Get(); + RenderTarget* const* depth_and_color_render_targets = + last_update_accumulated_render_targets(); + uint32_t render_targets_are_srgb = + gamma_render_target_as_srgb_ + ? last_update_accumulated_color_targets_are_gamma() + : 0; + + RenderPassKey render_pass_key; + render_pass_key.msaa_samples = rb_surface_info.msaa_samples; + // TODO(Triang3l): 2x MSAA as 4x. + if (depth_and_color_render_targets[0]) { + render_pass_key.depth_and_color_used |= 1 << 0; + render_pass_key.depth_format = + depth_and_color_render_targets[0]->key().GetDepthFormat(); + } + if (depth_and_color_render_targets[1]) { + render_pass_key.depth_and_color_used |= 1 << 1; + render_pass_key.color_0_view_format = + (render_targets_are_srgb & (1 << 0)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[1]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[2]) { + render_pass_key.depth_and_color_used |= 1 << 2; + render_pass_key.color_1_view_format = + (render_targets_are_srgb & (1 << 1)) + ? 
xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[2]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[3]) { + render_pass_key.depth_and_color_used |= 1 << 3; + render_pass_key.color_2_view_format = + (render_targets_are_srgb & (1 << 2)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[3]->key().GetColorFormat(); + } + if (depth_and_color_render_targets[4]) { + render_pass_key.depth_and_color_used |= 1 << 4; + render_pass_key.color_3_view_format = + (render_targets_are_srgb & (1 << 3)) + ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA + : depth_and_color_render_targets[4]->key().GetColorFormat(); + } + + const Framebuffer* framebuffer = last_update_framebuffer_; + VkRenderPass render_pass = last_update_render_pass_key_ == render_pass_key + ? last_update_render_pass_ + : VK_NULL_HANDLE; + if (render_pass == VK_NULL_HANDLE) { + render_pass = GetRenderPass(render_pass_key); + if (render_pass == VK_NULL_HANDLE) { + return false; + } + // Framebuffer for a different render pass needed now. + framebuffer = nullptr; + } + + uint32_t pitch_tiles_at_32bpp = + ((rb_surface_info.surface_pitch + << uint32_t(rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X)) + + (xenos::kEdramTileWidthSamples - 1)) / + xenos::kEdramTileWidthSamples; + if (framebuffer) { + if (last_update_framebuffer_pitch_tiles_at_32bpp_ != pitch_tiles_at_32bpp || + std::memcmp(last_update_framebuffer_attachments_, + depth_and_color_render_targets, + sizeof(last_update_framebuffer_attachments_))) { + framebuffer = nullptr; + } + } + if (!framebuffer) { + framebuffer = GetFramebuffer(render_pass_key, pitch_tiles_at_32bpp, + depth_and_color_render_targets); + if (!framebuffer) { + return false; + } + } + + // Successful update - write the new configuration. 
+ last_update_render_pass_key_ = render_pass_key; + last_update_render_pass_ = render_pass; + last_update_framebuffer_pitch_tiles_at_32bpp_ = pitch_tiles_at_32bpp; + std::memcpy(last_update_framebuffer_attachments_, + depth_and_color_render_targets, + sizeof(last_update_framebuffer_attachments_)); + last_update_framebuffer_ = framebuffer; + + // Transition the used render targets. + VkPipelineStageFlags barrier_src_stage_mask = 0; + VkPipelineStageFlags barrier_dst_stage_mask = 0; + VkImageMemoryBarrier barrier_image_memory[1 + xenos::kMaxColorRenderTargets]; + uint32_t barrier_image_memory_count = 0; + for (uint32_t i = 0; i < 1 + xenos::kMaxColorRenderTargets; ++i) { + RenderTarget* rt = depth_and_color_render_targets[i]; + if (!rt) { + continue; + } + auto& vulkan_rt = *static_cast(rt); + VkPipelineStageFlags rt_src_stage_mask = vulkan_rt.current_stage_mask(); + VkAccessFlags rt_src_access_mask = vulkan_rt.current_access_mask(); + VkImageLayout rt_old_layout = vulkan_rt.current_layout(); + VkPipelineStageFlags rt_dst_stage_mask; + VkAccessFlags rt_dst_access_mask; + VkImageLayout rt_new_layout; + if (i) { + rt_dst_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + rt_dst_access_mask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + rt_new_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } else { + rt_dst_stage_mask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + rt_dst_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + rt_new_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } + bool rt_image_memory_barrier_needed = + rt_src_access_mask != rt_dst_access_mask || + rt_old_layout != rt_new_layout; + if (rt_image_memory_barrier_needed || + rt_src_stage_mask != rt_dst_stage_mask) { + barrier_src_stage_mask |= rt_src_stage_mask; + barrier_dst_stage_mask |= rt_dst_stage_mask; + if 
(rt_image_memory_barrier_needed) { + VkImageMemoryBarrier& rt_image_memory_barrier = + barrier_image_memory[barrier_image_memory_count++]; + rt_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + rt_image_memory_barrier.pNext = nullptr; + rt_image_memory_barrier.srcAccessMask = rt_src_access_mask; + rt_image_memory_barrier.dstAccessMask = rt_dst_access_mask; + rt_image_memory_barrier.oldLayout = rt_old_layout; + rt_image_memory_barrier.newLayout = rt_new_layout; + rt_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + rt_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + rt_image_memory_barrier.image = vulkan_rt.image(); + ui::vulkan::util::InitializeSubresourceRange( + rt_image_memory_barrier.subresourceRange, + i ? VK_IMAGE_ASPECT_COLOR_BIT + : (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); + } + vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout); + } + } + if (barrier_src_stage_mask || barrier_dst_stage_mask || + barrier_image_memory_count) { + if (!barrier_src_stage_mask) { + barrier_src_stage_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + } + if (!barrier_dst_stage_mask) { + barrier_dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + command_processor_.EndRenderPass(); + command_processor_.deferred_command_buffer().CmdVkPipelineBarrier( + barrier_src_stage_mask, barrier_dst_stage_mask, 0, 0, nullptr, 0, + nullptr, barrier_image_memory_count, barrier_image_memory); + } + + return true; } VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { @@ -50,30 +261,128 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { return it->second; } - // TODO(Triang3l): Attachments and dependencies. 
+ VkSampleCountFlagBits samples; + switch (key.msaa_samples) { + case xenos::MsaaSamples::k1X: + samples = VK_SAMPLE_COUNT_1_BIT; + break; + case xenos::MsaaSamples::k2X: + // Using unconditionally because if 2x is emulated as 4x, the key will + // also contain 4x. + samples = VK_SAMPLE_COUNT_2_BIT; + break; + case xenos::MsaaSamples::k4X: + samples = VK_SAMPLE_COUNT_4_BIT; + break; + default: + return VK_NULL_HANDLE; + } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = 0; - subpass_description.pColorAttachments = nullptr; - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + VkAttachmentDescription attachments[1 + xenos::kMaxColorRenderTargets]; + if (key.depth_and_color_used & 0b1) { + VkAttachmentDescription& attachment = attachments[0]; + attachment.flags = 0; + attachment.format = GetDepthVulkanFormat(key.depth_format); + attachment.samples = samples; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + } + VkAttachmentReference color_attachments[xenos::kMaxColorRenderTargets]; + xenos::ColorRenderTargetFormat color_formats[] = { + key.color_0_view_format, + key.color_1_view_format, + key.color_2_view_format, + key.color_3_view_format, + }; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + VkAttachmentReference& 
color_attachment = color_attachments[i]; + color_attachment.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + uint32_t attachment_bit = uint32_t(1) << (1 + i); + if (!(key.depth_and_color_used & attachment_bit)) { + color_attachment.attachment = VK_ATTACHMENT_UNUSED; + continue; + } + uint32_t attachment_index = + xe::bit_count(key.depth_and_color_used & (attachment_bit - 1)); + color_attachment.attachment = attachment_index; + VkAttachmentDescription& attachment = attachments[attachment_index]; + attachment.flags = 0; + attachment.format = GetColorVulkanFormat(color_formats[i]); + attachment.samples = samples; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + } + + VkAttachmentReference depth_stencil_attachment; + depth_stencil_attachment.attachment = + (key.depth_and_color_used & 0b1) ? 0 : VK_ATTACHMENT_UNUSED; + depth_stencil_attachment.layout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass; + subpass.flags = 0; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = nullptr; + subpass.colorAttachmentCount = + 32 - xe::lzcnt(uint32_t(key.depth_and_color_used >> 1)); + subpass.pColorAttachments = color_attachments; + subpass.pResolveAttachments = nullptr; + subpass.pDepthStencilAttachment = + (key.depth_and_color_used & 0b1) ? 
&depth_stencil_attachment : nullptr; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = nullptr; + + VkPipelineStageFlags dependency_stage_mask = 0; + VkAccessFlags dependency_access_mask = 0; + if (key.depth_and_color_used & 0b1) { + dependency_stage_mask |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency_access_mask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + if (key.depth_and_color_used >> 1) { + dependency_stage_mask |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency_access_mask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + VkSubpassDependency subpass_dependencies[2]; + subpass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependencies[0].dstSubpass = 0; + subpass_dependencies[0].srcStageMask = dependency_stage_mask; + subpass_dependencies[0].dstStageMask = dependency_stage_mask; + subpass_dependencies[0].srcAccessMask = dependency_access_mask; + subpass_dependencies[0].dstAccessMask = dependency_access_mask; + subpass_dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + subpass_dependencies[1].srcSubpass = 0; + subpass_dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL; + subpass_dependencies[1].srcStageMask = dependency_stage_mask; + subpass_dependencies[1].dstStageMask = dependency_stage_mask; + subpass_dependencies[1].srcAccessMask = dependency_access_mask; + subpass_dependencies[1].dstAccessMask = dependency_access_mask; + subpass_dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; VkRenderPassCreateInfo render_pass_create_info; render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; render_pass_create_info.pNext = nullptr; render_pass_create_info.flags = 0; - render_pass_create_info.attachmentCount = 0; - render_pass_create_info.pAttachments = nullptr; + render_pass_create_info.attachmentCount = + 
xe::bit_count(key.depth_and_color_used); + render_pass_create_info.pAttachments = attachments; render_pass_create_info.subpassCount = 1; - render_pass_create_info.pSubpasses = &subpass_description; - render_pass_create_info.dependencyCount = 0; - render_pass_create_info.pDependencies = nullptr; + render_pass_create_info.pSubpasses = &subpass; + render_pass_create_info.dependencyCount = + key.depth_and_color_used ? uint32_t(xe::countof(subpass_dependencies)) + : 0; + render_pass_create_info.pDependencies = subpass_dependencies; const ui::vulkan::VulkanProvider& provider = command_processor_.GetVulkanContext().GetVulkanProvider(); @@ -89,15 +398,343 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) { return render_pass; } -VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) { - auto it = framebuffers_.find(key); - if (it != framebuffers_.end()) { - return it->second; +VkFormat VulkanRenderTargetCache::GetDepthVulkanFormat( + xenos::DepthRenderTargetFormat format) const { + // TODO(Triang3l): Conditional 24-bit depth. + return VK_FORMAT_D32_SFLOAT_S8_UINT; +} + +VkFormat VulkanRenderTargetCache::GetColorVulkanFormat( + xenos::ColorRenderTargetFormat format) const { + switch (format) { + case xenos::ColorRenderTargetFormat::k_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + return gamma_render_target_as_srgb_ ? 
VK_FORMAT_R8G8B8A8_SRGB + : VK_FORMAT_R8G8B8A8_UNORM; + case xenos::ColorRenderTargetFormat::k_2_10_10_10: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_16_16: + // TODO(Triang3l): Fallback to float16 (disregarding clearing correctness + // likely) - possibly on render target gathering, treating them entirely + // as float16. + return VK_FORMAT_R16G16_SNORM; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + // TODO(Triang3l): Fallback to float16 (disregarding clearing correctness + // likely) - possibly on render target gathering, treating them entirely + // as float16. + return VK_FORMAT_R16G16B16A16_SNORM; + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_SFLOAT; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_SFLOAT; + default: + assert_unhandled_case(format); + return VK_FORMAT_UNDEFINED; + } +} + +VkFormat VulkanRenderTargetCache::GetColorOwnershipTransferVulkanFormat( + xenos::ColorRenderTargetFormat format, bool* is_integer_out) const { + if (is_integer_out) { + *is_integer_out = true; + } + // Floating-point numbers have NaNs that need to be propagated without + // modifications to the bit representation, and SNORM has two representations + // of -1. 
+ switch (format) { + case xenos::ColorRenderTargetFormat::k_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_UINT; + case xenos::ColorRenderTargetFormat::k_16_16_16_16: + case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_UINT; + case xenos::ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_UINT; + case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_UINT; + default: + if (is_integer_out) { + *is_integer_out = false; + } + return GetColorVulkanFormat(format); + } +} + +VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + if (view_color_transfer_separate_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_color_transfer_separate_, nullptr); + } + if (view_srgb_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb_, nullptr); + } + if (view_stencil_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_stencil_, nullptr); + } + if (view_depth_stencil_ != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_depth_stencil_, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color_, nullptr); + dfn.vkDestroyImage(device, image_, nullptr); + dfn.vkFreeMemory(device, memory_, nullptr); +} + +uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + return provider.device_properties().limits.maxFramebufferWidth; +} + +uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const { + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + return provider.device_properties().limits.maxFramebufferHeight; +} + +RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget( + RenderTargetKey key) { + const 
ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + // Create the image. + + VkImageCreateInfo image_create_info; + image_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_create_info.pNext = nullptr; + image_create_info.flags = 0; + image_create_info.imageType = VK_IMAGE_TYPE_2D; + // TODO(Triang3l): Resolution scaling. + image_create_info.extent.width = key.GetWidth(); + image_create_info.extent.height = + GetRenderTargetHeight(key.pitch_tiles_at_32bpp, key.msaa_samples); + image_create_info.extent.depth = 1; + image_create_info.mipLevels = 1; + image_create_info.arrayLayers = 1; + // TODO(Triang3l): 2x MSAA as 4x. + image_create_info.samples = + VkSampleCountFlagBits(uint32_t(1) << uint32_t(key.msaa_samples)); + image_create_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_create_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + image_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_create_info.queueFamilyIndexCount = 0; + image_create_info.pQueueFamilyIndices = nullptr; + image_create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VkFormat transfer_format; + bool is_srgb_view_needed = false; + if (key.is_depth) { + image_create_info.format = GetDepthVulkanFormat(key.GetDepthFormat()); + transfer_format = image_create_info.format; + image_create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } else { + xenos::ColorRenderTargetFormat color_format = key.GetColorFormat(); + image_create_info.format = GetColorVulkanFormat(color_format); + transfer_format = GetColorOwnershipTransferVulkanFormat(color_format); + is_srgb_view_needed = + gamma_render_target_as_srgb_ && + (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8 || + color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA); + if (image_create_info.format != transfer_format || is_srgb_view_needed) { 
+ image_create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } + image_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + if (image_create_info.format == VK_FORMAT_UNDEFINED) { + XELOGE("VulkanRenderTargetCache: Unknown {} render target format {}", + key.is_depth ? "depth" : "color", key.resource_format); + return nullptr; + } + VkImage image; + if (dfn.vkCreateImage(device, &image_create_info, nullptr, &image) != + VK_SUCCESS) { + // TODO(Triang3l): Error message. + return nullptr; } - VkRenderPass render_pass = GetRenderPass(key.render_pass_key); + // Allocate and bind the memory. + + VkMemoryAllocateInfo memory_allocate_info; + VkMemoryRequirements memory_requirements; + dfn.vkGetImageMemoryRequirements(device, image, &memory_requirements); + if (!xe::bit_scan_forward(memory_requirements.memoryTypeBits & + provider.memory_types_device_local(), + &memory_allocate_info.memoryTypeIndex)) { + dfn.vkDestroyImage(device, image, nullptr); + return nullptr; + } + memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + VkMemoryDedicatedAllocateInfoKHR memory_dedicated_allocate_info; + if (provider.device_extensions().khr_dedicated_allocation) { + memory_dedicated_allocate_info.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; + memory_dedicated_allocate_info.pNext = nullptr; + memory_dedicated_allocate_info.image = image; + memory_dedicated_allocate_info.buffer = VK_NULL_HANDLE; + memory_allocate_info.pNext = &memory_dedicated_allocate_info; + } else { + memory_allocate_info.pNext = nullptr; + } + memory_allocate_info.allocationSize = memory_requirements.size; + VkDeviceMemory memory; + if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) != + VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImage(device, image, nullptr); + return nullptr; + } + if (dfn.vkBindImageMemory(device, image, memory, 0) != VK_SUCCESS) { + // TODO(Triang3l): Error message. 
+ dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + + // Create the image views. + + VkImageViewCreateInfo view_create_info; + view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + view_create_info.pNext = nullptr; + view_create_info.flags = 0; + view_create_info.image = image; + view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_create_info.format = image_create_info.format; + view_create_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + view_create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + ui::vulkan::util::InitializeSubresourceRange( + view_create_info.subresourceRange, + key.is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT); + VkImageView view_depth_color; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_depth_color) != VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + VkImageView view_depth_stencil = VK_NULL_HANDLE; + VkImageView view_stencil = VK_NULL_HANDLE; + VkImageView view_srgb = VK_NULL_HANDLE; + VkImageView view_color_transfer_separate = VK_NULL_HANDLE; + if (key.is_depth) { + view_create_info.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_depth_stencil) != VK_SUCCESS) { + // TODO(Triang3l): Error message. 
+ dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_stencil) != VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImageView(device, view_depth_stencil, nullptr); + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } else { + if (is_srgb_view_needed) { + view_create_info.format = VK_FORMAT_R8G8B8A8_SRGB; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_srgb) != VK_SUCCESS) { + // TODO(Triang3l): Error message. + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } + if (transfer_format != image_create_info.format) { + view_create_info.format = transfer_format; + if (dfn.vkCreateImageView(device, &view_create_info, nullptr, + &view_color_transfer_separate) != VK_SUCCESS) { + // TODO(Triang3l): Error message. 
+ if (view_srgb != VK_NULL_HANDLE) { + dfn.vkDestroyImageView(device, view_srgb, nullptr); + } + dfn.vkDestroyImageView(device, view_depth_color, nullptr); + dfn.vkDestroyImage(device, image, nullptr); + dfn.vkFreeMemory(device, memory, nullptr); + return nullptr; + } + } + } + + VkImageView view_transfer_separate = VK_NULL_HANDLE; + + return new VulkanRenderTarget(key, provider, image, memory, view_depth_color, + view_depth_stencil, view_stencil, view_srgb, + view_color_transfer_separate); +} + +const VulkanRenderTargetCache::Framebuffer* +VulkanRenderTargetCache::GetFramebuffer( + RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, + const RenderTarget* const* depth_and_color_render_targets) { + FramebufferKey key; + key.render_pass_key = render_pass_key; + key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp; + if (render_pass_key.depth_and_color_used & (1 << 0)) { + key.depth_base_tiles = depth_and_color_render_targets[0]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 1)) { + key.color_0_base_tiles = + depth_and_color_render_targets[1]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 2)) { + key.color_1_base_tiles = + depth_and_color_render_targets[2]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 3)) { + key.color_2_base_tiles = + depth_and_color_render_targets[3]->key().base_tiles; + } + if (render_pass_key.depth_and_color_used & (1 << 4)) { + key.color_3_base_tiles = + depth_and_color_render_targets[4]->key().base_tiles; + } + auto it = framebuffers_.find(key); + if (it != framebuffers_.end()) { + return &it->second; + } + + const ui::vulkan::VulkanProvider& provider = + command_processor_.GetVulkanContext().GetVulkanProvider(); + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); + VkDevice device = provider.device(); + + VkRenderPass render_pass = GetRenderPass(render_pass_key); if (render_pass == VK_NULL_HANDLE) { - return VK_NULL_HANDLE; + return 
nullptr; + } + + VkImageView attachments[1 + xenos::kMaxColorRenderTargets]; + uint32_t attachment_count = 0; + uint32_t depth_and_color_rts_remaining = render_pass_key.depth_and_color_used; + uint32_t rt_index; + while (xe::bit_scan_forward(depth_and_color_rts_remaining, &rt_index)) { + depth_and_color_rts_remaining &= ~(uint32_t(1) << rt_index); + const auto& vulkan_rt = *static_cast( + depth_and_color_render_targets[rt_index]); + attachments[attachment_count++] = rt_index ? vulkan_rt.view_depth_color() + : vulkan_rt.view_depth_stencil(); } VkFramebufferCreateInfo framebuffer_create_info; @@ -105,30 +742,33 @@ VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) { framebuffer_create_info.pNext = nullptr; framebuffer_create_info.flags = 0; framebuffer_create_info.renderPass = render_pass; - framebuffer_create_info.attachmentCount = 0; - framebuffer_create_info.pAttachments = nullptr; - framebuffer_create_info.width = 1280; - framebuffer_create_info.height = 720; + framebuffer_create_info.attachmentCount = attachment_count; + framebuffer_create_info.pAttachments = attachments; + VkExtent2D host_extent; + if (pitch_tiles_at_32bpp) { + host_extent.width = RenderTargetKey::GetWidth(pitch_tiles_at_32bpp, + render_pass_key.msaa_samples); + host_extent.height = GetRenderTargetHeight(pitch_tiles_at_32bpp, + render_pass_key.msaa_samples); + } else { + assert_zero(render_pass_key.depth_and_color_used); + host_extent.width = 0; + host_extent.height = 0; + } + // Vulkan requires width and height greater than 0. 
+ framebuffer_create_info.width = std::max(host_extent.width, uint32_t(1)); + framebuffer_create_info.height = std::max(host_extent.height, uint32_t(1)); framebuffer_create_info.layers = 1; - - const ui::vulkan::VulkanProvider& provider = - command_processor_.GetVulkanContext().GetVulkanProvider(); - const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); - VkDevice device = provider.device(); VkFramebuffer framebuffer; if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr, &framebuffer) != VK_SUCCESS) { - XELOGE("Failed to create a Vulkan framebuffer"); - return VK_NULL_HANDLE; + return nullptr; } - framebuffers_.emplace(key, framebuffer); - return framebuffer; -} - -bool VulkanRenderTargetCache::UpdateRenderTargets( - FramebufferKey& framebuffer_key_out) { - framebuffer_key_out = FramebufferKey(); - return true; + // Creates at a persistent location - safe to use pointers. + return &framebuffers_ + .emplace(std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple(framebuffer, host_extent)) + .first->second; } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 11be41612..080724ceb 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2020 Ben Vanik. All rights reserved. * + * Copyright 2021 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -14,8 +14,8 @@ #include #include -#include "xenia/base/xxhash.h" -#include "xenia/gpu/register_file.h" +#include "xenia/base/hash.h" +#include "xenia/gpu/render_target_cache.h" #include "xenia/ui/vulkan/vulkan_provider.h" namespace xe { @@ -24,18 +24,183 @@ namespace vulkan { class VulkanCommandProcessor; -// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D -// implementations. -class VulkanRenderTargetCache { +class VulkanRenderTargetCache final : public RenderTargetCache { public: union RenderPassKey { + struct { + // If emulating 2x as 4x, set this to 4x for 2x not to create unnecessary + // render pass objects. + xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 2 + // << 0 is depth, << 1...4 is color. + uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets; // 7 + // 0 for unused attachments. + // If VK_FORMAT_D24_UNORM_S8_UINT is not supported, this must be kD24FS8 + // even for kD24S8. + xenos::DepthRenderTargetFormat depth_format + : xenos::kDepthRenderTargetFormatBits; // 8 + // Linear or sRGB included if host sRGB is used. 
+ xenos::ColorRenderTargetFormat color_0_view_format + : xenos::kColorRenderTargetFormatBits; // 12 + xenos::ColorRenderTargetFormat color_1_view_format + : xenos::kColorRenderTargetFormatBits; // 16 + xenos::ColorRenderTargetFormat color_2_view_format + : xenos::kColorRenderTargetFormatBits; // 20 + xenos::ColorRenderTargetFormat color_3_view_format + : xenos::kColorRenderTargetFormatBits; // 24 + }; uint32_t key = 0; + struct Hasher { + size_t operator()(const RenderPassKey& key) const { + return std::hash{}(key.key); + } + }; + bool operator==(const RenderPassKey& other_key) const { + return key == other_key.key; + } + bool operator!=(const RenderPassKey& other_key) const { + return !(*this == other_key); + } }; - static_assert(sizeof(RenderPassKey) == sizeof(uint32_t)); + static_assert_size(RenderPassKey, sizeof(uint32_t)); + + struct Framebuffer { + VkFramebuffer framebuffer; + VkExtent2D host_extent; + Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent) + : framebuffer(framebuffer), host_extent(host_extent) {} + }; + + VulkanRenderTargetCache(VulkanCommandProcessor& command_processor, + const RegisterFile& register_file); + ~VulkanRenderTargetCache(); + + bool Initialize(); + void Shutdown(bool from_destructor = false); + void ClearCache() override; + + // TODO(Triang3l): Fragment shader interlock. + Path GetPath() const override { return Path::kHostRenderTargets; } + + // TODO(Triang3l): Resolution scaling. + uint32_t GetResolutionScale() const override { return 1; } + + bool Update(bool is_rasterization_done, + uint32_t shader_writes_color_targets) override; + // Binding information for the last successful update. 
+ RenderPassKey last_update_render_pass_key() const { + return last_update_render_pass_key_; + } + VkRenderPass last_update_render_pass() const { + return last_update_render_pass_; + } + const Framebuffer* last_update_framebuffer() const { + return last_update_framebuffer_; + } + + // Returns the render pass object, or VK_NULL_HANDLE if failed to create. + // A render pass managed by the render target cache may be ended and resumed + // at any time (to allow for things like copying and texture loading). + VkRenderPass GetRenderPass(RenderPassKey key); + + VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const; + VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const; + VkFormat GetColorOwnershipTransferVulkanFormat( + xenos::ColorRenderTargetFormat format, + bool* is_integer_out = nullptr) const; + + protected: + // Can only be destroyed when framebuffers referencing it are destroyed! + class VulkanRenderTarget final : public RenderTarget { + public: + // Takes ownership of the Vulkan objects passed to the constructor. 
+ VulkanRenderTarget(RenderTargetKey key, + const ui::vulkan::VulkanProvider& provider, + VkImage image, VkDeviceMemory memory, + VkImageView view_depth_color, + VkImageView view_depth_stencil, VkImageView view_stencil, + VkImageView view_srgb, + VkImageView view_color_transfer_separate) + : RenderTarget(key), + provider_(provider), + image_(image), + memory_(memory), + view_depth_color_(view_depth_color), + view_depth_stencil_(view_depth_stencil), + view_stencil_(view_stencil), + view_srgb_(view_srgb), + view_color_transfer_separate_(view_color_transfer_separate) {} + ~VulkanRenderTarget(); + + VkImage image() const { return image_; } + + VkImageView view_depth_color() const { return view_depth_color_; } + VkImageView view_depth_stencil() const { return view_depth_stencil_; } + + VkPipelineStageFlags current_stage_mask() const { + return current_stage_mask_; + } + VkAccessFlags current_access_mask() const { return current_access_mask_; } + VkImageLayout current_layout() const { return current_layout_; } + void SetUsage(VkPipelineStageFlags stage_mask, VkAccessFlags access_mask, + VkImageLayout layout) { + current_stage_mask_ = stage_mask; + current_access_mask_ = access_mask; + current_layout_ = layout; + } + + private: + const ui::vulkan::VulkanProvider& provider_; + + VkImage image_; + VkDeviceMemory memory_; + + // TODO(Triang3l): Per-format drawing views for mutable formats with EDRAM + // aliasing without transfers. + VkImageView view_depth_color_; + // Optional views. 
+ VkImageView view_depth_stencil_; + VkImageView view_stencil_; + VkImageView view_srgb_; + VkImageView view_color_transfer_separate_; + + VkPipelineStageFlags current_stage_mask_ = 0; + VkAccessFlags current_access_mask_ = 0; + VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; + }; + + uint32_t GetMaxRenderTargetWidth() const override; + uint32_t GetMaxRenderTargetHeight() const override; + + RenderTarget* CreateRenderTarget(RenderTargetKey key) override; + + // TODO(Triang3l): Check actual unorm24 support. + bool IsHostDepthEncodingDifferent( + xenos::DepthRenderTargetFormat format) const override { + return true; + } + + private: + VulkanCommandProcessor& command_processor_; + + // RenderPassKey::key -> VkRenderPass. + std::unordered_map render_passes_; + + // For host render targets. struct FramebufferKey { RenderPassKey render_pass_key; + // Same as RenderTargetKey::pitch_tiles_at_32bpp. + uint32_t pitch_tiles_at_32bpp : 8; // 8 + // [0, 2047]. + uint32_t depth_base_tiles : xenos::kEdramBaseTilesBits - 1; // 19 + uint32_t color_0_base_tiles : xenos::kEdramBaseTilesBits - 1; // 30 + + uint32_t color_1_base_tiles : xenos::kEdramBaseTilesBits - 1; // 43 + uint32_t color_2_base_tiles : xenos::kEdramBaseTilesBits - 1; // 54 + + uint32_t color_3_base_tiles : xenos::kEdramBaseTilesBits - 1; // 75 + // Including all the padding, for a stable hash. 
FramebufferKey() { Reset(); } FramebufferKey(const FramebufferKey& key) { @@ -48,44 +213,27 @@ class VulkanRenderTargetCache { bool operator==(const FramebufferKey& key) const { return std::memcmp(this, &key, sizeof(*this)) == 0; } + using Hasher = xe::hash::XXHasher; void Reset() { std::memset(this, 0, sizeof(*this)); } - uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); } - struct Hasher { - size_t operator()(const FramebufferKey& description) const { - return size_t(description.GetHash()); - } - }; }; - static_assert(sizeof(FramebufferKey) == sizeof(uint32_t)); - - VulkanRenderTargetCache(VulkanCommandProcessor& command_processor, - const RegisterFile& register_file); - ~VulkanRenderTargetCache(); - - bool Initialize(); - void Shutdown(); - void ClearCache(); - - // Returns the render pass object, or VK_NULL_HANDLE if failed to create. - // A render pass managed by the render target cache may be ended and resumed - // at any time (to allow for things like copying and texture loading). - VkRenderPass GetRenderPass(RenderPassKey key); // Returns the framebuffer object, or VK_NULL_HANDLE if failed to create. - VkFramebuffer GetFramebuffer(FramebufferKey key); + const Framebuffer* GetFramebuffer( + RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp, + const RenderTarget* const* depth_and_color_render_targets); - // May dispatch computations. - bool UpdateRenderTargets(FramebufferKey& framebuffer_key_out); + bool gamma_render_target_as_srgb_ = false; - private: - VulkanCommandProcessor& command_processor_; - const RegisterFile& register_file_; - - // RenderPassKey::key -> VkRenderPass. 
- std::unordered_map render_passes_; - - std::unordered_map + std::unordered_map framebuffers_; + + RenderPassKey last_update_render_pass_key_; + VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE; + uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0; + const RenderTarget* const* + last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] = + {}; + const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE; }; } // namespace vulkan diff --git a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc index b30386793..245fbb684 100644 --- a/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc +++ b/src/xenia/ui/vulkan/vulkan_immediate_drawer.cc @@ -834,8 +834,6 @@ bool VulkanImmediateDrawer::CreateTextureResource( const VulkanProvider& provider = context_.GetVulkanProvider(); const VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - bool dedicated_allocation_supported = - provider.device_extensions().khr_dedicated_allocation; // Create the image and the descriptor. @@ -882,7 +880,7 @@ bool VulkanImmediateDrawer::CreateTextureResource( } image_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; VkMemoryDedicatedAllocateInfoKHR image_memory_dedicated_allocate_info; - if (dedicated_allocation_supported) { + if (provider.device_extensions().khr_dedicated_allocation) { image_memory_dedicated_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR; image_memory_dedicated_allocate_info.pNext = nullptr;