From 121a2d655a2d1080e6b69a5ea6a1107339c7ef15 Mon Sep 17 00:00:00 2001 From: DrChat Date: Fri, 1 Sep 2017 18:32:55 -0500 Subject: [PATCH] Vulkan: Properly support depth writes (or blit depth images if able) --- src/xenia/gpu/register_table.inc | 2 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 6 ++ src/xenia/gpu/vulkan/render_cache.cc | 3 + src/xenia/gpu/vulkan/render_cache.h | 2 + .../gpu/vulkan/vulkan_command_processor.cc | 78 ++++++++++++------- src/xenia/ui/vulkan/blitter.cc | 53 ++++++++++--- src/xenia/ui/vulkan/blitter.h | 4 +- 7 files changed, 105 insertions(+), 43 deletions(-) diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index 759aeb04c..9aa649742 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -184,7 +184,7 @@ XE_GPU_REGISTER(0x2318, kDword, RB_COPY_CONTROL) XE_GPU_REGISTER(0x2319, kDword, RB_COPY_DEST_BASE) XE_GPU_REGISTER(0x231A, kDword, RB_COPY_DEST_PITCH) XE_GPU_REGISTER(0x231B, kDword, RB_COPY_DEST_INFO) -XE_GPU_REGISTER(0x231C, kDword, RB_TILE_CLEAR) +XE_GPU_REGISTER(0x231C, kDword, RB_HIZ_CLEAR) XE_GPU_REGISTER(0x231D, kDword, RB_DEPTH_CLEAR) XE_GPU_REGISTER(0x231E, kDword, RB_COLOR_CLEAR) XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LOW) diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 1533c63ef..4d5dc9c49 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -435,6 +435,12 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, // TODO(benvanik): quad strip geometry shader. assert_always("Quad strips not implemented"); return nullptr; + case PrimitiveType::k2DCopyRectListV0: + case PrimitiveType::k2DCopyRectListV1: + case PrimitiveType::k2DCopyRectListV2: + case PrimitiveType::k2DCopyRectListV3: + // TODO(DrChat): Research this. 
+ return nullptr; default: assert_unhandled_case(primitive_type); return nullptr; diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 02b41219a..19879fa53 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -273,6 +273,8 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); + + image_layout = image_barrier.newLayout; } CachedTileView::~CachedTileView() { @@ -436,6 +438,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device, // Single subpass that writes to our attachments. // FIXME: "Multiple attachments that alias the same memory must not be used in // a single subpass" + // TODO: Input attachment for depth/stencil reads? VkSubpassDescription subpass_info; subpass_info.flags = 0; subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index b6dca40cc..e6074a119 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -56,6 +56,8 @@ class CachedTileView { VkImage image = nullptr; // Simple view on the image matching the format. 
VkImageView image_view = nullptr; + // Image layout + VkImageLayout image_layout = VK_IMAGE_LAYOUT_UNDEFINED; // Memory buffer VkDeviceMemory memory = nullptr; // Image sample count diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index db3b7ce00..ffef84490 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -91,7 +91,9 @@ bool VulkanCommandProcessor::SetupContext() { render_cache_ = std::make_unique<RenderCache>(register_file_, device_); VkEventCreateInfo info = { - VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, nullptr, 0, + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + nullptr, + 0, }; VkResult result = @@ -251,7 +253,7 @@ void VulkanCommandProcessor::CreateSwapImage(VkCommandBuffer setup_buffer, VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, nullptr, 0, - blitter_->GetRenderPass(VK_FORMAT_R8G8B8A8_UNORM), + blitter_->GetRenderPass(VK_FORMAT_R8G8B8A8_UNORM, true), 1, &fb_image_view_, extents.width, @@ -439,7 +441,8 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, nullptr, 1, &barrier); VkRect2D src_rect = { - {0, 0}, {frontbuffer_width, frontbuffer_height}, + {0, 0}, + {frontbuffer_width, frontbuffer_height}, }; blitter_->BlitTexture2D( copy_commands, current_batch_fence_, @@ -806,13 +809,23 @@ bool VulkanCommandProcessor::PopulateVertexBuffers( // TODO(benvanik): compute based on indices or vertex count. // THIS CAN BE MASSIVELY INCORRECT (too large). - size_t valid_range = size_t(fetch->size * 4); + uint32_t source_length = fetch->size * 4; uint32_t physical_address = fetch->address << 2; - trace_writer_.WriteMemoryRead(physical_address, valid_range); + trace_writer_.WriteMemoryRead(physical_address, source_length); // Upload (or get a cached copy of) the buffer. - uint32_t source_length = uint32_t(valid_range); + // TODO: Make the buffer cache ... actually cache buffers.
We can have + // a list of buffers that were cached, and store those in chunks in a + // multiple of the host's page size. + // WRITE WATCHES: We need to invalidate vertex buffers if they're written + // to. Since most vertex buffers aren't aligned to a page boundary, this + // means a watch may cover more than one vertex buffer. + // We need to maintain a list of write watches, and what memory ranges + // they cover. If a vertex buffer lies within a write watch's range, assign + // it to the watch. If there's partial alignment where a buffer lies within + // one watch and outside of it, should we create a new watch or extend the + // existing watch? auto buffer_ref = buffer_cache_->UploadVertexBuffer( current_setup_buffer_, physical_address, source_length, static_cast<Endian>(fetch->endian), current_batch_fence_); @@ -896,9 +909,6 @@ bool VulkanCommandProcessor::IssueCopy() { ColorFormatToTextureFormat(copy_regs->copy_dest_info.copy_dest_format); // TODO: copy dest number / bias - // TODO: Issue with RDR - resolves k_16_16_16_16_FLOAT and samples - // k_16_16_16_16. - uint32_t copy_dest_base = copy_regs->copy_dest_base; uint32_t copy_dest_pitch = copy_regs->copy_dest_pitch.copy_dest_pitch; uint32_t copy_dest_height = copy_regs->copy_dest_pitch.copy_dest_height; @@ -995,7 +1005,8 @@ bool VulkanCommandProcessor::IssueCopy() { if (is_color_source) { // Source from a color target. uint32_t color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, + regs[XE_GPU_REG_RB_COLOR_INFO].u32, + regs[XE_GPU_REG_RB_COLOR1_INFO].u32, regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32, }; @@ -1078,9 +1089,12 @@ bool VulkanCommandProcessor::IssueCopy() { image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.srcAccessMask = 0; image_barrier.dstAccessMask = - VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + is_color_source ?
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT + : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; image_barrier.oldLayout = texture->image_layout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + image_barrier.newLayout = + is_color_source ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; image_barrier.image = texture->image; image_barrier.subresourceRange = {0, 0, 1, 0, 1}; image_barrier.subresourceRange.aspectMask = @@ -1102,21 +1116,23 @@ bool VulkanCommandProcessor::IssueCopy() { VkFilter filter = is_color_source ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; switch (copy_command) { case CopyCommand::kRaw: - /* - render_cache_->RawCopyToImage(command_buffer, edram_base, texture->image, - texture->image_layout, is_color_source, - resolve_offset, resolve_extent); - break; - */ + /* + render_cache_->RawCopyToImage(command_buffer, edram_base, + texture->image, texture->image_layout, is_color_source, resolve_offset, + resolve_extent); break; + */ case CopyCommand::kConvert: { - /* - render_cache_->BlitToImage(command_buffer, edram_base, surface_pitch, - resolve_extent.height, surface_msaa, - texture->image, texture->image_layout, - is_color_source, src_format, filter, - resolve_offset, resolve_extent); - */ + if (!is_color_source && copy_regs->copy_dest_info.copy_dest_swap == 0) { + // Depth images are a bit more complicated. Try a blit! + render_cache_->BlitToImage( + command_buffer, edram_base, surface_pitch, resolve_extent.height, + surface_msaa, texture->image, texture->image_layout, + is_color_source, src_format, filter, + {resolve_offset.x, resolve_offset.y, 0}, + {resolve_extent.width, resolve_extent.height, 1}); + break; + } // Blit with blitter. auto view = render_cache_->FindTileView( @@ -1135,9 +1151,11 @@ bool VulkanCommandProcessor::IssueCopy() { image_barrier.srcAccessMask = is_color_source ? 
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - image_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + image_barrier.oldLayout = view->image_layout; + image_barrier.newLayout = view->image_layout; image_barrier.image = view->image; image_barrier.subresourceRange = {0, 0, 1, 0, 1}; image_barrier.subresourceRange.aspectMask = @@ -1148,7 +1166,8 @@ bool VulkanCommandProcessor::IssueCopy() { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); - auto render_pass = blitter_->GetRenderPass(texture->format); + auto render_pass = + blitter_->GetRenderPass(texture->format, is_color_source); // Create a framebuffer containing our image. if (!texture->framebuffer) { @@ -1181,6 +1200,7 @@ bool VulkanCommandProcessor::IssueCopy() { // Pull the tile view back to a color attachment. std::swap(image_barrier.srcAccessMask, image_barrier.dstAccessMask); + std::swap(image_barrier.oldLayout, image_barrier.newLayout); vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0, diff --git a/src/xenia/ui/vulkan/blitter.cc b/src/xenia/ui/vulkan/blitter.cc index 61404c499..acd716166 100644 --- a/src/xenia/ui/vulkan/blitter.cc +++ b/src/xenia/ui/vulkan/blitter.cc @@ -182,7 +182,7 @@ void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, } // Acquire a render pass. 
- auto render_pass = GetRenderPass(dst_image_format); + auto render_pass = GetRenderPass(dst_image_format, color_or_depth); VkRenderPassBeginInfo render_pass_info = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, @@ -207,7 +207,10 @@ void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, vkCmdSetViewport(command_buffer, 0, 1, &viewport); VkRect2D scissor = { - dst_offset.x, dst_offset.y, dst_extents.width, dst_extents.height, + dst_offset.x, + dst_offset.y, + dst_extents.width, + dst_extents.height, }; vkCmdSetScissor(command_buffer, 0, 1, &scissor); @@ -233,7 +236,7 @@ void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, VkDescriptorImageInfo image; image.sampler = filter == VK_FILTER_NEAREST ? samp_nearest_ : samp_linear_; image.imageView = src_image_view; - image.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + image.imageLayout = VK_IMAGE_LAYOUT_GENERAL; write.pImageInfo = &image; write.pBufferInfo = nullptr; @@ -256,7 +259,10 @@ void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence, &vtx_constants); PixPushConstants pix_constants = { - 0, 0, 0, swap_channels ? 1 : 0, + 0, + 0, + 0, + swap_channels ? 1 : 0, }; vkCmdPushConstants(command_buffer, pipeline_layout_, VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(VtxPushConstants), @@ -279,14 +285,14 @@ void Blitter::CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence, VkImageView dst_image_view, VkExtent2D extents) { } -VkRenderPass Blitter::GetRenderPass(VkFormat format) { +VkRenderPass Blitter::GetRenderPass(VkFormat format, bool color_or_depth) { auto pass = render_passes_.find(format); if (pass != render_passes_.end()) { return pass->second; } // Create and cache the render pass.
- VkRenderPass render_pass = CreateRenderPass(format); + VkRenderPass render_pass = CreateRenderPass(format, color_or_depth); if (render_pass) { render_passes_[format] = render_pass; } @@ -310,7 +316,8 @@ VkPipeline Blitter::GetPipeline(VkRenderPass render_pass, return pipeline; } -VkRenderPass Blitter::CreateRenderPass(VkFormat output_format) { +VkRenderPass Blitter::CreateRenderPass(VkFormat output_format, + bool color_or_depth) { VkAttachmentDescription attachments[1]; std::memset(attachments, 0, sizeof(attachments)); @@ -327,16 +334,25 @@ VkRenderPass Blitter::CreateRenderPass(VkFormat output_format) { VkAttachmentReference attach_refs[1]; attach_refs[0].attachment = 0; - attach_refs[0].layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attach_refs[0].layout = + color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkSubpassDescription subpass = { 0, VK_PIPELINE_BIND_POINT_GRAPHICS, 0, nullptr, - 1, attach_refs, + 0, nullptr, nullptr, nullptr, 0, nullptr, }; + if (color_or_depth) { + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = attach_refs; + } else { + subpass.pDepthStencilAttachment = attach_refs; + } + VkRenderPassCreateInfo renderpass_info = { VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, @@ -440,7 +456,21 @@ VkPipeline Blitter::CreatePipeline(VkRenderPass render_pass, multisample_info.alphaToCoverageEnable = VK_FALSE; multisample_info.alphaToOneEnable = VK_FALSE; pipeline_info.pMultisampleState = &multisample_info; - pipeline_info.pDepthStencilState = nullptr; + VkPipelineDepthStencilStateCreateInfo depth_info = { + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + nullptr, + 0, + VK_TRUE, + VK_TRUE, + VK_COMPARE_OP_ALWAYS, + VK_FALSE, + VK_FALSE, + {}, + {}, + 0.f, + 1.f, + }; + pipeline_info.pDepthStencilState = &depth_info; VkPipelineColorBlendStateCreateInfo blend_info; blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; 
blend_info.pNext = nullptr; @@ -467,7 +497,8 @@ VkPipeline Blitter::CreatePipeline(VkRenderPass render_pass, dynamic_state_info.pNext = nullptr; dynamic_state_info.flags = 0; VkDynamicState dynamic_states[] = { - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, }; dynamic_state_info.dynamicStateCount = static_cast<uint32_t>(xe::countof(dynamic_states)); diff --git a/src/xenia/ui/vulkan/blitter.h b/src/xenia/ui/vulkan/blitter.h index 620db0bc1..1ec48fd4f 100644 --- a/src/xenia/ui/vulkan/blitter.h +++ b/src/xenia/ui/vulkan/blitter.h @@ -51,7 +51,7 @@ class Blitter { VkImageView dst_image_view, VkExtent2D extents); // For framebuffer creation. - VkRenderPass GetRenderPass(VkFormat format); + VkRenderPass GetRenderPass(VkFormat format, bool color_or_depth); private: struct VtxPushConstants { @@ -64,7 +64,7 @@ class Blitter { }; VkPipeline GetPipeline(VkRenderPass render_pass, VkShaderModule frag_shader); - VkRenderPass CreateRenderPass(VkFormat output_format); + VkRenderPass CreateRenderPass(VkFormat output_format, bool color_or_depth); VkPipeline CreatePipeline(VkRenderPass render_pass, VkShaderModule frag_shader);