From 84758a3a3faa03aed86611380ead1660f2e4657b Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 13 May 2017 10:15:56 -0500 Subject: [PATCH] Vulkan CP: Use the color blitter --- src/xenia/gpu/vulkan/render_cache.cc | 41 +++- src/xenia/gpu/vulkan/render_cache.h | 14 +- src/xenia/gpu/vulkan/texture_cache.cc | 68 ++++-- src/xenia/gpu/vulkan/texture_cache.h | 8 +- .../gpu/vulkan/vulkan_command_processor.cc | 197 +++++++++++++++--- .../gpu/vulkan/vulkan_command_processor.h | 4 + .../gpu/vulkan/vulkan_graphics_system.cc | 3 +- 7 files changed, 267 insertions(+), 68 deletions(-) diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 311b04aa4..02b41219a 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -236,16 +236,27 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device, err = vkCreateImageView(device_, &image_view_info, nullptr, &image_view); CheckResult(err, "vkCreateImageView"); + // Create separate depth/stencil views. + if (key.color_or_depth == 0) { + image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + err = vkCreateImageView(device_, &image_view_info, nullptr, + &image_view_depth); + CheckResult(err, "vkCreateImageView"); + + image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + err = vkCreateImageView(device_, &image_view_info, nullptr, + &image_view_stencil); + CheckResult(err, "vkCreateImageView"); + } + // TODO(benvanik): transition to general layout? VkImageMemoryBarrier image_barrier; image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_barrier.pNext = nullptr; - image_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + image_barrier.srcAccessMask = 0; image_barrier.dstAccessMask = key.color_or_depth ? 
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - image_barrier.dstAccessMask |= - VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -873,12 +884,28 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer, return true; } -VkImageView RenderCache::FindTileView(uint32_t base, uint32_t pitch, - MsaaSamples samples, bool color_or_depth, - uint32_t format) { +CachedTileView* RenderCache::FindTileView(uint32_t base, uint32_t pitch, + MsaaSamples samples, + bool color_or_depth, + uint32_t format) { uint32_t tile_width = samples == MsaaSamples::k4X ? 40 : 80; uint32_t tile_height = samples != MsaaSamples::k1X ? 8 : 16; + if (color_or_depth) { + // Adjust similar formats for easier matching. + switch (static_cast(format)) { + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + format = uint32_t(ColorRenderTargetFormat::k_8_8_8_8); + break; + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10); + break; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10_FLOAT); + break; + } + } + TileViewKey key; key.tile_offset = base; key.tile_width = xe::round_up(pitch, tile_width) / tile_width; @@ -888,7 +915,7 @@ VkImageView RenderCache::FindTileView(uint32_t base, uint32_t pitch, key.edram_format = static_cast(format); auto view = FindTileView(key); if (view) { - return view->image_view; + return view; } return nullptr; diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index 9f6a72dc8..b6dca40cc 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -61,6 +61,11 @@ class CachedTileView { // Image sample count VkSampleCountFlagBits sample_count = 
VK_SAMPLE_COUNT_1_BIT; + // (if a depth view) Image view of depth aspect + VkImageView image_view_depth = nullptr; + // (if a depth view) Image view of stencil aspect + VkImageView image_view_stencil = nullptr; + CachedTileView(ui::vulkan::VulkanDevice* device, VkCommandBuffer command_buffer, VkDeviceMemory edram_memory, TileViewKey view_key); @@ -76,6 +81,10 @@ class CachedTileView { return key.tile_offset < other.key.tile_offset; } + VkExtent2D GetSize() const { + return {key.tile_width * 80u, key.tile_height * 16u}; + } + private: VkDevice device_ = nullptr; }; @@ -269,8 +278,9 @@ class RenderCache { // with an already open pass. bool dirty() const; - VkImageView FindTileView(uint32_t base, uint32_t pitch, MsaaSamples samples, - bool color_or_depth, uint32_t format); + CachedTileView* FindTileView(uint32_t base, uint32_t pitch, + MsaaSamples samples, bool color_or_depth, + uint32_t format); // Begins a render pass targeting the state-specified framebuffer formats. // The command buffer will be transitioned into the render pass phase. diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 8b7d03941..7d72fca2b 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -311,7 +311,7 @@ void TextureCache::DestroyEmptySet() { } TextureCache::Texture* TextureCache::AllocateTexture( - const TextureInfo& texture_info) { + const TextureInfo& texture_info, VkFormatFeatureFlags required_flags) { // Create an image first. 
VkImageCreateInfo image_info = {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -341,18 +341,24 @@ TextureCache::Texture* TextureCache::AllocateTexture( : VK_FORMAT_R8G8B8A8_UNORM; image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT; + image_info.usage = + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; // Check the device limits for the format before we create it. VkFormatProperties props; vkGetPhysicalDeviceFormatProperties(*device_, format, &props); - uint32_t required_flags = - VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT; if ((props.optimalTilingFeatures & required_flags) != required_flags) { // Texture needs conversion on upload to a native format. - // assert_always(); + assert_always(); + } + + if (props.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) { + // Add color attachment usage if it's supported. + image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + + if (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT) { + image_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; } VkImageFormatProperties image_props; @@ -413,6 +419,10 @@ bool TextureCache::FreeTexture(Texture* texture) { return false; } + if (texture->framebuffer) { + vkDestroyFramebuffer(*device_, texture->framebuffer, nullptr); + } + for (auto it = texture->views.begin(); it != texture->views.end();) { vkDestroyImageView(*device_, (*it)->view, nullptr); it = texture->views.erase(it); @@ -449,7 +459,9 @@ TextureCache::Texture* TextureCache::DemandResolveTexture( } // No texture at this location. Make a new one. - auto texture = AllocateTexture(texture_info); + auto texture = + AllocateTexture(texture_info, VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT); // Setup a debug name for the texture. 
device_->DbgSetObjectName( @@ -965,23 +977,30 @@ void TextureCache::ConvertTexture2D(uint8_t* dest, const TextureInfo& src) { uint32_t offset_x; uint32_t offset_y; TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y); - auto bpp = (bytes_per_block >> 2) + - ((bytes_per_block >> 1) >> (bytes_per_block >> 2)); - for (uint32_t y = 0, output_base_offset = 0; - y < std::min(src.size_2d.block_height, src.size_2d.logical_height); - y++, output_base_offset += src.size_2d.output_pitch) { - auto input_base_offset = TextureInfo::TiledOffset2DOuter( - offset_y + y, - (src.size_2d.input_width / src.format_info()->block_width), bpp); - for (uint32_t x = 0, output_offset = output_base_offset; - x < src.size_2d.block_width; x++, output_offset += bytes_per_block) { + auto log2_bpp = (bytes_per_block >> 2) + + ((bytes_per_block >> 1) >> (bytes_per_block >> 2)); + + // Offset to the current row, in bytes. + uint32_t output_row_offset = 0; + for (uint32_t y = 0; y < src.size_2d.block_height; y++) { + auto input_row_offset = TextureInfo::TiledOffset2DOuter( + offset_y + y, src.size_2d.block_width, log2_bpp); + + // Go block-by-block on this row. 
+ uint32_t output_offset = output_row_offset; + for (uint32_t x = 0; x < src.size_2d.block_width; x++) { auto input_offset = - TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp, - input_base_offset) >> - bpp; + TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, + log2_bpp, input_row_offset) >> + log2_bpp; + TextureSwap(src.endianness, dest + output_offset, src_mem + input_offset * bytes_per_block, bytes_per_block); + + output_offset += bytes_per_block; } + + output_row_offset += src.size_2d.output_pitch; } } } @@ -1174,6 +1193,13 @@ bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer, barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier.image = dest->image; barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT || + dest->format == VK_FORMAT_D24_UNORM_S8_UINT || + dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + barrier.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index 0534f82c6..c806910b1 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -44,6 +44,7 @@ class TextureCache { VkDeviceMemory image_memory; VkDeviceSize memory_offset; VkDeviceSize memory_size; + VkFramebuffer framebuffer; // Blit target frame buffer. uintptr_t access_watch_handle; bool pending_invalidation; @@ -100,6 +101,8 @@ class TextureCache { uint32_t height, TextureFormat format, VkOffset2D* out_offset = nullptr); + TextureView* DemandView(Texture* texture, uint16_t swizzle); + // Demands a texture for the purpose of resolving from EDRAM. This either // creates a new texture or returns a previously created texture. 
Texture* DemandResolveTexture(const TextureInfo& texture_info, @@ -124,7 +127,9 @@ class TextureCache { void DestroyEmptySet(); // Allocates a new texture and memory to back it on the GPU. - Texture* AllocateTexture(const TextureInfo& texture_info); + Texture* AllocateTexture(const TextureInfo& texture_info, + VkFormatFeatureFlags required_flags = + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); bool FreeTexture(Texture* texture); // Demands a texture. If command_buffer is null and the texture hasn't been @@ -132,7 +137,6 @@ class TextureCache { Texture* Demand(const TextureInfo& texture_info, VkCommandBuffer command_buffer = nullptr, VkFence completion_fence = nullptr); - TextureView* DemandView(Texture* texture, uint16_t swizzle); Sampler* Demand(const SamplerInfo& sampler_info); void FlushPendingCommands(VkCommandBuffer command_buffer, diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index c62866f1d..c1c8aa8ed 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -68,9 +68,16 @@ bool VulkanCommandProcessor::SetupContext() { queue_mutex_ = &device_->primary_queue_mutex(); } + // Setup a blitter. + blitter_ = std::make_unique(); + if (!blitter_->Initialize(device_)) { + XELOGE("Unable to initialize blitter"); + return false; + } + // Setup fenced pools used for all our per-frame/per-draw resources. command_buffer_pool_ = std::make_unique( - *device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY); + *device_, device_->queue_family_index()); // Initialize the state machine caches. buffer_cache_ = std::make_unique( @@ -112,6 +119,8 @@ void VulkanCommandProcessor::ShutdownContext() { render_cache_.reset(); texture_cache_.reset(); + blitter_.reset(); + // Free all pools. This must come after all of our caches clean up. 
command_buffer_pool_.reset(); @@ -200,7 +209,7 @@ void VulkanCommandProcessor::CreateSwapImage(VkCommandBuffer setup_buffer, image_info.samples = VK_SAMPLE_COUNT_1_BIT; image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.usage = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.queueFamilyIndexCount = 0; image_info.pQueueFamilyIndices = nullptr; @@ -222,12 +231,42 @@ void VulkanCommandProcessor::CreateSwapImage(VkCommandBuffer setup_buffer, std::lock_guard lock(swap_state_.mutex); swap_state_.front_buffer_texture = reinterpret_cast(image_fb); + VkImageViewCreateInfo view_create_info = { + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + image_fb, + VK_IMAGE_VIEW_TYPE_2D, + VK_FORMAT_R8G8B8A8_UNORM, + {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}, + }; + status = + vkCreateImageView(*device_, &view_create_info, nullptr, &fb_image_view_); + CheckResult(status, "vkCreateImageView"); + + VkFramebufferCreateInfo framebuffer_create_info = { + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + blitter_->GetRenderPass(VK_FORMAT_R8G8B8A8_UNORM), + 1, + &fb_image_view_, + extents.width, + extents.height, + 1, + }; + status = vkCreateFramebuffer(*device_, &framebuffer_create_info, nullptr, + &fb_framebuffer_); + CheckResult(status, "vkCreateFramebuffer"); + // Transition image to general layout. 
VkImageMemoryBarrier barrier; std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier)); barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.srcAccessMask = 0; - barrier.dstAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -241,6 +280,9 @@ void VulkanCommandProcessor::CreateSwapImage(VkCommandBuffer setup_buffer, } void VulkanCommandProcessor::DestroySwapImage() { + vkDestroyFramebuffer(*device_, fb_framebuffer_, nullptr); + vkDestroyImageView(*device_, fb_image_view_, nullptr); + std::lock_guard lock(swap_state_.mutex); vkDestroyImage(*device_, reinterpret_cast(swap_state_.front_buffer_texture), @@ -249,6 +291,8 @@ void VulkanCommandProcessor::DestroySwapImage() { swap_state_.front_buffer_texture = 0; fb_memory_ = nullptr; + fb_framebuffer_ = nullptr; + fb_image_view_ = nullptr; } void VulkanCommandProcessor::BeginFrame() { @@ -363,7 +407,7 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; barrier.oldLayout = texture->image_layout; barrier.newLayout = texture->image_layout; barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; @@ -372,31 +416,43 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0, nullptr, 1, &barrier); - // Now issue a blit command. 
- VkImageBlit blit; - std::memset(&blit, 0, sizeof(VkImageBlit)); - blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - blit.srcOffsets[0] = {0, 0, 0}; - blit.srcOffsets[1] = {int32_t(frontbuffer_width), - int32_t(frontbuffer_height), 1}; - blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; - blit.dstOffsets[0] = {0, 0, 0}; - blit.dstOffsets[1] = {int32_t(frontbuffer_width), - int32_t(frontbuffer_height), 1}; + barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.image = swap_fb; + vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); - vkCmdBlitImage(copy_commands, texture->image, texture->image_layout, - swap_fb, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, - VK_FILTER_LINEAR); + VkRect2D src_rect = { + {0, 0}, {frontbuffer_width, frontbuffer_height}, + }; + blitter_->BlitTexture2D( + copy_commands, current_batch_fence_, + texture_cache_->DemandView(texture, 0x688)->view, src_rect, + {texture->texture_info.width + 1, texture->texture_info.height + 1}, + VK_FORMAT_R8G8B8A8_UNORM, {0, 0}, + {frontbuffer_width, frontbuffer_height}, fb_framebuffer_, + VK_FILTER_LINEAR, true, true); + + std::swap(barrier.oldLayout, barrier.newLayout); + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &barrier); std::lock_guard lock(swap_state_.mutex); swap_state_.width = frontbuffer_width; swap_state_.height = frontbuffer_height; auto swap_event = reinterpret_cast(swap_state_.backend_data); - vkCmdSetEvent(copy_commands, swap_event, VK_PIPELINE_STAGE_TRANSFER_BIT); + vkCmdSetEvent(copy_commands, 
swap_event, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT); } status = vkEndCommandBuffer(copy_commands); @@ -471,6 +527,7 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, // resources! command_buffer_pool_->Scavenge(); + blitter_->Scavenge(); texture_cache_->Scavenge(); buffer_cache_->Scavenge(); } @@ -516,7 +573,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, auto pixel_shader = static_cast(active_pixel_shader()); if (!vertex_shader) { // Always need a vertex shader. - return true; + return false; } // Depth-only mode doesn't need a pixel shader (we'll use a fake one). if (enable_mode == ModeControl::kDepth) { @@ -527,10 +584,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } - bool started_frame = false; + bool full_update = false; if (!frame_open_) { BeginFrame(); - started_frame = true; + full_update = true; } auto command_buffer = current_command_buffer_; auto setup_buffer = current_setup_buffer_; @@ -544,6 +601,7 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, current_render_state_ = nullptr; } + full_update = true; current_render_state_ = render_cache_->BeginRenderPass( command_buffer, vertex_shader, pixel_shader); if (!current_render_state_) { @@ -559,13 +617,13 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, command_buffer, current_render_state_, vertex_shader, pixel_shader, primitive_type, &pipeline); if (pipeline_status == PipelineCache::UpdateStatus::kMismatch || - started_frame) { + full_update) { vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } else if (pipeline_status == PipelineCache::UpdateStatus::kError) { return false; } - pipeline_cache_->SetDynamicState(command_buffer, started_frame); + pipeline_cache_->SetDynamicState(command_buffer, full_update); // Pass registers to the shaders. 
if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { @@ -931,14 +989,14 @@ bool VulkanCommandProcessor::IssueCopy() { } if (copy_src_select > 3 || depth_clear_enabled) { - // Source from a depth target. + // Source from or clear a depth target. uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; depth_edram_base = depth_info & 0xFFF; depth_format = static_cast((depth_info >> 16) & 0x1); if (copy_src_select > 3) { - copy_dest_format = TextureFormat::k_24_8; + copy_dest_format = DepthRenderTargetToTextureFormat(depth_format); } } @@ -995,7 +1053,7 @@ bool VulkanCommandProcessor::IssueCopy() { } // Transition the image into a transfer destination layout, if needed. - // TODO: Util function for this + // TODO: If blitting, layout should be color attachment. VkImageMemoryBarrier image_barrier; image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; image_barrier.pNext = nullptr; @@ -1004,7 +1062,7 @@ bool VulkanCommandProcessor::IssueCopy() { image_barrier.srcAccessMask = 0; image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; image_barrier.oldLayout = texture->image_layout; - image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; image_barrier.image = texture->image; image_barrier.subresourceRange = {0, 0, 1, 0, 1}; image_barrier.subresourceRange.aspectMask = @@ -1016,9 +1074,9 @@ bool VulkanCommandProcessor::IssueCopy() { VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier); - VkOffset3D resolve_offset = {dest_min_x, dest_min_y, 0}; - VkExtent3D resolve_extent = {uint32_t(dest_max_x - dest_min_x), - uint32_t(dest_max_y - dest_min_y), 1}; + VkOffset2D resolve_offset = {dest_min_x, dest_min_y}; + VkExtent2D resolve_extent = {uint32_t(dest_max_x - dest_min_x), + uint32_t(dest_max_y - dest_min_y)}; // Ask the render cache to copy to the resolve texture. auto edram_base = copy_src_select <= 3 ? 
color_edram_base : depth_edram_base; @@ -1034,13 +1092,84 @@ bool VulkanCommandProcessor::IssueCopy() { resolve_offset, resolve_extent); break; */ - case CopyCommand::kConvert: + + case CopyCommand::kConvert: { + /* render_cache_->BlitToImage(command_buffer, edram_base, surface_pitch, resolve_extent.height, surface_msaa, texture->image, texture->image_layout, copy_src_select <= 3, src_format, filter, resolve_offset, resolve_extent); - break; + */ + + // Blit with blitter. + auto view = + render_cache_->FindTileView(edram_base, surface_pitch, surface_msaa, + copy_src_select <= 3, src_format); + if (!view) { + break; + } + + // Convert the tile view to a sampled image. + // Put a barrier on the tile view. + VkImageMemoryBarrier image_barrier; + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + image_barrier.pNext = nullptr; + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.srcAccessMask = + copy_src_select <= 3 ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT + : VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + image_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + image_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + image_barrier.image = view->image; + image_barrier.subresourceRange = {0, 0, 1, 0, 1}; + image_barrier.subresourceRange.aspectMask = + copy_src_select <= 3 + ? VK_IMAGE_ASPECT_COLOR_BIT + : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, + 0, nullptr, 1, &image_barrier); + + auto render_pass = blitter_->GetRenderPass(texture->format); + + // Create a framebuffer containing our image. 
+ if (!texture->framebuffer) { + auto texture_view = texture_cache_->DemandView(texture, 0x688); + + VkFramebufferCreateInfo fb_create_info = { + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + nullptr, + 0, + render_pass, + 1, + &texture_view->view, + texture->texture_info.width + 1, + texture->texture_info.height + 1, + 1, + }; + + VkResult res = vkCreateFramebuffer(*device_, &fb_create_info, nullptr, + &texture->framebuffer); + CheckResult(res, "vkCreateFramebuffer"); + } + + blitter_->BlitTexture2D( + command_buffer, current_batch_fence_, + copy_src_select == 4 ? view->image_view_depth : view->image_view, + {{0, 0}, {resolve_extent.width, resolve_extent.height}}, + view->GetSize(), texture->format, resolve_offset, resolve_extent, + texture->framebuffer, filter, copy_src_select <= 3, true); + + // Pull the tile view back to a color attachment. + std::swap(image_barrier.srcAccessMask, image_barrier.dstAccessMask); + vkCmdPipelineBarrier(command_buffer, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, nullptr, 0, + nullptr, 1, &image_barrier); + } break; case CopyCommand::kConstantOne: case CopyCommand::kNull: diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 79f1bfdd9..0f5100152 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -31,6 +31,7 @@ #include "xenia/gpu/xenos.h" #include "xenia/kernel/xthread.h" #include "xenia/memory.h" +#include "xenia/ui/vulkan/blitter.h" #include "xenia/ui/vulkan/fenced_pools.h" #include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_device.h" @@ -96,6 +97,8 @@ class VulkanCommandProcessor : public CommandProcessor { // front buffer / back buffer memory VkDeviceMemory fb_memory_ = nullptr; + VkImageView fb_image_view_ = nullptr; + VkFramebuffer fb_framebuffer_ = nullptr; uint64_t dirty_float_constants_ = 0; // Dirty float constants in blocks of 4 
uint8_t dirty_bool_constants_ = 0; @@ -124,6 +127,7 @@ class VulkanCommandProcessor : public CommandProcessor { std::unique_ptr render_cache_; std::unique_ptr texture_cache_; + std::unique_ptr blitter_; std::unique_ptr command_buffer_pool_; bool frame_open_ = false; diff --git a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc index 2830244ed..159b36a0d 100644 --- a/src/xenia/gpu/vulkan/vulkan_graphics_system.cc +++ b/src/xenia/gpu/vulkan/vulkan_graphics_system.cc @@ -231,8 +231,7 @@ void VulkanGraphicsSystem::DestroyCaptureBuffer() { std::unique_ptr VulkanGraphicsSystem::CreateCommandProcessor() { - return std::unique_ptr( - new VulkanCommandProcessor(this, kernel_state_)); + return std::make_unique(this, kernel_state_); } void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) {