From de1e4661ff538b20da3d3f72f87c13822b794404 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 19 Feb 2016 20:36:10 -0800 Subject: [PATCH] Not crashing (but also likely not working) EDRAM emulation. --- src/xenia/gpu/trace_dump.cc | 15 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 2 +- src/xenia/gpu/vulkan/pipeline_cache.h | 4 +- src/xenia/gpu/vulkan/render_cache.cc | 713 +++++++++++++++++- src/xenia/gpu/vulkan/render_cache.h | 106 ++- .../gpu/vulkan/vulkan_command_processor.cc | 36 +- src/xenia/gpu/vulkan/vulkan_shader.h | 2 +- src/xenia/ui/vulkan/fenced_pools.h | 3 + src/xenia/ui/vulkan/vulkan.cc | 4 + src/xenia/ui/vulkan/vulkan.h | 1 + src/xenia/ui/vulkan/vulkan_context.cc | 2 +- src/xenia/ui/vulkan/vulkan_device.cc | 5 + 12 files changed, 851 insertions(+), 42 deletions(-) diff --git a/src/xenia/gpu/trace_dump.cc b/src/xenia/gpu/trace_dump.cc index b7804ebec..7535e0f01 100644 --- a/src/xenia/gpu/trace_dump.cc +++ b/src/xenia/gpu/trace_dump.cc @@ -13,6 +13,7 @@ #include "third_party/stb/stb_image_write.h" #include "xenia/base/logging.h" +#include "xenia/base/profiling.h" #include "xenia/base/string.h" #include "xenia/base/threading.h" #include "xenia/gpu/command_processor.h" @@ -189,10 +190,16 @@ void TraceDump::Run() { }); xe::threading::Fence capture_fence; + bool did_capture = false; loop_->PostDelayed( [&]() { // Capture. auto raw_image = window_->context()->Capture(); + if (!raw_image) { + // Failed to capture anything. + capture_fence.Signal(); + return; + } // Save framebuffer png. 
std::string png_path = xe::to_string(base_output_path_ + L".png"); @@ -201,6 +208,7 @@ void TraceDump::Run() { raw_image->data.data(), static_cast(raw_image->stride)); + did_capture = true; capture_fence.Signal(); }, 50); @@ -211,10 +219,13 @@ void TraceDump::Run() { loop_->Quit(); loop_->AwaitQuit(); - player_.reset(); - emulator_.reset(); + Profiler::Shutdown(); window_.reset(); loop_.reset(); + player_.reset(); + emulator_.reset(); + + // TODO(benvanik): die if failed to capture? } } // namespace gpu diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index ec6c28eac..5416aea7f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -84,7 +84,7 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type, } bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, - VkRenderPass render_pass, + const RenderState* render_state, VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type) { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 00e36ef12..74c461504 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -14,6 +14,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/spirv_shader_translator.h" +#include "xenia/gpu/vulkan/render_cache.h" #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/spirv/spirv_disassembler.h" @@ -42,7 +43,8 @@ class PipelineCache { // in the command buffer is issued at this time. // Returns whether the pipeline could be successfully created. 
bool ConfigurePipeline(VkCommandBuffer command_buffer, - VkRenderPass render_pass, VulkanShader* vertex_shader, + const RenderState* render_state, + VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type); diff --git a/src/xenia/gpu/vulkan/render_cache.cc b/src/xenia/gpu/vulkan/render_cache.cc index 32d9349e5..b477d5633 100644 --- a/src/xenia/gpu/vulkan/render_cache.cc +++ b/src/xenia/gpu/vulkan/render_cache.cc @@ -9,6 +9,8 @@ #include "xenia/gpu/vulkan/render_cache.h" +#include + #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/memory.h" @@ -20,55 +22,711 @@ namespace xe { namespace gpu { namespace vulkan { +using namespace xe::gpu::xenos; using xe::ui::vulkan::CheckResult; +constexpr uint32_t kEdramBufferCapacity = 10 * 1024 * 1024; + +VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) { + switch (format) { + case ColorRenderTargetFormat::k_8_8_8_8: + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + return VK_FORMAT_R8G8B8A8_UNORM; + case ColorRenderTargetFormat::k_2_10_10_10: + case ColorRenderTargetFormat::k_2_10_10_10_unknown: + return VK_FORMAT_A2R10G10B10_UNORM_PACK32; + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT: + case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: + // WARNING: this is wrong, most likely - no float form in vulkan? 
+ XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used"); + return VK_FORMAT_A2R10G10B10_SSCALED_PACK32; + case ColorRenderTargetFormat::k_16_16: + return VK_FORMAT_R16G16_UNORM; + case ColorRenderTargetFormat::k_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UNORM; + case ColorRenderTargetFormat::k_16_16_FLOAT: + return VK_FORMAT_R16G16_SFLOAT; + case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case ColorRenderTargetFormat::k_32_FLOAT: + return VK_FORMAT_R32_SFLOAT; + case ColorRenderTargetFormat::k_32_32_FLOAT: + return VK_FORMAT_R32G32_SFLOAT; + default: + assert_unhandled_case(key.edram_format); + return VK_FORMAT_UNDEFINED; + } +} + +VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) { + switch (format) { + case DepthRenderTargetFormat::kD24S8: + return VK_FORMAT_D24_UNORM_S8_UINT; + case DepthRenderTargetFormat::kD24FS8: + // TODO(benvanik): some way to emulate? resolve-time flag? + XELOGW("Unsupported EDRAM format kD24FS8 used"); + return VK_FORMAT_D24_UNORM_S8_UINT; + default: + return VK_FORMAT_UNDEFINED; + } +} + +// Cached view into the EDRAM memory. +// The image is aliased to a region of the edram_memory_ based on the tile +// parameters. +// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible +// formats? +class CachedTileView { + public: + // Key identifying the view in the cache. + TileViewKey key; + // Image mapped into EDRAM. + VkImage image = nullptr; + // Simple view on the image matching the format. + VkImageView image_view = nullptr; + + CachedTileView(VkDevice device, VkDeviceMemory edram_memory, + TileViewKey view_key); + ~CachedTileView(); + + bool IsEqual(const TileViewKey& other_key) const { + auto a = reinterpret_cast(&key); + auto b = reinterpret_cast(&other_key); + return *a == *b; + } + + private: + VkDevice device_ = nullptr; +}; + +// Cached framebuffer referencing tile attachments. +// Each framebuffer is specific to a render pass. Ugh. 
+class CachedFramebuffer { + public: + // TODO(benvanik): optimized key? tile base + format for each? + + // Framebuffer with the attachments ready for use in the parent render pass. + VkFramebuffer handle = nullptr; + // Width of the framebuffer in pixels. + uint32_t width = 0; + // Height of the framebuffer in pixels. + uint32_t height = 0; + // References to color attachments, if used. + CachedTileView* color_attachments[4] = {nullptr}; + // Reference to depth/stencil attachment, if used. + CachedTileView* depth_stencil_attachment = nullptr; + + CachedFramebuffer(VkDevice device, VkRenderPass render_pass, + uint32_t surface_width, uint32_t surface_height, + CachedTileView* target_color_attachments[4], + CachedTileView* target_depth_stencil_attachment); + ~CachedFramebuffer(); + + bool IsCompatible(const RenderConfiguration& desired_config) const; + + private: + VkDevice device_ = nullptr; +}; + +// Cached render passes based on register states. +// Each render pass is dependent on the format, dimensions, and use of +// all attachments. The same render pass can be reused for multiple +// framebuffers pointing at various tile views, though those cached +// framebuffers are specific to the render pass. +class CachedRenderPass { + public: + // Configuration this pass was created with. + RenderConfiguration config; + // Initialized render pass for the register state. + VkRenderPass handle = nullptr; + // Cache of framebuffers for the various tile attachments. + std::vector cached_framebuffers; + + CachedRenderPass(VkDevice device, const RenderConfiguration& desired_config); + ~CachedRenderPass(); + + bool IsCompatible(const RenderConfiguration& desired_config) const; + + private: + VkDevice device_ = nullptr; +}; + +CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, + TileViewKey view_key) + : device_(device), key(std::move(view_key)) { + // Map format to Vulkan. 
+ VkFormat vulkan_format = VK_FORMAT_UNDEFINED; + uint32_t bpp = 4; + if (key.color_or_depth) { + auto edram_format = static_cast(key.edram_format); + vulkan_format = ColorRenderTargetFormatToVkFormat(edram_format); + switch (edram_format) { + case ColorRenderTargetFormat::k_16_16_16_16: + case ColorRenderTargetFormat::k_16_16_16_16_FLOAT: + case ColorRenderTargetFormat::k_32_32_FLOAT: + bpp = 8; + break; + default: + bpp = 4; + break; + } + } else { + auto edram_format = static_cast(key.edram_format); + vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); + } + assert_true(vulkan_format != VK_FORMAT_UNDEFINED); + assert_true(bpp == 4); + + // Create the image with the desired properties. + VkImageCreateInfo image_info; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.pNext = nullptr; + // TODO(benvanik): exploit VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so we can have + // multiple views. + image_info.flags = 0; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = vulkan_format; + image_info.extent.width = key.tile_width * 80; + image_info.extent.height = key.tile_height * 16; + image_info.extent.depth = 1; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + // TODO(benvanik): native MSAA support? + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.usage |= key.color_or_depth + ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT + : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.queueFamilyIndexCount = 0; + image_info.pQueueFamilyIndices = nullptr; + image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + auto err = vkCreateImage(device_, &image_info, nullptr, &image); + CheckResult(err, "vkCreateImage"); + + // Verify our assumptions about memory layout are correct. 
+ VkDeviceSize edram_offset = key.tile_offset * 5120; + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(device, image, &memory_requirements); + assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity); + assert_true(edram_offset % memory_requirements.alignment == 0); + + // Bind to the region of EDRAM we occupy. + err = vkBindImageMemory(device_, image, edram_memory, edram_offset); + CheckResult(err, "vkBindImageMemory"); + + // Create the image view we'll use to attach it to a framebuffer. + VkImageViewCreateInfo image_view_info; + image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + image_view_info.pNext = nullptr; + image_view_info.flags = 0; + image_view_info.image = image; + image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_info.format = image_info.format; + // TODO(benvanik): manipulate? may not be able to when attached. + image_view_info.components = { + VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, + }; + image_view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + if (key.color_or_depth) { + image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + } else { + image_view_info.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } + err = vkCreateImageView(device_, &image_view_info, nullptr, &image_view); + CheckResult(err, "vkCreateImageView"); + + // TODO(benvanik): transition to general layout? 
+} + +CachedTileView::~CachedTileView() { + vkDestroyImageView(device_, image_view, nullptr); + vkDestroyImage(device_, image, nullptr); +} + +CachedFramebuffer::CachedFramebuffer( + VkDevice device, VkRenderPass render_pass, uint32_t surface_width, + uint32_t surface_height, CachedTileView* target_color_attachments[4], + CachedTileView* target_depth_stencil_attachment) + : device_(device), + width(surface_width), + height(surface_height), + depth_stencil_attachment(target_depth_stencil_attachment) { + for (int i = 0; i < 4; ++i) { + color_attachments[i] = target_color_attachments[i]; + } + + // Create framebuffer. + VkImageView image_views[5] = {nullptr}; + int image_view_count = 0; + for (int i = 0; i < 4; ++i) { + if (color_attachments[i]) { + image_views[image_view_count++] = color_attachments[i]->image_view; + } + } + if (depth_stencil_attachment) { + image_views[image_view_count++] = depth_stencil_attachment->image_view; + } + VkFramebufferCreateInfo framebuffer_info; + framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebuffer_info.pNext = nullptr; + framebuffer_info.renderPass = render_pass; + framebuffer_info.attachmentCount = image_view_count; + framebuffer_info.pAttachments = image_views; + framebuffer_info.width = width; + framebuffer_info.height = height; + framebuffer_info.layers = 1; + auto err = vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle); + CheckResult(err, "vkCreateFramebuffer"); +} + +CachedFramebuffer::~CachedFramebuffer() { + vkDestroyFramebuffer(device_, handle, nullptr); +} + +bool CachedFramebuffer::IsCompatible( + const RenderConfiguration& desired_config) const { + // We already know all render pass things line up, so let's verify dimensions, + // edram offsets, etc. We need an exact match. + // TODO(benvanik): separate image views from images in tiles and store in fb? + for (int i = 0; i < 4; ++i) { + // Ensure the the attachment points to the same tile. 
+ if (!color_attachments[i]) { + continue; + } + auto& color_info = color_attachments[i]->key; + auto& desired_color_info = desired_config.color[i]; + if (color_info.tile_offset != desired_color_info.edram_base || + color_info.edram_format != + static_cast(desired_color_info.format)) { + return false; + } + } + // Ensure depth attachment is correct. + if (depth_stencil_attachment && + (depth_stencil_attachment->key.tile_offset != + desired_config.depth_stencil.edram_base || + depth_stencil_attachment->key.edram_format != + static_cast(desired_config.depth_stencil.format))) { + return false; + } + return true; +} + +CachedRenderPass::CachedRenderPass(VkDevice device, + const RenderConfiguration& desired_config) + : device_(device) { + std::memcpy(&config, &desired_config, sizeof(config)); + + // Initialize all attachments to default unused. + // As we set layout(location=RT) in shaders we must always provide 4. + VkAttachmentDescription attachments[5]; + for (int i = 0; i < 4; ++i) { + attachments[i].flags = 0; + attachments[i].format = VK_FORMAT_UNDEFINED; + attachments[i].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL; + attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL; + } + auto& depth_stencil_attachment = attachments[4]; + depth_stencil_attachment.flags = 0; + depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; + depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + depth_stencil_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + depth_stencil_attachment.initialLayout 
= VK_IMAGE_LAYOUT_GENERAL; + depth_stencil_attachment.finalLayout = VK_IMAGE_LAYOUT_GENERAL; + VkAttachmentReference depth_stencil_attachment_ref; + depth_stencil_attachment_ref.attachment = VK_ATTACHMENT_UNUSED; + depth_stencil_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; + + // Configure attachments based on what's enabled. + VkAttachmentReference color_attachment_refs[4]; + for (int i = 0; i < 4; ++i) { + auto& color_config = config.color[i]; + // TODO(benvanik): see how loose we can be with these. + attachments[i].format = + ColorRenderTargetFormatToVkFormat(color_config.format); + auto& color_attachment_ref = color_attachment_refs[i]; + color_attachment_ref.attachment = i; + color_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL; + } + auto& depth_config = config.depth_stencil; + depth_stencil_attachment_ref.attachment = 4; + depth_stencil_attachment.format = + DepthRenderTargetFormatToVkFormat(depth_config.format); + + // Single subpass that writes to our attachments. + VkSubpassDescription subpass_info; + subpass_info.flags = 0; + subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass_info.inputAttachmentCount = 0; + subpass_info.pInputAttachments = nullptr; + subpass_info.colorAttachmentCount = 4; + subpass_info.pColorAttachments = color_attachment_refs; + subpass_info.pResolveAttachments = nullptr; + subpass_info.pDepthStencilAttachment = &depth_stencil_attachment_ref; + subpass_info.preserveAttachmentCount = 0; + subpass_info.pPreserveAttachments = nullptr; + + // Create the render pass. 
+ VkRenderPassCreateInfo render_pass_info; + render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_info.pNext = nullptr; + render_pass_info.attachmentCount = 5; + render_pass_info.pAttachments = attachments; + render_pass_info.subpassCount = 1; + render_pass_info.pSubpasses = &subpass_info; + render_pass_info.dependencyCount = 0; + render_pass_info.pDependencies = nullptr; + auto err = vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle); + CheckResult(err, "vkCreateRenderPass"); +} + +CachedRenderPass::~CachedRenderPass() { + for (auto framebuffer : cached_framebuffers) { + delete framebuffer; + } + cached_framebuffers.clear(); + + vkDestroyRenderPass(device_, handle, nullptr); +} + +bool CachedRenderPass::IsCompatible( + const RenderConfiguration& desired_config) const { + for (int i = 0; i < 4; ++i) { + // TODO(benvanik): allow compatible vulkan formats. + if (config.color[i].format != desired_config.color[i].format) { + return false; + } + } + if (config.depth_stencil.format != desired_config.depth_stencil.format) { + return false; + } + return true; +} + RenderCache::RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device) - : register_file_(register_file), device_(*device) {} + : register_file_(register_file), device_(*device) { + // Create the buffer we'll bind to our memory. + // We do this first so we can get the right memory type. 
+ VkBufferCreateInfo buffer_info; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = nullptr; + buffer_info.flags = 0; + buffer_info.size = kEdramBufferCapacity; + buffer_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + buffer_info.queueFamilyIndexCount = 0; + buffer_info.pQueueFamilyIndices = nullptr; + auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); + CheckResult(err, "vkCreateBuffer"); -RenderCache::~RenderCache() = default; + // Query requirements for the buffer. + // It should be 1:1. + VkMemoryRequirements buffer_requirements; + vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements); + assert_true(buffer_requirements.size == kEdramBufferCapacity); -VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, - VulkanShader* vertex_shader, - VulkanShader* pixel_shader) { + // Create a dummy image so we can see what memory bits it requires. + // They should overlap with the buffer requirements but are likely more + // strict. 
+ VkImageCreateInfo test_image_info; + test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + test_image_info.pNext = nullptr; + test_image_info.flags = 0; + test_image_info.imageType = VK_IMAGE_TYPE_2D; + test_image_info.format = VK_FORMAT_R8G8B8A8_UINT; + test_image_info.extent.width = 128; + test_image_info.extent.height = 128; + test_image_info.extent.depth = 1; + test_image_info.mipLevels = 1; + test_image_info.arrayLayers = 1; + test_image_info.samples = VK_SAMPLE_COUNT_1_BIT; + test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + test_image_info.queueFamilyIndexCount = 0; + test_image_info.pQueueFamilyIndices = nullptr; + test_image_info.initialLayout = VK_IMAGE_LAYOUT_GENERAL; + VkImage test_image = nullptr; + err = vkCreateImage(device_, &test_image_info, nullptr, &test_image); + CheckResult(err, "vkCreateImage"); + VkMemoryRequirements image_requirements; + vkGetImageMemoryRequirements(device_, test_image, &image_requirements); + vkDestroyImage(device_, test_image, nullptr); + assert_true((image_requirements.memoryTypeBits & + buffer_requirements.memoryTypeBits) != 0); + + // Allocate EDRAM memory. + VkMemoryRequirements memory_requirements; + memory_requirements.size = buffer_requirements.size; + memory_requirements.alignment = buffer_requirements.alignment; + memory_requirements.memoryTypeBits = image_requirements.memoryTypeBits; + // TODO(benvanik): do we need it host visible? + edram_memory_ = device->AllocateMemory(memory_requirements, 0); + + // Bind buffer to map our entire memory. + vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0); +} + +RenderCache::~RenderCache() { + // TODO(benvanik): wait for idle. + + // Dispose all render passes (and their framebuffers). 
+ for (auto render_pass : cached_render_passes_) { + delete render_pass; + } + cached_render_passes_.clear(); + + // Dispose all of our cached tile views. + for (auto tile_view : cached_tile_views_) { + delete tile_view; + } + cached_tile_views_.clear(); + + // Release underlying EDRAM memory. + vkDestroyBuffer(device_, edram_buffer_, nullptr); + vkFreeMemory(device_, edram_memory_, nullptr); +} + +const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, + VulkanShader* vertex_shader, + VulkanShader* pixel_shader) { assert_null(current_command_buffer_); current_command_buffer_ = command_buffer; // Lookup or construct a render pass compatible with our current state. - VkRenderPass render_pass = nullptr; + auto config = &current_state_.config; + CachedRenderPass* render_pass = nullptr; + CachedFramebuffer* framebuffer = nullptr; + auto& regs = shadow_registers_; + bool dirty = false; + dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); + dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + if (!dirty && current_state_.render_pass) { + // No registers have changed so we can reuse the previous render pass - + // just begin with what we had. + render_pass = current_state_.render_pass; + framebuffer = current_state_.framebuffer; + } else { + // Re-parse configuration. + if (!ParseConfiguration(config)) { + return nullptr; + } - // Begin render pass. 
+ // Lookup or generate a new render pass and framebuffer for the new state. + if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) { + return nullptr; + } + current_state_.render_pass = render_pass; + current_state_.framebuffer = framebuffer; + } + if (!render_pass) { + return nullptr; + } + + // Setup render pass in command buffer. + // This is meant to preserve previous contents as we may be called + // repeatedly. VkRenderPassBeginInfo render_pass_begin_info; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.pNext = nullptr; - render_pass_begin_info.renderPass = render_pass; - - // Target framebuffer. - // render_pass_begin_info.framebuffer = current_buffer.framebuffer; + render_pass_begin_info.renderPass = render_pass->handle; + render_pass_begin_info.framebuffer = framebuffer->handle; // Render into the entire buffer (or at least tell the API we are doing // this). In theory it'd be better to clip this to the scissor region, but // the docs warn anything but the full framebuffer may be slow. render_pass_begin_info.renderArea.offset.x = 0; render_pass_begin_info.renderArea.offset.y = 0; - // render_pass_begin_info.renderArea.extent.width = surface_width_; - // render_pass_begin_info.renderArea.extent.height = surface_height_; + render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; + render_pass_begin_info.renderArea.extent.height = config->surface_height_px; // Configure clear color, if clearing. - VkClearValue color_clear_value; - color_clear_value.color.float32[0] = 238 / 255.0f; - color_clear_value.color.float32[1] = 238 / 255.0f; - color_clear_value.color.float32[2] = 238 / 255.0f; - color_clear_value.color.float32[3] = 1.0f; - VkClearValue clear_values[] = {color_clear_value}; - render_pass_begin_info.clearValueCount = - static_cast(xe::countof(clear_values)); - render_pass_begin_info.pClearValues = clear_values; + // TODO(benvanik): enable clearing here during resolve? 
+ render_pass_begin_info.clearValueCount = 0; + render_pass_begin_info.pClearValues = nullptr; + // Begin the render pass. vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); - return render_pass; + return &current_state_; +} + +bool RenderCache::ParseConfiguration(RenderConfiguration* config) { + auto& regs = shadow_registers_; + + // RB_MODECONTROL + // Rough mode control (color, color+depth, etc). + config->mode_control = static_cast<ModeControl>(regs.rb_modecontrol & 0x7); + + // RB_SURFACE_INFO + // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html + config->surface_pitch_px = regs.rb_surface_info & 0x3FFF; + config->surface_msaa = + static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3); + + // TODO(benvanik): verify min/max so we don't go out of bounds. + // TODO(benvanik): has to be a good way to get height. + // Guess the height from the scissor height. + // It's wildly inaccurate, but I've never seen it be bigger than the + // EDRAM tiling. + uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; + uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; + config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16)); + + // Color attachment configuration. + if (config->mode_control == ModeControl::kColorDepth) { + uint32_t color_info[4] = { + regs.rb_color_info, regs.rb_color1_info, regs.rb_color2_info, + regs.rb_color3_info, + }; + for (int i = 0; i < 4; ++i) { + config->color[i].edram_base = color_info[i] & 0xFFF; + config->color[i].format = + static_cast<ColorRenderTargetFormat>((color_info[i] >> 16) & 0xF); + // We don't support GAMMA formats, so switch them to what we do support. 
+ switch (config->color[i].format) { + case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: + config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; + break; + } + } + } else { + for (int i = 0; i < 4; ++i) { + config->color[i].edram_base = 0; + config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; + } + } + + // Depth/stencil attachment configuration. + if (config->mode_control == ModeControl::kColorDepth || + config->mode_control == ModeControl::kDepth) { + config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF; + config->depth_stencil.format = + static_cast((regs.rb_depth_info >> 16) & 0x1); + } else { + config->depth_stencil.edram_base = 0; + config->depth_stencil.format = DepthRenderTargetFormat::kD24S8; + } + + return true; +} + +bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, + CachedRenderPass** out_render_pass, + CachedFramebuffer** out_framebuffer) { + *out_render_pass = nullptr; + *out_framebuffer = nullptr; + + // TODO(benvanik): better lookup. + // Attempt to find the render pass in our cache. + CachedRenderPass* render_pass = nullptr; + for (auto cached_render_pass : cached_render_passes_) { + if (cached_render_pass->IsCompatible(*config)) { + // Found a match. + render_pass = cached_render_pass; + break; + } + } + + // If no render pass was found in the cache create a new one. + if (!render_pass) { + render_pass = new CachedRenderPass(device_, *config); + cached_render_passes_.push_back(render_pass); + } + + // TODO(benvanik): better lookup. + // Attempt to find the framebuffer in the render pass cache. + CachedFramebuffer* framebuffer = nullptr; + for (auto cached_framebuffer : render_pass->cached_framebuffers) { + if (cached_framebuffer->IsCompatible(*config)) { + // Found a match. + framebuffer = cached_framebuffer; + break; + } + } + + // If no framebuffer was found in the cache create a new one. 
+ if (!framebuffer) { + CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr, + nullptr}; + for (int i = 0; i < 4; ++i) { + TileViewKey color_key; + color_key.tile_offset = config->color[i].edram_base; + color_key.tile_width = config->surface_pitch_px / 80; + color_key.tile_height = config->surface_height_px / 16; + color_key.color_or_depth = 1; + color_key.edram_format = static_cast<uint16_t>(config->color[i].format); + target_color_attachments[i] = GetTileView(color_key); + if (!target_color_attachments[i]) { + XELOGE("Failed to get tile view for color attachment"); + return false; + } + } + + TileViewKey depth_stencil_key; + depth_stencil_key.tile_offset = config->depth_stencil.edram_base; + depth_stencil_key.tile_width = config->surface_pitch_px / 80; + depth_stencil_key.tile_height = config->surface_height_px / 16; + depth_stencil_key.color_or_depth = 0; + depth_stencil_key.edram_format = + static_cast<uint16_t>(config->depth_stencil.format); + auto target_depth_stencil_attachment = GetTileView(depth_stencil_key); + if (!target_depth_stencil_attachment) { + XELOGE("Failed to get tile view for depth/stencil attachment"); + return false; + } + + framebuffer = new CachedFramebuffer( + device_, render_pass->handle, config->surface_pitch_px, + config->surface_height_px, target_color_attachments, + target_depth_stencil_attachment); + render_pass->cached_framebuffers.push_back(framebuffer); + } + + *out_render_pass = render_pass; + *out_framebuffer = framebuffer; + return true; +} + +CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) { + // Check the cache. + // TODO(benvanik): better lookup. + for (auto tile_view : cached_tile_views_) { + if (tile_view->IsEqual(view_key)) { + return tile_view; + } + } + + // Create a new tile and add to the cache. 
+ auto tile_view = new CachedTileView(device_, edram_memory_, view_key); + cached_tile_views_.push_back(tile_view); + return tile_view; } void RenderCache::EndRenderPass() { @@ -84,6 +742,15 @@ void RenderCache::ClearCache() { // TODO(benvanik): caching. } +bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + } // namespace vulkan } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/render_cache.h b/src/xenia/gpu/vulkan/render_cache.h index aeaa07264..865b34cfd 100644 --- a/src/xenia/gpu/vulkan/render_cache.h +++ b/src/xenia/gpu/vulkan/render_cache.h @@ -21,6 +21,58 @@ namespace xe { namespace gpu { namespace vulkan { +// TODO(benvanik): make public API? +class CachedTileView; +class CachedFramebuffer; +class CachedRenderPass; + +// Uniquely identifies EDRAM tiles. +struct TileViewKey { + // Offset into EDRAM in 5120b tiles. + uint16_t tile_offset; + // Tile width of the view in base 80x16 tiles. + uint16_t tile_width; + // Tile height of the view in base 80x16 tiles. + uint16_t tile_height; + // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. + uint16_t color_or_depth : 1; + // Either ColorRenderTargetFormat or DepthRenderTargetFormat. + uint16_t edram_format : 15; +}; +static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); + +// Parsed render configuration from the current render state. +struct RenderConfiguration { + // Render mode (color+depth, depth-only, etc). + xenos::ModeControl mode_control; + // Target surface pitch, in pixels. + uint32_t surface_pitch_px; + // ESTIMATED target surface height, in pixels. + uint32_t surface_height_px; + // Surface MSAA setting. + MsaaSamples surface_msaa; + // Color attachments for the 4 render targets. 
+  struct {
+    // Copied into TileViewKey::tile_offset by ConfigureRenderPass.
+    // NOTE(review): presumably expressed in EDRAM tiles - confirm.
+    uint32_t edram_base;
+    ColorRenderTargetFormat format;
+  } color[4];
+  // Depth/stencil attachment.
+  struct {
+    // Copied into TileViewKey::tile_offset by ConfigureRenderPass.
+    uint32_t edram_base;
+    DepthRenderTargetFormat format;
+  } depth_stencil;
+};
+
+// Current render state based on the register-specified configuration.
+struct RenderState {
+  // Parsed configuration.
+  RenderConfiguration config;
+  // Render pass (to be used with pipelines/etc).
+  CachedRenderPass* render_pass = nullptr;
+  // Target framebuffer bound to the render pass.
+  CachedFramebuffer* framebuffer = nullptr;
+};
+
 // Manages the virtualized EDRAM and the render target cache.
 //
 // On the 360 the render target is an opaque block of memory in EDRAM that's
@@ -165,9 +217,9 @@ class RenderCache {
 
   // Begins a render pass targeting the state-specified framebuffer formats.
   // The command buffer will be transitioned into the render pass phase.
-  VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer,
-                               VulkanShader* vertex_shader,
-                               VulkanShader* pixel_shader);
+  // The IssueDraw caller treats a null return value as a failure.
+  const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
+                                     VulkanShader* vertex_shader,
+                                     VulkanShader* pixel_shader);
 
   // Ends the current render pass.
   // The command buffer will be transitioned out of the render pass phase.
@@ -177,9 +229,57 @@ class RenderCache {
   void ClearCache();
 
  private:
+  // Parses the current state into a configuration object.
+  bool ParseConfiguration(RenderConfiguration* config);
+
+  // Gets or creates a render pass and frame buffer for the given configuration.
+  // This attempts to reuse as much as possible across render passes and
+  // framebuffers.
+  // Returns false if a required tile view could not be obtained; on success
+  // both out parameters are written.
+  bool ConfigureRenderPass(RenderConfiguration* config,
+                           CachedRenderPass** out_render_pass,
+                           CachedFramebuffer** out_framebuffer);
+
+  // Gets or creates a tile view with the given parameters.
+  CachedTileView* GetTileView(const TileViewKey& view_key);
+
   RegisterFile* register_file_ = nullptr;
   VkDevice device_ = nullptr;
 
+  // Entire 10MiB of EDRAM, aliased to hell by various VkImages.
+ VkDeviceMemory edram_memory_ = nullptr; + // Buffer overlayed 1:1 with edram_memory_ to allow raw access. + VkBuffer edram_buffer_ = nullptr; + + // Cache of VkImage and VkImageView's for all of our EDRAM tilings. + // TODO(benvanik): non-linear lookup? Should only be a small number of these. + std::vector cached_tile_views_; + + // Cache of render passes based on formats. + std::vector cached_render_passes_; + + // Shadows of the registers that impact the render pass we choose. + // If the registers don't change between passes we can quickly reuse the + // previous one. + struct ShadowRegisters { + uint32_t rb_modecontrol; + uint32_t rb_surface_info; + uint32_t rb_color_info; + uint32_t rb_color1_info; + uint32_t rb_color2_info; + uint32_t rb_color3_info; + uint32_t rb_depth_info; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + + ShadowRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } shadow_registers_; + bool SetShadowRegister(uint32_t* dest, uint32_t register_name); + + // Configuration used for the current/previous Begin/End, representing the + // current shadow register state. + RenderState current_state_; + // Only valid during a BeginRenderPass/EndRenderPass block. VkCommandBuffer current_command_buffer_ = nullptr; }; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 8047bd202..e19e89c29 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -179,9 +179,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // We need the to do just about anything so validate here. auto vertex_shader = static_cast(active_vertex_shader()); auto pixel_shader = static_cast(active_pixel_shader()); - if (!vertex_shader || !vertex_shader->is_valid() || !pixel_shader || - !pixel_shader->is_valid()) { - // Skipped because we can't understand the shader. 
+ if (!vertex_shader || !vertex_shader->is_valid()) { + // Always need a vertex shader. + return true; + } + // Depth-only mode doesn't need a pixel shader (we'll use a fake one). + if (enable_mode == ModeControl::kDepth) { + // Use a dummy pixel shader when required. + // TODO(benvanik): dummy pixel shader. + assert_not_null(pixel_shader); + } else if (!pixel_shader || !pixel_shader->is_valid()) { + // Need a pixel shader in normal color mode. return true; } @@ -198,16 +206,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, // Begin the render pass. // This will setup our framebuffer and begin the pass in the command buffer. - VkRenderPass render_pass = render_cache_->BeginRenderPass( + auto render_state = render_cache_->BeginRenderPass( command_buffer, vertex_shader, pixel_shader); - if (!render_pass) { + if (!render_state) { return false; } // Configure the pipeline for drawing. // This encodes all render state (blend, depth, etc), our shader stages, // and our vertex input layout. - if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass, + if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state, vertex_shader, pixel_shader, primitive_type)) { render_cache_->EndRenderPass(); @@ -215,12 +223,14 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, } // Upload the constants the shaders require. - auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters( + // These are optional, and if none are defined 0 will be returned. + VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters( vertex_shader->constant_register_map()); - auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters( pixel_shader->constant_register_map()); if (vertex_constant_offset == VK_WHOLE_SIZE || pixel_constant_offset == VK_WHOLE_SIZE) { + // Shader wants constants but we couldn't upload them. 
render_cache_->EndRenderPass();
     return false;
   }
@@ -307,11 +317,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
   if (queue_mutex_) {
     queue_mutex_->lock();
   }
-  vkQueueWaitIdle(queue_);
+  // Wait on the queue and then the whole device, handing each result to
+  // CheckResult together with the name of the call that produced it.
+  err = vkQueueWaitIdle(queue_);
+  CheckResult(err, "vkQueueWaitIdle");
+  err = vkDeviceWaitIdle(*device_);
+  CheckResult(err, "vkDeviceWaitIdle");
   if (queue_mutex_) {
     queue_mutex_->unlock();
   }
-  command_buffer_pool_->Scavenge();
+  // Keep scavenging until the pool reports no pending batches, yielding
+  // between attempts.
+  while (command_buffer_pool_->has_pending()) {
+    command_buffer_pool_->Scavenge();
+    xe::threading::MaybeYield();
+  }
   vkDestroyFence(*device_, fence, nullptr);
 
   return true;
diff --git a/src/xenia/gpu/vulkan/vulkan_shader.h b/src/xenia/gpu/vulkan/vulkan_shader.h
index 97dbd5822..1dc55d8b0 100644
--- a/src/xenia/gpu/vulkan/vulkan_shader.h
+++ b/src/xenia/gpu/vulkan/vulkan_shader.h
@@ -31,7 +31,7 @@ class VulkanShader : public Shader {
   bool Prepare();
 
  private:
-  VkDevice device_ = nullptr;
+  VkDevice device_ = nullptr;
   VkShaderModule shader_module_ = nullptr;
 };
 
diff --git a/src/xenia/ui/vulkan/fenced_pools.h b/src/xenia/ui/vulkan/fenced_pools.h
index 52274a9d9..3bc7e30f6 100644
--- a/src/xenia/ui/vulkan/fenced_pools.h
+++ b/src/xenia/ui/vulkan/fenced_pools.h
@@ -47,6 +47,9 @@ class BaseFencedPool {
     }
   }
 
+  // True if one or more batches are still pending on the GPU.
+  // Implemented as a null check on pending_batch_list_head_.
+  bool has_pending() const { return pending_batch_list_head_ != nullptr; }
+
   // Checks all pending batches for completion and scavenges their entries.
   // This should be called as frequently as reasonable.
void Scavenge() { diff --git a/src/xenia/ui/vulkan/vulkan.cc b/src/xenia/ui/vulkan/vulkan.cc index ba889e109..77c8fb548 100644 --- a/src/xenia/ui/vulkan/vulkan.cc +++ b/src/xenia/ui/vulkan/vulkan.cc @@ -10,3 +10,7 @@ #include "xenia/ui/vulkan/vulkan.h" DEFINE_bool(vulkan_validation, false, "Enable Vulkan validation layers."); + +DEFINE_bool(vulkan_primary_queue_only, false, + "Force the use of the primary queue, ignoring any additional that " + "may be present."); diff --git a/src/xenia/ui/vulkan/vulkan.h b/src/xenia/ui/vulkan/vulkan.h index 697c9fa57..7a7e64f10 100644 --- a/src/xenia/ui/vulkan/vulkan.h +++ b/src/xenia/ui/vulkan/vulkan.h @@ -30,5 +30,6 @@ #define XELOGVK XELOGI DECLARE_bool(vulkan_validation); +DECLARE_bool(vulkan_primary_queue_only); #endif // XENIA_UI_VULKAN_VULKAN_H_ diff --git a/src/xenia/ui/vulkan/vulkan_context.cc b/src/xenia/ui/vulkan/vulkan_context.cc index a2c5998f4..381fb0ab7 100644 --- a/src/xenia/ui/vulkan/vulkan_context.cc +++ b/src/xenia/ui/vulkan/vulkan_context.cc @@ -142,7 +142,7 @@ void VulkanContext::EndSwap() { } std::unique_ptr VulkanContext::Capture() { - assert_always(); + // TODO(benvanik): read back swap chain front buffer. return nullptr; } diff --git a/src/xenia/ui/vulkan/vulkan_device.cc b/src/xenia/ui/vulkan/vulkan_device.cc index c7ca1d974..ded29212d 100644 --- a/src/xenia/ui/vulkan/vulkan_device.cc +++ b/src/xenia/ui/vulkan/vulkan_device.cc @@ -129,6 +129,11 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) { return false; } + // Some tools *cough* renderdoc *cough* can't handle multiple queues. + if (FLAGS_vulkan_primary_queue_only) { + queue_count = 1; + } + VkDeviceQueueCreateInfo queue_info; queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue_info.pNext = nullptr;