/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2022 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/vulkan_command_processor.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <tuple>
#include <utility>

#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_presenter.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Generated with `xb buildshaders`.
namespace shaders {
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_tc_vs.h"
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/uv_ps.h"
}  // namespace shaders

VulkanCommandProcessor::VulkanCommandProcessor(
    VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
    : CommandProcessor(graphics_system, kernel_state),
      deferred_command_buffer_(*this) {}

VulkanCommandProcessor::~VulkanCommandProcessor() = default;

void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
                                                      uint32_t length) {
  shared_memory_->MemoryInvalidationCallback(base_ptr, length, true);
  primitive_processor_->MemoryInvalidationCallback(base_ptr, length, true);
}

void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {}

bool VulkanCommandProcessor::SetupContext() {
  if (!CommandProcessor::SetupContext()) {
    XELOGE("Failed to initialize base command processor context");
    return false;
  }

  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();
  const VkPhysicalDeviceFeatures& device_features = provider.device_features();

  guest_shader_pipeline_stages_ = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                                  VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
  guest_shader_vertex_stages_ = VK_SHADER_STAGE_VERTEX_BIT;
  if (device_features.tessellationShader) {
    guest_shader_pipeline_stages_ |=
        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
    guest_shader_vertex_stages_ |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
  }
  if (!device_features.vertexPipelineStoresAndAtomics) {
    // For memory export from vertex shaders converted to compute shaders.
    guest_shader_pipeline_stages_ |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    guest_shader_vertex_stages_ |= VK_SHADER_STAGE_COMPUTE_BIT;
  }

  // No specific reason for 32768, just the "too much" amount from Direct3D 12
  // PIX warnings.
  transient_descriptor_pool_uniform_buffers_ =
      std::make_unique<ui::vulkan::TransientDescriptorPool>(
          provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768);
  // 16384 is bigger than any single uniform buffer that Xenia needs, but is
  // the minimum maxUniformBufferRange, thus the safe minimum amount.
  VkDeviceSize uniform_buffer_alignment = std::max(
      provider.device_properties().limits.minUniformBufferOffsetAlignment,
      VkDeviceSize(1));
  uniform_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
      provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
      xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize,
                         size_t(16384)),
                size_t(uniform_buffer_alignment)));

  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
  descriptor_set_layout_create_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  descriptor_set_layout_create_info.pNext = nullptr;
  descriptor_set_layout_create_info.flags = 0;
  descriptor_set_layout_create_info.bindingCount = 0;
  descriptor_set_layout_create_info.pBindings = nullptr;
  if (dfn.vkCreateDescriptorSetLayout(
          device, &descriptor_set_layout_create_info, nullptr,
          &descriptor_set_layout_empty_) != VK_SUCCESS) {
    XELOGE("Failed to create an empty Vulkan descriptor set layout");
    return false;
  }
  VkShaderStageFlags guest_shader_stages =
      guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT;
  VkDescriptorSetLayoutBinding descriptor_set_layout_binding_uniform_buffer;
  descriptor_set_layout_binding_uniform_buffer.binding = 0;
  descriptor_set_layout_binding_uniform_buffer.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
  descriptor_set_layout_binding_uniform_buffer.descriptorCount = 1;
  descriptor_set_layout_binding_uniform_buffer.stageFlags = guest_shader_stages;
  descriptor_set_layout_binding_uniform_buffer.pImmutableSamplers = nullptr;
  descriptor_set_layout_create_info.bindingCount = 1;
  descriptor_set_layout_create_info.pBindings =
      &descriptor_set_layout_binding_uniform_buffer;
  if (dfn.vkCreateDescriptorSetLayout(
          device, &descriptor_set_layout_create_info, nullptr,
          &descriptor_set_layout_fetch_bool_loop_constants_) != VK_SUCCESS) {
    XELOGE(
        "Failed to create a Vulkan descriptor set layout for the fetch, bool "
        "and loop constants uniform buffer");
    return false;
  }
  descriptor_set_layout_binding_uniform_buffer.stageFlags =
      guest_shader_vertex_stages_;
  if (dfn.vkCreateDescriptorSetLayout(
          device, &descriptor_set_layout_create_info, nullptr,
          &descriptor_set_layout_float_constants_vertex_) != VK_SUCCESS) {
    XELOGE(
        "Failed to create a Vulkan descriptor set layout for the vertex "
        "shader float constants uniform buffer");
    return false;
  }
  descriptor_set_layout_binding_uniform_buffer.stageFlags =
      VK_SHADER_STAGE_FRAGMENT_BIT;
  if (dfn.vkCreateDescriptorSetLayout(
          device, &descriptor_set_layout_create_info, nullptr,
          &descriptor_set_layout_float_constants_pixel_) != VK_SUCCESS) {
    XELOGE(
        "Failed to create a Vulkan descriptor set layout for the pixel shader "
        "float constants uniform buffer");
    return false;
  }
  descriptor_set_layout_binding_uniform_buffer.stageFlags = guest_shader_stages;
  if (device_features.tessellationShader) {
    descriptor_set_layout_binding_uniform_buffer.stageFlags |=
        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
  }
  if (dfn.vkCreateDescriptorSetLayout(
          device, &descriptor_set_layout_create_info, nullptr,
          &descriptor_set_layout_system_constants_) != VK_SUCCESS) {
    XELOGE(
        "Failed to create a Vulkan descriptor set layout for the system "
        "constants uniform buffer");
    return false;
  }
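  // The shared memory buffer may be larger than the device's
  // maxStorageBufferRange, so it's exposed to guest shaders as 2^n
  // equally-sized storage buffer bindings, each small enough to fit within
  // that limit.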
  uint32_t shared_memory_binding_count_log2 =
      SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2(
          provider.device_properties().limits.maxStorageBufferRange);
  uint32_t shared_memory_binding_count = uint32_t(1)
                                         << shared_memory_binding_count_log2;
  VkDescriptorSetLayoutBinding
      descriptor_set_layout_bindings_shared_memory_and_edram[1];
  descriptor_set_layout_bindings_shared_memory_and_edram[0].binding = 0;
  descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorType =
      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
  descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorCount =
      shared_memory_binding_count;
  // TODO(Triang3l): When fullDrawIndexUint32 fallback is added, force host
  // vertex shader access to the shared memory for the tessellation vertex
  // shader (to retrieve tessellation factors).
  descriptor_set_layout_bindings_shared_memory_and_edram[0].stageFlags =
      guest_shader_stages;
  descriptor_set_layout_bindings_shared_memory_and_edram[0].pImmutableSamplers =
      nullptr;
  // TODO(Triang3l): EDRAM storage image binding for the fragment shader
  // interlocks case.
  descriptor_set_layout_create_info.pBindings =
      descriptor_set_layout_bindings_shared_memory_and_edram;
  if (dfn.vkCreateDescriptorSetLayout(
          device, &descriptor_set_layout_create_info, nullptr,
          &descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) {
    XELOGE(
        "Failed to create a Vulkan descriptor set layout for the shared "
        "memory and the EDRAM");
    return false;
  }

  shared_memory_ = std::make_unique<VulkanSharedMemory>(
      *this, *memory_, trace_writer_, guest_shader_pipeline_stages_);
  if (!shared_memory_->Initialize()) {
    XELOGE("Failed to initialize shared memory");
    return false;
  }

  primitive_processor_ = std::make_unique<VulkanPrimitiveProcessor>(
      *register_file_, *memory_, trace_writer_, *shared_memory_, *this);
  if (!primitive_processor_->Initialize()) {
    XELOGE("Failed to initialize the geometric primitive processor");
    return false;
  }

  // TODO(Triang3l): Get the actual draw resolution scale when the texture
  // cache supports resolution scaling.
  render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
      *register_file_, *memory_, &trace_writer_, 1, 1, *this);
  if (!render_target_cache_->Initialize()) {
    XELOGE("Failed to initialize the render target cache");
    return false;
  }

  pipeline_cache_ = std::make_unique<VulkanPipelineCache>(
      *this, *register_file_, *render_target_cache_,
      guest_shader_vertex_stages_);
  if (!pipeline_cache_->Initialize()) {
    XELOGE("Failed to initialize the graphics pipeline cache");
    return false;
  }

  // Shared memory and EDRAM common bindings.
  VkDescriptorPoolSize descriptor_pool_sizes[1];
  descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
  descriptor_pool_sizes[0].descriptorCount = shared_memory_binding_count;
  // TODO(Triang3l): EDRAM storage image binding for the fragment shader
  // interlocks case.
  VkDescriptorPoolCreateInfo descriptor_pool_create_info;
  descriptor_pool_create_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  descriptor_pool_create_info.pNext = nullptr;
  descriptor_pool_create_info.flags = 0;
  descriptor_pool_create_info.maxSets = 1;
  descriptor_pool_create_info.poolSizeCount = 1;
  descriptor_pool_create_info.pPoolSizes = descriptor_pool_sizes;
  if (dfn.vkCreateDescriptorPool(device, &descriptor_pool_create_info, nullptr,
                                 &shared_memory_and_edram_descriptor_pool_) !=
      VK_SUCCESS) {
    XELOGE(
        "Failed to create the Vulkan descriptor pool for shared memory and "
        "EDRAM");
    return false;
  }
  VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
  descriptor_set_allocate_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  descriptor_set_allocate_info.pNext = nullptr;
  descriptor_set_allocate_info.descriptorPool =
      shared_memory_and_edram_descriptor_pool_;
  descriptor_set_allocate_info.descriptorSetCount = 1;
  descriptor_set_allocate_info.pSetLayouts =
      &descriptor_set_layout_shared_memory_and_edram_;
  if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
                                   &shared_memory_and_edram_descriptor_set_) !=
      VK_SUCCESS) {
    XELOGE(
        "Failed to allocate the Vulkan descriptor set for shared memory and "
        "EDRAM");
    return false;
  }
  VkDescriptorBufferInfo
      shared_memory_descriptor_buffers_info[SharedMemory::kBufferSize /
                                            (128 << 20)];
  uint32_t shared_memory_binding_range =
      SharedMemory::kBufferSize >> shared_memory_binding_count_log2;
  for (uint32_t i = 0; i < shared_memory_binding_count; ++i) {
    VkDescriptorBufferInfo& shared_memory_descriptor_buffer_info =
        shared_memory_descriptor_buffers_info[i];
    shared_memory_descriptor_buffer_info.buffer = shared_memory_->buffer();
    shared_memory_descriptor_buffer_info.offset =
        shared_memory_binding_range * i;
    shared_memory_descriptor_buffer_info.range = shared_memory_binding_range;
  }
  VkWriteDescriptorSet write_descriptor_sets[1];
  write_descriptor_sets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  write_descriptor_sets[0].pNext = nullptr;
  write_descriptor_sets[0].dstSet = shared_memory_and_edram_descriptor_set_;
  write_descriptor_sets[0].dstBinding = 0;
  write_descriptor_sets[0].dstArrayElement = 0;
  write_descriptor_sets[0].descriptorCount = shared_memory_binding_count;
  write_descriptor_sets[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
  write_descriptor_sets[0].pImageInfo = nullptr;
  write_descriptor_sets[0].pBufferInfo = shared_memory_descriptor_buffers_info;
  write_descriptor_sets[0].pTexelBufferView = nullptr;
  // TODO(Triang3l): EDRAM storage image binding for the fragment shader
  // interlocks case.
  dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr);

  // Swap objects.

  // Swap render pass. Doesn't make assumptions about outer usage (explicit
  // barriers must be used instead) for simplicity of use in different
  // scenarios with different pipelines.
  VkAttachmentDescription swap_render_pass_attachment;
  swap_render_pass_attachment.flags = 0;
  swap_render_pass_attachment.format =
      ui::vulkan::VulkanPresenter::kGuestOutputFormat;
  swap_render_pass_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
  swap_render_pass_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  swap_render_pass_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
  swap_render_pass_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  swap_render_pass_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  swap_render_pass_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  swap_render_pass_attachment.finalLayout =
      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
  VkAttachmentReference swap_render_pass_color_attachment;
  swap_render_pass_color_attachment.attachment = 0;
  swap_render_pass_color_attachment.layout =
      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
  VkSubpassDescription swap_render_pass_subpass = {};
  swap_render_pass_subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  swap_render_pass_subpass.colorAttachmentCount = 1;
  swap_render_pass_subpass.pColorAttachments =
      &swap_render_pass_color_attachment;
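  // Two external dependencies, one entering the subpass and one leaving it,
  // both at the color attachment output stage, so the render pass chains with
  // the explicit acquire / release image barriers recorded around it.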
  VkSubpassDependency swap_render_pass_dependencies[2];
  for (uint32_t i = 0; i < 2; ++i) {
    VkSubpassDependency& swap_render_pass_dependency =
        swap_render_pass_dependencies[i];
    swap_render_pass_dependency.srcSubpass = i ? 0 : VK_SUBPASS_EXTERNAL;
    swap_render_pass_dependency.dstSubpass = i ? VK_SUBPASS_EXTERNAL : 0;
    swap_render_pass_dependency.srcStageMask =
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    swap_render_pass_dependency.dstStageMask =
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    swap_render_pass_dependency.srcAccessMask =
        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    swap_render_pass_dependency.dstAccessMask =
        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    swap_render_pass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
  }
  VkRenderPassCreateInfo swap_render_pass_create_info;
  swap_render_pass_create_info.sType =
      VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
  swap_render_pass_create_info.pNext = nullptr;
  swap_render_pass_create_info.flags = 0;
  swap_render_pass_create_info.attachmentCount = 1;
  swap_render_pass_create_info.pAttachments = &swap_render_pass_attachment;
  swap_render_pass_create_info.subpassCount = 1;
  swap_render_pass_create_info.pSubpasses = &swap_render_pass_subpass;
  swap_render_pass_create_info.dependencyCount =
      uint32_t(xe::countof(swap_render_pass_dependencies));
  swap_render_pass_create_info.pDependencies = swap_render_pass_dependencies;
  if (dfn.vkCreateRenderPass(device, &swap_render_pass_create_info, nullptr,
                             &swap_render_pass_) != VK_SUCCESS) {
    XELOGE("Failed to create the Vulkan render pass for presentation");
    return false;
  }

  // Swap pipeline layout.
  // TODO(Triang3l): Source binding, push constants, FXAA pipeline layout.
  VkPipelineLayoutCreateInfo swap_pipeline_layout_create_info;
  swap_pipeline_layout_create_info.sType =
      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
  swap_pipeline_layout_create_info.pNext = nullptr;
  swap_pipeline_layout_create_info.flags = 0;
  swap_pipeline_layout_create_info.setLayoutCount = 0;
  swap_pipeline_layout_create_info.pSetLayouts = nullptr;
  swap_pipeline_layout_create_info.pushConstantRangeCount = 0;
  swap_pipeline_layout_create_info.pPushConstantRanges = nullptr;
  if (dfn.vkCreatePipelineLayout(device, &swap_pipeline_layout_create_info,
                                 nullptr,
                                 &swap_pipeline_layout_) != VK_SUCCESS) {
    XELOGE("Failed to create the Vulkan pipeline layout for presentation");
    return false;
  }

  // Swap pipeline.

  VkPipelineShaderStageCreateInfo swap_pipeline_stages[2];
  swap_pipeline_stages[0].sType =
      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  swap_pipeline_stages[0].pNext = nullptr;
  swap_pipeline_stages[0].flags = 0;
  swap_pipeline_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
  swap_pipeline_stages[0].module = ui::vulkan::util::CreateShaderModule(
      provider, shaders::fullscreen_tc_vs, sizeof(shaders::fullscreen_tc_vs));
  if (swap_pipeline_stages[0].module == VK_NULL_HANDLE) {
    XELOGE(
        "Failed to create the Vulkan vertex shader module for presentation");
    return false;
  }
  swap_pipeline_stages[0].pName = "main";
  swap_pipeline_stages[0].pSpecializationInfo = nullptr;
  swap_pipeline_stages[1].sType =
      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  swap_pipeline_stages[1].pNext = nullptr;
  swap_pipeline_stages[1].flags = 0;
  swap_pipeline_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
  swap_pipeline_stages[1].module = ui::vulkan::util::CreateShaderModule(
      provider, shaders::uv_ps, sizeof(shaders::uv_ps));
  if (swap_pipeline_stages[1].module == VK_NULL_HANDLE) {
    XELOGE(
        "Failed to create the Vulkan fragment shader module for presentation");
    dfn.vkDestroyShaderModule(device, swap_pipeline_stages[0].module, nullptr);
    return false;
  }
  swap_pipeline_stages[1].pName = "main";
  swap_pipeline_stages[1].pSpecializationInfo = nullptr;

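  // The presentation vertex shader generates a full-screen triangle on its
  // own, so no vertex input bindings or attributes are specified.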
  VkPipelineVertexInputStateCreateInfo swap_pipeline_vertex_input_state = {};
  swap_pipeline_vertex_input_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;

  VkPipelineInputAssemblyStateCreateInfo swap_pipeline_input_assembly_state;
  swap_pipeline_input_assembly_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
  swap_pipeline_input_assembly_state.pNext = nullptr;
  swap_pipeline_input_assembly_state.flags = 0;
  swap_pipeline_input_assembly_state.topology =
      VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
  swap_pipeline_input_assembly_state.primitiveRestartEnable = VK_FALSE;

  VkPipelineViewportStateCreateInfo swap_pipeline_viewport_state;
  swap_pipeline_viewport_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
  swap_pipeline_viewport_state.pNext = nullptr;
  swap_pipeline_viewport_state.flags = 0;
  swap_pipeline_viewport_state.viewportCount = 1;
  swap_pipeline_viewport_state.pViewports = nullptr;
  swap_pipeline_viewport_state.scissorCount = 1;
  swap_pipeline_viewport_state.pScissors = nullptr;

  VkPipelineRasterizationStateCreateInfo swap_pipeline_rasterization_state = {};
  swap_pipeline_rasterization_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
  swap_pipeline_rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
  swap_pipeline_rasterization_state.cullMode = VK_CULL_MODE_NONE;
  swap_pipeline_rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
  swap_pipeline_rasterization_state.lineWidth = 1.0f;

  VkPipelineMultisampleStateCreateInfo swap_pipeline_multisample_state = {};
  swap_pipeline_multisample_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
  swap_pipeline_multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;

  VkPipelineColorBlendAttachmentState
      swap_pipeline_color_blend_attachment_state = {};
  swap_pipeline_color_blend_attachment_state.colorWriteMask =
      VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
      VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
  VkPipelineColorBlendStateCreateInfo swap_pipeline_color_blend_state = {};
  swap_pipeline_color_blend_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
  swap_pipeline_color_blend_state.attachmentCount = 1;
  swap_pipeline_color_blend_state.pAttachments =
      &swap_pipeline_color_blend_attachment_state;

  static const VkDynamicState kSwapPipelineDynamicStates[] = {
      VK_DYNAMIC_STATE_VIEWPORT,
      VK_DYNAMIC_STATE_SCISSOR,
  };
  VkPipelineDynamicStateCreateInfo swap_pipeline_dynamic_state;
  swap_pipeline_dynamic_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
  swap_pipeline_dynamic_state.pNext = nullptr;
  swap_pipeline_dynamic_state.flags = 0;
  swap_pipeline_dynamic_state.dynamicStateCount =
      uint32_t(xe::countof(kSwapPipelineDynamicStates));
  swap_pipeline_dynamic_state.pDynamicStates = kSwapPipelineDynamicStates;

  VkGraphicsPipelineCreateInfo swap_pipeline_create_info;
  swap_pipeline_create_info.sType =
      VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
  swap_pipeline_create_info.pNext = nullptr;
  swap_pipeline_create_info.flags = 0;
  swap_pipeline_create_info.stageCount =
      uint32_t(xe::countof(swap_pipeline_stages));
  swap_pipeline_create_info.pStages = swap_pipeline_stages;
  swap_pipeline_create_info.pVertexInputState =
      &swap_pipeline_vertex_input_state;
  swap_pipeline_create_info.pInputAssemblyState =
      &swap_pipeline_input_assembly_state;
  swap_pipeline_create_info.pTessellationState = nullptr;
  swap_pipeline_create_info.pViewportState = &swap_pipeline_viewport_state;
  swap_pipeline_create_info.pRasterizationState =
      &swap_pipeline_rasterization_state;
  swap_pipeline_create_info.pMultisampleState =
      &swap_pipeline_multisample_state;
  swap_pipeline_create_info.pDepthStencilState = nullptr;
  swap_pipeline_create_info.pColorBlendState = &swap_pipeline_color_blend_state;
  swap_pipeline_create_info.pDynamicState = &swap_pipeline_dynamic_state;
  swap_pipeline_create_info.layout = swap_pipeline_layout_;
  swap_pipeline_create_info.renderPass = swap_render_pass_;
  swap_pipeline_create_info.subpass = 0;
  swap_pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
  swap_pipeline_create_info.basePipelineIndex = -1;
  VkResult swap_pipeline_create_result = dfn.vkCreateGraphicsPipelines(
      device, VK_NULL_HANDLE, 1, &swap_pipeline_create_info, nullptr,
      &swap_pipeline_);
  for (size_t i = 0; i < xe::countof(swap_pipeline_stages); ++i) {
    dfn.vkDestroyShaderModule(device, swap_pipeline_stages[i].module, nullptr);
  }
  if (swap_pipeline_create_result != VK_SUCCESS) {
    XELOGE("Failed to create the Vulkan pipeline for presentation");
    return false;
  }

  // Just not to expose uninitialized memory.
  std::memset(&system_constants_, 0, sizeof(system_constants_));

  return true;
}

void VulkanCommandProcessor::ShutdownContext() {
  AwaitAllQueueOperationsCompletion();

  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  for (const auto& framebuffer_pair : swap_framebuffers_outdated_) {
    dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr);
  }
  swap_framebuffers_outdated_.clear();
  for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) {
    ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device,
                                           swap_framebuffer.framebuffer);
  }

  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device,
                                         swap_pipeline_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device,
                                         swap_pipeline_layout_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyRenderPass, device,
                                         swap_render_pass_);

  ui::vulkan::util::DestroyAndNullHandle(
      dfn.vkDestroyDescriptorPool, device,
      shared_memory_and_edram_descriptor_pool_);

  pipeline_cache_.reset();

  render_target_cache_.reset();

  primitive_processor_.reset();

  shared_memory_.reset();

  for (const auto& pipeline_layout_pair : pipeline_layouts_) {
    dfn.vkDestroyPipelineLayout(
        device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr);
  }
  pipeline_layouts_.clear();
  for (const auto& descriptor_set_layout_pair :
       descriptor_set_layouts_textures_) {
    dfn.vkDestroyDescriptorSetLayout(device, descriptor_set_layout_pair.second,
                                     nullptr);
  }
  descriptor_set_layouts_textures_.clear();

  ui::vulkan::util::DestroyAndNullHandle(
      dfn.vkDestroyDescriptorSetLayout, device,
      descriptor_set_layout_shared_memory_and_edram_);
  ui::vulkan::util::DestroyAndNullHandle(
      dfn.vkDestroyDescriptorSetLayout, device,
      descriptor_set_layout_system_constants_);
  ui::vulkan::util::DestroyAndNullHandle(
      dfn.vkDestroyDescriptorSetLayout, device,
      descriptor_set_layout_float_constants_pixel_);
  ui::vulkan::util::DestroyAndNullHandle(
      dfn.vkDestroyDescriptorSetLayout, device,
      descriptor_set_layout_float_constants_vertex_);
  ui::vulkan::util::DestroyAndNullHandle(
      dfn.vkDestroyDescriptorSetLayout, device,
      descriptor_set_layout_fetch_bool_loop_constants_);
  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
                                         device, descriptor_set_layout_empty_);

  uniform_buffer_pool_.reset();
  transient_descriptor_pool_uniform_buffers_.reset();

  sparse_bind_wait_stage_mask_ = 0;
  sparse_buffer_binds_.clear();
  sparse_memory_binds_.clear();

  deferred_command_buffer_.Reset();
  for (const auto& command_buffer_pair : command_buffers_submitted_) {
    dfn.vkDestroyCommandPool(device, command_buffer_pair.second.pool, nullptr);
  }
  command_buffers_submitted_.clear();
  for (const CommandBuffer& command_buffer : command_buffers_writable_) {
    dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
  }
  command_buffers_writable_.clear();

  std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));
  frame_completed_ = 0;
  frame_current_ = 1;
  frame_open_ = false;

  for (const auto& semaphore : submissions_in_flight_semaphores_) {
    dfn.vkDestroySemaphore(device, semaphore.second, nullptr);
  }
  submissions_in_flight_semaphores_.clear();
  for (VkFence& fence : submissions_in_flight_fences_) {
    dfn.vkDestroyFence(device, fence, nullptr);
  }
  submissions_in_flight_fences_.clear();
  current_submission_wait_stage_masks_.clear();
  for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
    dfn.vkDestroySemaphore(device, semaphore, nullptr);
  }
  current_submission_wait_semaphores_.clear();
  submission_completed_ = 0;
  submission_open_ = false;

  for (VkSemaphore semaphore : semaphores_free_) {
    dfn.vkDestroySemaphore(device, semaphore, nullptr);
  }
  semaphores_free_.clear();
  for (VkFence fence : fences_free_) {
    dfn.vkDestroyFence(device, fence, nullptr);
  }
  fences_free_.clear();

  device_lost_ = false;

  CommandProcessor::ShutdownContext();
}

void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
  CommandProcessor::WriteRegister(index, value);

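  // A float constant write only invalidates the corresponding descriptor set
  // when that constant is actually used by the currently bound vertex or
  // pixel shader (according to the float constant usage bitmaps), so the
  // uniform buffers aren't rewritten needlessly.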
  if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
      index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
    if (frame_open_) {
      uint32_t float_constant_index =
          (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
      if (float_constant_index >= 256) {
        float_constant_index -= 256;
        if (current_float_constant_map_pixel_[float_constant_index >> 6] &
            (1ull << (float_constant_index & 63))) {
          current_graphics_descriptor_set_values_up_to_date_ &=
              ~(uint32_t(1)
                << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
        }
      } else {
        if (current_float_constant_map_vertex_[float_constant_index >> 6] &
            (1ull << (float_constant_index & 63))) {
          current_graphics_descriptor_set_values_up_to_date_ &=
              ~(uint32_t(1)
                << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
        }
      }
    }
  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
             index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
    current_graphics_descriptor_set_values_up_to_date_ &= ~(
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants);
  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 &&
             index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) {
    current_graphics_descriptor_set_values_up_to_date_ &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants);
  }
}

void VulkanCommandProcessor::SparseBindBuffer(
    VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
    VkPipelineStageFlags wait_stage_mask) {
  if (!bind_count) {
    return;
  }
  SparseBufferBind& buffer_bind = sparse_buffer_binds_.emplace_back();
  buffer_bind.buffer = buffer;
  buffer_bind.bind_offset = sparse_memory_binds_.size();
  buffer_bind.bind_count = bind_count;
  sparse_memory_binds_.reserve(sparse_memory_binds_.size() + bind_count);
  sparse_memory_binds_.insert(sparse_memory_binds_.end(), binds,
                              binds + bind_count);
  sparse_bind_wait_stage_mask_ |= wait_stage_mask;
}

void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
                                       uint32_t frontbuffer_width,
                                       uint32_t frontbuffer_height) {
  // FIXME(Triang3l): frontbuffer_ptr is currently unreliable, in the trace
  // player it's set to 0, but it's not needed anyway since the fetch constant
  // contains the address.

  SCOPE_profile_cpu_f("gpu");

  ui::Presenter* presenter = graphics_system_->presenter();
  if (!presenter) {
    return;
  }

  // TODO(Triang3l): Resolution scale.
  uint32_t resolution_scale = 1;
  uint32_t scaled_width = frontbuffer_width * resolution_scale;
  uint32_t scaled_height = frontbuffer_height * resolution_scale;
  presenter->RefreshGuestOutput(
      scaled_width, scaled_height, 1280 * resolution_scale,
      720 * resolution_scale,
      [this, scaled_width, scaled_height](
          ui::Presenter::GuestOutputRefreshContext& context) -> bool {
        // In case the swap command is the only one in the frame.
        if (!BeginSubmission(true)) {
          return false;
        }

        auto& vulkan_context = static_cast<
            ui::vulkan::VulkanPresenter::VulkanGuestOutputRefreshContext&>(
            context);
        uint64_t guest_output_image_version = vulkan_context.image_version();

        const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
        const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
        VkDevice device = provider.device();

        // Make sure a framebuffer is available for the current guest output
        // image version.
        size_t swap_framebuffer_index = SIZE_MAX;
        size_t swap_framebuffer_new_index = SIZE_MAX;
        // Try to find the existing framebuffer for the current guest output
        // image version, or an unused (without an existing framebuffer, or
        // with one, but that has never actually been used dynamically) slot.
        for (size_t i = 0; i < swap_framebuffers_.size(); ++i) {
          const SwapFramebuffer& existing_swap_framebuffer =
              swap_framebuffers_[i];
          if (existing_swap_framebuffer.framebuffer != VK_NULL_HANDLE &&
              existing_swap_framebuffer.version == guest_output_image_version) {
            swap_framebuffer_index = i;
            break;
          }
          if (existing_swap_framebuffer.framebuffer == VK_NULL_HANDLE ||
              !existing_swap_framebuffer.last_submission) {
            swap_framebuffer_new_index = i;
          }
        }
        if (swap_framebuffer_index == SIZE_MAX) {
          if (swap_framebuffer_new_index == SIZE_MAX) {
            // Replace the earliest used framebuffer.
            swap_framebuffer_new_index = 0;
            for (size_t i = 1; i < swap_framebuffers_.size(); ++i) {
              if (swap_framebuffers_[i].last_submission <
                  swap_framebuffers_[swap_framebuffer_new_index]
                      .last_submission) {
                swap_framebuffer_new_index = i;
              }
            }
          }
          swap_framebuffer_index = swap_framebuffer_new_index;
          SwapFramebuffer& new_swap_framebuffer =
              swap_framebuffers_[swap_framebuffer_new_index];
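          // If the slot being reused still holds a framebuffer, destroy it
          // immediately only when no submission possibly referencing it is
          // still executing; otherwise defer the destruction until the GPU is
          // done with that submission.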
          if (new_swap_framebuffer.framebuffer != VK_NULL_HANDLE) {
            if (submission_completed_ >= new_swap_framebuffer.last_submission) {
              dfn.vkDestroyFramebuffer(device, new_swap_framebuffer.framebuffer,
                                       nullptr);
            } else {
              swap_framebuffers_outdated_.emplace_back(
                  new_swap_framebuffer.last_submission,
                  new_swap_framebuffer.framebuffer);
            }
            new_swap_framebuffer.framebuffer = VK_NULL_HANDLE;
          }
          VkImageView guest_output_image_view_srgb =
              vulkan_context.image_view();
          VkFramebufferCreateInfo swap_framebuffer_create_info;
          swap_framebuffer_create_info.sType =
              VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
          swap_framebuffer_create_info.pNext = nullptr;
          swap_framebuffer_create_info.flags = 0;
          swap_framebuffer_create_info.renderPass = swap_render_pass_;
          swap_framebuffer_create_info.attachmentCount = 1;
          swap_framebuffer_create_info.pAttachments =
              &guest_output_image_view_srgb;
          swap_framebuffer_create_info.width = scaled_width;
          swap_framebuffer_create_info.height = scaled_height;
          swap_framebuffer_create_info.layers = 1;
          if (dfn.vkCreateFramebuffer(
                  device, &swap_framebuffer_create_info, nullptr,
                  &new_swap_framebuffer.framebuffer) != VK_SUCCESS) {
            XELOGE("Failed to create the Vulkan framebuffer for presentation");
            return false;
          }
          new_swap_framebuffer.version = guest_output_image_version;
          // The actual submission index will be set if the framebuffer is
          // actually used, not dropped due to some error.
          new_swap_framebuffer.last_submission = 0;
        }

        if (vulkan_context.image_ever_written_previously()) {
          // Insert a barrier after the last presenter's usage of the guest
          // output image. Will be overwriting all the contents, so oldLayout
          // is UNDEFINED. The render pass will do the layout transition, but
          // newLayout must not be UNDEFINED.
          PushImageMemoryBarrier(
              vulkan_context.image(),
              ui::vulkan::util::InitializeSubresourceRange(),
              ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask,
              VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
              ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask,
              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
              VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
        }

        // End the current render pass before inserting barriers and starting
        // a new one, and insert the barrier.
        SubmitBarriers(true);

        SwapFramebuffer& swap_framebuffer =
            swap_framebuffers_[swap_framebuffer_index];
        swap_framebuffer.last_submission = GetCurrentSubmission();

        VkRenderPassBeginInfo render_pass_begin_info;
        render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
        render_pass_begin_info.pNext = nullptr;
        render_pass_begin_info.renderPass = swap_render_pass_;
        render_pass_begin_info.framebuffer = swap_framebuffer.framebuffer;
        render_pass_begin_info.renderArea.offset.x = 0;
        render_pass_begin_info.renderArea.offset.y = 0;
        render_pass_begin_info.renderArea.extent.width = scaled_width;
        render_pass_begin_info.renderArea.extent.height = scaled_height;
        render_pass_begin_info.clearValueCount = 0;
        render_pass_begin_info.pClearValues = nullptr;
        deferred_command_buffer_.CmdVkBeginRenderPass(
            &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);

        VkViewport viewport;
        viewport.x = 0.0f;
        viewport.y = 0.0f;
        viewport.width = float(scaled_width);
        viewport.height = float(scaled_height);
        viewport.minDepth = 0.0f;
        viewport.maxDepth = 1.0f;
        SetViewport(viewport);
        VkRect2D scissor;
        scissor.offset.x = 0;
        scissor.offset.y = 0;
        scissor.extent.width = scaled_width;
        scissor.extent.height = scaled_height;
        SetScissor(scissor);

        BindExternalGraphicsPipeline(swap_pipeline_);

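        // Draw the full-screen triangle - 3 vertices, no vertex or index
        // buffers bound.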
        deferred_command_buffer_.CmdVkDraw(3, 1, 0, 0);

        deferred_command_buffer_.CmdVkEndRenderPass();

        // Insert the release barrier.
        PushImageMemoryBarrier(
            vulkan_context.image(),
            ui::vulkan::util::InitializeSubresourceRange(),
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
            ui::vulkan::VulkanPresenter::kGuestOutputInternalStageMask,
            VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            ui::vulkan::VulkanPresenter::kGuestOutputInternalAccessMask,
            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            ui::vulkan::VulkanPresenter::kGuestOutputInternalLayout);

        // Need to submit all the commands before giving the image back to the
        // presenter so it can submit its own commands for displaying it to the
        // queue, and also need to submit the release barrier.
        EndSubmission(true);
        return true;
      });

  // End the frame even if did not present for any reason (the image refresher
  // was not called), to prevent leaking per-frame resources.
  EndSubmission(true);
}

bool VulkanCommandProcessor::PushBufferMemoryBarrier(
    VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
    VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
    VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
    uint32_t src_queue_family_index, uint32_t dst_queue_family_index,
    bool skip_if_equal) {
  if (skip_if_equal && src_stage_mask == dst_stage_mask &&
      src_access_mask == dst_access_mask &&
      src_queue_family_index == dst_queue_family_index) {
    return false;
  }

  // Separate different barriers for overlapping buffer ranges into different
  // pipeline barrier commands.
  for (const VkBufferMemoryBarrier& other_buffer_memory_barrier :
       pending_barriers_buffer_memory_barriers_) {
    if (other_buffer_memory_barrier.buffer != buffer ||
        (size != VK_WHOLE_SIZE &&
         offset + size <= other_buffer_memory_barrier.offset) ||
        (other_buffer_memory_barrier.size != VK_WHOLE_SIZE &&
         other_buffer_memory_barrier.offset +
                 other_buffer_memory_barrier.size <=
             offset)) {
      continue;
    }
    if (other_buffer_memory_barrier.offset == offset &&
        other_buffer_memory_barrier.size == size &&
        other_buffer_memory_barrier.srcAccessMask == src_access_mask &&
        other_buffer_memory_barrier.dstAccessMask == dst_access_mask &&
        other_buffer_memory_barrier.srcQueueFamilyIndex ==
            src_queue_family_index &&
        other_buffer_memory_barrier.dstQueueFamilyIndex ==
            dst_queue_family_index) {
      // The barrier is already pending.
      current_pending_barrier_.src_stage_mask |= src_stage_mask;
      current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
      return true;
    }
    SplitPendingBarrier();
    break;
  }

  current_pending_barrier_.src_stage_mask |= src_stage_mask;
  current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
  VkBufferMemoryBarrier& buffer_memory_barrier =
      pending_barriers_buffer_memory_barriers_.emplace_back();
  buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
  buffer_memory_barrier.pNext = nullptr;
  buffer_memory_barrier.srcAccessMask = src_access_mask;
  buffer_memory_barrier.dstAccessMask = dst_access_mask;
  buffer_memory_barrier.srcQueueFamilyIndex = src_queue_family_index;
  buffer_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index;
  buffer_memory_barrier.buffer = buffer;
  buffer_memory_barrier.offset = offset;
  buffer_memory_barrier.size = size;
  return true;
}

bool VulkanCommandProcessor::PushImageMemoryBarrier(
    VkImage image, const VkImageSubresourceRange& subresource_range,
    VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
    VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
    VkImageLayout old_layout, VkImageLayout new_layout,
    uint32_t src_queue_family_index, uint32_t dst_queue_family_index,
    bool skip_if_equal) {
  if (skip_if_equal && src_stage_mask == dst_stage_mask &&
      src_access_mask == dst_access_mask && old_layout == new_layout &&
      src_queue_family_index == dst_queue_family_index) {
    return false;
  }

  // Separate different barriers for overlapping image subresource ranges into
  // different pipeline barrier commands.
  for (const VkImageMemoryBarrier& other_image_memory_barrier :
       pending_barriers_image_memory_barriers_) {
    if (other_image_memory_barrier.image != image ||
        !(other_image_memory_barrier.subresourceRange.aspectMask &
          subresource_range.aspectMask) ||
        (subresource_range.levelCount != VK_REMAINING_MIP_LEVELS &&
         subresource_range.baseMipLevel + subresource_range.levelCount <=
             other_image_memory_barrier.subresourceRange.baseMipLevel) ||
        (other_image_memory_barrier.subresourceRange.levelCount !=
             VK_REMAINING_MIP_LEVELS &&
         other_image_memory_barrier.subresourceRange.baseMipLevel +
                 other_image_memory_barrier.subresourceRange.levelCount <=
             subresource_range.baseMipLevel) ||
        (subresource_range.layerCount != VK_REMAINING_ARRAY_LAYERS &&
         subresource_range.baseArrayLayer + subresource_range.layerCount <=
             other_image_memory_barrier.subresourceRange.baseArrayLayer) ||
        (other_image_memory_barrier.subresourceRange.layerCount !=
             VK_REMAINING_ARRAY_LAYERS &&
         other_image_memory_barrier.subresourceRange.baseArrayLayer +
                 other_image_memory_barrier.subresourceRange.layerCount <=
             subresource_range.baseArrayLayer)) {
      continue;
    }
    if (other_image_memory_barrier.subresourceRange.aspectMask ==
            subresource_range.aspectMask &&
        other_image_memory_barrier.subresourceRange.baseMipLevel ==
            subresource_range.baseMipLevel &&
        other_image_memory_barrier.subresourceRange.levelCount ==
            subresource_range.levelCount &&
        other_image_memory_barrier.subresourceRange.baseArrayLayer ==
            subresource_range.baseArrayLayer &&
        other_image_memory_barrier.subresourceRange.layerCount ==
            subresource_range.layerCount &&
        other_image_memory_barrier.srcAccessMask == src_access_mask &&
        other_image_memory_barrier.dstAccessMask == dst_access_mask &&
        other_image_memory_barrier.oldLayout == old_layout &&
        other_image_memory_barrier.newLayout == new_layout &&
        other_image_memory_barrier.srcQueueFamilyIndex ==
            src_queue_family_index &&
        other_image_memory_barrier.dstQueueFamilyIndex ==
            dst_queue_family_index) {
      // The barrier is already pending.
      current_pending_barrier_.src_stage_mask |= src_stage_mask;
      current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
      return true;
    }
    SplitPendingBarrier();
    break;
  }

  current_pending_barrier_.src_stage_mask |= src_stage_mask;
  current_pending_barrier_.dst_stage_mask |= dst_stage_mask;
  VkImageMemoryBarrier& image_memory_barrier =
      pending_barriers_image_memory_barriers_.emplace_back();
  image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  image_memory_barrier.pNext = nullptr;
  image_memory_barrier.srcAccessMask = src_access_mask;
  image_memory_barrier.dstAccessMask = dst_access_mask;
  image_memory_barrier.oldLayout = old_layout;
  image_memory_barrier.newLayout = new_layout;
  image_memory_barrier.srcQueueFamilyIndex = src_queue_family_index;
  image_memory_barrier.dstQueueFamilyIndex = dst_queue_family_index;
  image_memory_barrier.image = image;
  image_memory_barrier.subresourceRange = subresource_range;
  return true;
}

bool VulkanCommandProcessor::SubmitBarriers(bool force_end_render_pass) {
  assert_true(submission_open_);
  SplitPendingBarrier();
  if (pending_barriers_.empty()) {
    if (force_end_render_pass) {
      EndRenderPass();
    }
    return false;
  }
  EndRenderPass();
  for (auto it = pending_barriers_.cbegin(); it != pending_barriers_.cend();
       ++it) {
    auto it_next = std::next(it);
    bool is_last = it_next == pending_barriers_.cend();
    // .data() + offset, not &[offset], for buffer and image barriers, because
    // if there are no buffer or image memory barriers in the last pipeline
    // barriers, the offsets may be equal to the sizes of the vectors.
    deferred_command_buffer_.CmdVkPipelineBarrier(
        it->src_stage_mask ? it->src_stage_mask
                           : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        it->dst_stage_mask ? it->dst_stage_mask
                           : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
        0, 0, nullptr,
        uint32_t((is_last ? pending_barriers_buffer_memory_barriers_.size()
                          : it_next->buffer_memory_barriers_offset) -
                 it->buffer_memory_barriers_offset),
        pending_barriers_buffer_memory_barriers_.data() +
            it->buffer_memory_barriers_offset,
        uint32_t((is_last ? pending_barriers_image_memory_barriers_.size()
                          : it_next->image_memory_barriers_offset) -
                 it->image_memory_barriers_offset),
        pending_barriers_image_memory_barriers_.data() +
            it->image_memory_barriers_offset);
  }
  pending_barriers_.clear();
  pending_barriers_buffer_memory_barriers_.clear();
  pending_barriers_image_memory_barriers_.clear();
  current_pending_barrier_.buffer_memory_barriers_offset = 0;
  current_pending_barrier_.image_memory_barriers_offset = 0;
  return true;
}

void VulkanCommandProcessor::SubmitBarriersAndEnterRenderTargetCacheRenderPass(
    VkRenderPass render_pass,
    const VulkanRenderTargetCache::Framebuffer* framebuffer) {
  SubmitBarriers(false);
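  // Only restart the render pass when the pass or the framebuffer actually
  // changes, so consecutive guest draws stay batched within a single render
  // pass instance.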
  if (current_render_pass_ == render_pass &&
      current_framebuffer_ == framebuffer) {
    return;
  }
  if (current_render_pass_ != VK_NULL_HANDLE) {
    deferred_command_buffer_.CmdVkEndRenderPass();
  }
  current_render_pass_ = render_pass;
  current_framebuffer_ = framebuffer;
  VkRenderPassBeginInfo render_pass_begin_info;
  render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
  render_pass_begin_info.pNext = nullptr;
  render_pass_begin_info.renderPass = render_pass;
  render_pass_begin_info.framebuffer = framebuffer->framebuffer;
  render_pass_begin_info.renderArea.offset.x = 0;
  render_pass_begin_info.renderArea.offset.y = 0;
  // TODO(Triang3l): Actual dirty width / height in the deferred command
  // buffer.
  render_pass_begin_info.renderArea.extent = framebuffer->host_extent;
  render_pass_begin_info.clearValueCount = 0;
  render_pass_begin_info.pClearValues = nullptr;
  deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info,
                                                VK_SUBPASS_CONTENTS_INLINE);
}

void VulkanCommandProcessor::EndRenderPass() {
  assert_true(submission_open_);
  if (current_render_pass_ == VK_NULL_HANDLE) {
    return;
  }
  deferred_command_buffer_.CmdVkEndRenderPass();
  current_render_pass_ = VK_NULL_HANDLE;
  current_framebuffer_ = nullptr;
}

const VulkanPipelineCache::PipelineLayoutProvider*
VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel,
                                          uint32_t texture_count_vertex) {
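  // Pipeline layouts only differ by the numbers of textures bound in the
  // vertex and pixel shaders - all the constant buffer and shared memory set
  // layouts are common - so layouts are cached by the two texture counts.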
  PipelineLayoutKey pipeline_layout_key;
  pipeline_layout_key.texture_count_pixel = texture_count_pixel;
  pipeline_layout_key.texture_count_vertex = texture_count_vertex;
  {
    auto it = pipeline_layouts_.find(pipeline_layout_key.key);
    if (it != pipeline_layouts_.end()) {
      return &it->second;
    }
  }

  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  VkDescriptorSetLayout descriptor_set_layout_textures_pixel;
  if (texture_count_pixel) {
    TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key;
    texture_descriptor_set_layout_key.is_vertex = 0;
    texture_descriptor_set_layout_key.texture_count = texture_count_pixel;
    auto it = descriptor_set_layouts_textures_.find(
        texture_descriptor_set_layout_key.key);
    if (it != descriptor_set_layouts_textures_.end()) {
      descriptor_set_layout_textures_pixel = it->second;
    } else {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding;
      descriptor_set_layout_binding.binding = 0;
      descriptor_set_layout_binding.descriptorType =
          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
      descriptor_set_layout_binding.descriptorCount = texture_count_pixel;
      descriptor_set_layout_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
      descriptor_set_layout_binding.pImmutableSamplers = nullptr;
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
      descriptor_set_layout_create_info.sType =
          VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
      descriptor_set_layout_create_info.pNext = nullptr;
      descriptor_set_layout_create_info.flags = 0;
      descriptor_set_layout_create_info.bindingCount = 1;
      descriptor_set_layout_create_info.pBindings =
          &descriptor_set_layout_binding;
      if (dfn.vkCreateDescriptorSetLayout(
              device, &descriptor_set_layout_create_info, nullptr,
              &descriptor_set_layout_textures_pixel) != VK_SUCCESS) {
        XELOGE(
            "Failed to create a Vulkan descriptor set layout for {} combined "
            "images and samplers for guest pixel shaders",
            texture_count_pixel);
        return nullptr;
      }
      descriptor_set_layouts_textures_.emplace(
          texture_descriptor_set_layout_key.key,
          descriptor_set_layout_textures_pixel);
    }
  } else {
    descriptor_set_layout_textures_pixel = descriptor_set_layout_empty_;
  }

  VkDescriptorSetLayout descriptor_set_layout_textures_vertex;
  if (texture_count_vertex) {
    TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key;
    texture_descriptor_set_layout_key.is_vertex = 1;
    texture_descriptor_set_layout_key.texture_count = texture_count_vertex;
    auto it = descriptor_set_layouts_textures_.find(
        texture_descriptor_set_layout_key.key);
    if (it != descriptor_set_layouts_textures_.end()) {
      descriptor_set_layout_textures_vertex = it->second;
    } else {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding;
      descriptor_set_layout_binding.binding = 0;
      descriptor_set_layout_binding.descriptorType =
          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
      descriptor_set_layout_binding.descriptorCount = texture_count_vertex;
      descriptor_set_layout_binding.stageFlags = guest_shader_vertex_stages_;
      descriptor_set_layout_binding.pImmutableSamplers = nullptr;
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
      descriptor_set_layout_create_info.sType =
          VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
      descriptor_set_layout_create_info.pNext = nullptr;
      descriptor_set_layout_create_info.flags = 0;
      descriptor_set_layout_create_info.bindingCount = 1;
      descriptor_set_layout_create_info.pBindings =
          &descriptor_set_layout_binding;
      if (dfn.vkCreateDescriptorSetLayout(
              device, &descriptor_set_layout_create_info, nullptr,
              &descriptor_set_layout_textures_vertex) != VK_SUCCESS) {
        XELOGE(
            "Failed to create a Vulkan descriptor set layout for {} combined "
            "images and samplers for guest vertex shaders",
            texture_count_vertex);
        return nullptr;
      }
      descriptor_set_layouts_textures_.emplace(
          texture_descriptor_set_layout_key.key,
          descriptor_set_layout_textures_vertex);
    }
  } else {
    descriptor_set_layout_textures_vertex = descriptor_set_layout_empty_;
  }

  VkDescriptorSetLayout
      descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount];
  // Immutable layouts.
  descriptor_set_layouts
      [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
          descriptor_set_layout_shared_memory_and_edram_;
  descriptor_set_layouts
      [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
          descriptor_set_layout_fetch_bool_loop_constants_;
  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] =
      descriptor_set_layout_system_constants_;
  descriptor_set_layouts
      [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
          descriptor_set_layout_float_constants_pixel_;
  descriptor_set_layouts
      [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
          descriptor_set_layout_float_constants_vertex_;
  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] =
      descriptor_set_layout_fetch_bool_loop_constants_;
  // Mutable layouts.
  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
      descriptor_set_layout_textures_vertex;
  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
      descriptor_set_layout_textures_pixel;

  VkPipelineLayoutCreateInfo pipeline_layout_create_info;
  pipeline_layout_create_info.sType =
      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
  pipeline_layout_create_info.pNext = nullptr;
  pipeline_layout_create_info.flags = 0;
  pipeline_layout_create_info.setLayoutCount =
      uint32_t(xe::countof(descriptor_set_layouts));
  pipeline_layout_create_info.pSetLayouts = descriptor_set_layouts;
  pipeline_layout_create_info.pushConstantRangeCount = 0;
  pipeline_layout_create_info.pPushConstantRanges = nullptr;
  VkPipelineLayout pipeline_layout;
  if (dfn.vkCreatePipelineLayout(device, &pipeline_layout_create_info, nullptr,
                                 &pipeline_layout) != VK_SUCCESS) {
    XELOGE(
        "Failed to create a Vulkan pipeline layout for guest drawing with {} "
        "pixel shader and {} vertex shader textures",
        texture_count_pixel, texture_count_vertex);
    return nullptr;
  }
  auto emplaced_pair = pipeline_layouts_.emplace(
      std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key.key),
      std::forward_as_tuple(pipeline_layout,
                            descriptor_set_layout_textures_vertex,
                            descriptor_set_layout_textures_pixel));
  // unordered_map insertion doesn't invalidate element references.
  return &emplaced_pair.first->second;
}

void VulkanCommandProcessor::BindExternalGraphicsPipeline(
    VkPipeline pipeline, bool keep_dynamic_depth_bias,
    bool keep_dynamic_blend_constants, bool keep_dynamic_stencil_mask_ref) {
  if (!keep_dynamic_depth_bias) {
    dynamic_depth_bias_update_needed_ = true;
  }
  if (!keep_dynamic_blend_constants) {
    dynamic_blend_constants_update_needed_ = true;
  }
  if (!keep_dynamic_stencil_mask_ref) {
    dynamic_stencil_compare_mask_front_update_needed_ = true;
    dynamic_stencil_compare_mask_back_update_needed_ = true;
    dynamic_stencil_write_mask_front_update_needed_ = true;
    dynamic_stencil_write_mask_back_update_needed_ = true;
    dynamic_stencil_reference_front_update_needed_ = true;
    dynamic_stencil_reference_back_update_needed_ = true;
  }
  if (current_external_graphics_pipeline_ == pipeline) {
    return;
  }
  deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
                                             pipeline);
  current_external_graphics_pipeline_ = pipeline;
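  // The guest pipeline and its layout are no longer bound, so they will have
  // to be rebound before the next guest draw.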
current_guest_graphics_pipeline_ = VK_NULL_HANDLE;
|
|
current_guest_graphics_pipeline_layout_ = VK_NULL_HANDLE;
|
|
}
|
|
|
|
void VulkanCommandProcessor::BindExternalComputePipeline(VkPipeline pipeline) {
|
|
if (current_external_compute_pipeline_ == pipeline) {
|
|
return;
|
|
}
|
|
deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
pipeline);
|
|
current_external_compute_pipeline_ = pipeline;
|
|
}
|
|
|
|
void VulkanCommandProcessor::SetViewport(const VkViewport& viewport) {
|
|
if (!dynamic_viewport_update_needed_) {
|
|
dynamic_viewport_update_needed_ |= dynamic_viewport_.x != viewport.x;
|
|
dynamic_viewport_update_needed_ |= dynamic_viewport_.y != viewport.y;
|
|
dynamic_viewport_update_needed_ |=
|
|
dynamic_viewport_.width != viewport.width;
|
|
dynamic_viewport_update_needed_ |=
|
|
dynamic_viewport_.height != viewport.height;
|
|
dynamic_viewport_update_needed_ |=
|
|
dynamic_viewport_.minDepth != viewport.minDepth;
|
|
dynamic_viewport_update_needed_ |=
|
|
dynamic_viewport_.maxDepth != viewport.maxDepth;
|
|
}
|
|
if (dynamic_viewport_update_needed_) {
|
|
dynamic_viewport_ = viewport;
|
|
deferred_command_buffer_.CmdVkSetViewport(0, 1, &dynamic_viewport_);
|
|
dynamic_viewport_update_needed_ = false;
|
|
}
|
|
}
|
|
|
|
void VulkanCommandProcessor::SetScissor(const VkRect2D& scissor) {
|
|
if (!dynamic_scissor_update_needed_) {
|
|
dynamic_scissor_update_needed_ |=
|
|
dynamic_scissor_.offset.x != scissor.offset.x;
|
|
dynamic_scissor_update_needed_ |=
|
|
dynamic_scissor_.offset.y != scissor.offset.y;
|
|
dynamic_scissor_update_needed_ |=
|
|
dynamic_scissor_.extent.width != scissor.extent.width;
|
|
dynamic_scissor_update_needed_ |=
|
|
dynamic_scissor_.extent.height != scissor.extent.height;
|
|
}
|
|
if (dynamic_scissor_update_needed_) {
|
|
dynamic_scissor_ = scissor;
|
|
deferred_command_buffer_.CmdVkSetScissor(0, 1, &dynamic_scissor_);
|
|
dynamic_scissor_update_needed_ = false;
|
|
}
|
|
}
|
|
|
|
Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type,
|
|
uint32_t guest_address,
|
|
const uint32_t* host_address,
|
|
uint32_t dword_count) {
|
|
return pipeline_cache_->LoadShader(shader_type, host_address, dword_count);
|
|
}
|
|
|
|
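// Translates and executes a single guest draw call: analyzes the active
// vertex and pixel shaders, runs the primitive processor, updates the render
// targets, pipeline, dynamic state, system constants and descriptor bindings,
// makes the vertex buffers resident in shared memory, and records the draw
// into the deferred command buffer.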
bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
                                       uint32_t index_count,
                                       IndexBufferInfo* index_buffer_info,
                                       bool major_mode_explicit) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;

  xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
  if (edram_mode == xenos::ModeControl::kCopy) {
    // Special copy handling.
    return IssueCopy();
  }

  // Vertex shader analysis.
  auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
  if (!vertex_shader) {
    // Always need a vertex shader.
    return false;
  }
  pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
  bool memexport_used_vertex = vertex_shader->is_valid_memexport_used();

  // Pixel shader analysis.
  bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
  bool is_rasterization_done =
      draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
  VulkanShader* pixel_shader = nullptr;
  if (is_rasterization_done) {
    // See xenos::ModeControl for explanation why the pixel shader is only used
    // when it's kColorDepth here.
    if (edram_mode == xenos::ModeControl::kColorDepth) {
      pixel_shader = static_cast<VulkanShader*>(active_pixel_shader());
      if (pixel_shader) {
        pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);
        if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader,
                                                             regs)) {
          pixel_shader = nullptr;
        }
      }
    }
  } else {
    // Disabling pixel shader for this case is also required by the pipeline
    // cache.
    if (!memexport_used_vertex) {
      // This draw has no effect.
      return true;
    }
  }
  // TODO(Triang3l): Memory export.

  if (!BeginSubmission(true)) {
    return false;
  }

  // Process primitives.
  PrimitiveProcessor::ProcessingResult primitive_processing_result;
  if (!primitive_processor_->Process(primitive_processing_result)) {
    return false;
  }
  if (!primitive_processing_result.host_draw_vertex_count) {
    // Nothing to draw.
    return true;
  }
  // TODO(Triang3l): Tessellation, geometry-type-specific vertex shader, vertex
  // shader as compute.
  if (primitive_processing_result.host_vertex_shader_type !=
      Shader::HostVertexShaderType::kVertex) {
    return false;
  }

  reg::RB_DEPTHCONTROL normalized_depth_control =
      draw_util::GetNormalizedDepthControl(regs);
  uint32_t normalized_color_mask =
      pixel_shader ? draw_util::GetNormalizedColorMask(
                         regs, pixel_shader->writes_color_targets())
                   : 0;

  // Shader modifications.
  SpirvShaderTranslator::Modification vertex_shader_modification =
      pipeline_cache_->GetCurrentVertexShaderModification(
          *vertex_shader, primitive_processing_result.host_vertex_shader_type);
  SpirvShaderTranslator::Modification pixel_shader_modification =
      pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification(
                         *pixel_shader, normalized_color_mask)
                   : SpirvShaderTranslator::Modification(0);

  // Set up the render targets - this may perform dispatches and draws.
  if (!render_target_cache_->Update(is_rasterization_done,
                                    normalized_depth_control,
                                    normalized_color_mask, *vertex_shader)) {
    return false;
  }

  // Translate the shaders.
  VulkanShader::VulkanTranslation* vertex_shader_translation =
      static_cast<VulkanShader::VulkanTranslation*>(
          vertex_shader->GetOrCreateTranslation(
              vertex_shader_modification.value));
  VulkanShader::VulkanTranslation* pixel_shader_translation =
      pixel_shader ? static_cast<VulkanShader::VulkanTranslation*>(
                         pixel_shader->GetOrCreateTranslation(
                             pixel_shader_modification.value))
                   : nullptr;

  // Update the graphics pipeline, and if the new graphics pipeline has a
  // different layout, invalidate incompatible descriptor sets before updating
  // current_guest_graphics_pipeline_layout_.
  VkPipeline pipeline;
  const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider;
  if (!pipeline_cache_->ConfigurePipeline(
          vertex_shader_translation, pixel_shader_translation,
          primitive_processing_result, normalized_depth_control,
          normalized_color_mask,
          render_target_cache_->last_update_render_pass_key(), pipeline,
          pipeline_layout_provider)) {
    return false;
  }
  if (current_guest_graphics_pipeline_ != pipeline) {
    deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
                                               pipeline);
    current_guest_graphics_pipeline_ = pipeline;
    current_external_graphics_pipeline_ = VK_NULL_HANDLE;
  }
  auto pipeline_layout =
      static_cast<const PipelineLayout*>(pipeline_layout_provider);
  if (current_guest_graphics_pipeline_layout_ != pipeline_layout) {
    if (current_guest_graphics_pipeline_layout_) {
      // Keep descriptor set layouts for which the new pipeline layout is
      // compatible with the previous one (pipeline layouts are compatible for
      // set N if set layouts 0 through N are compatible).
      uint32_t descriptor_sets_kept =
          uint32_t(SpirvShaderTranslator::kDescriptorSetCount);
      if (current_guest_graphics_pipeline_layout_
              ->descriptor_set_layout_textures_vertex_ref() !=
          pipeline_layout->descriptor_set_layout_textures_vertex_ref()) {
        descriptor_sets_kept = std::min(
            descriptor_sets_kept,
            uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex));
      }
      if (current_guest_graphics_pipeline_layout_
              ->descriptor_set_layout_textures_pixel_ref() !=
          pipeline_layout->descriptor_set_layout_textures_pixel_ref()) {
        descriptor_sets_kept = std::min(
            descriptor_sets_kept,
            uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel));
      }
    } else {
      // No or unknown pipeline layout previously bound - all bindings are in an
      // indeterminate state.
      current_graphics_descriptor_sets_bound_up_to_date_ = 0;
    }
    current_guest_graphics_pipeline_layout_ = pipeline_layout;
  }

  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  const VkPhysicalDeviceLimits& device_limits =
      provider.device_properties().limits;

  // Get dynamic rasterizer state.
  draw_util::ViewportInfo viewport_info;
  // Just handling maxViewportDimensions is enough - viewportBoundsRange[1] must
  // be at least 2 * max(maxViewportDimensions[0...1]) - 1, and
  // maxViewportDimensions must be greater than or equal to the size of the
  // largest possible framebuffer attachment (if the viewport has positive
  // offset and is between maxViewportDimensions and viewportBoundsRange[1],
  // GetHostViewportInfo will adjust ndc_scale/ndc_offset to clamp it, and the
  // clamped range will be outside the largest possible framebuffer anyway.
  // FIXME(Triang3l): Possibly handle maxViewportDimensions and
  // viewportBoundsRange separately because when using fragment shader
  // interlocks, framebuffers are not used, while the range may be wider than
  // dimensions? Though viewport bigger than 4096 - the smallest possible
  // maximum dimension (which is below the 8192 texture size limit on the Xbox
  // 360) - and with offset, is probably a situation that never happens in real
  // life. Or even disregard the viewport bounds range in the fragment shader
  // interlocks case completely - apply the viewport and the scissor offset
  // directly to pixel address and to things like ps_param_gen.
  draw_util::GetHostViewportInfo(
      regs, 1, 1, false, device_limits.maxViewportDimensions[0],
      device_limits.maxViewportDimensions[1], true, normalized_depth_control,
      false, false, false, viewport_info);

  // Update dynamic graphics pipeline state.
  UpdateDynamicState(viewport_info, primitive_polygonal,
                     normalized_depth_control);

  // Update system constants before uploading them.
  UpdateSystemConstantValues(primitive_processing_result.host_index_endian,
                             viewport_info);

  // Update uniform buffers and descriptor sets after binding the pipeline with
  // the new layout.
  if (!UpdateBindings(vertex_shader, pixel_shader)) {
    return false;
  }

  // Ensure vertex buffers are resident.
  // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
  // validity is tracked.
  uint64_t vertex_buffers_resident[2] = {};
  for (const Shader::VertexBinding& vertex_binding :
       vertex_shader->vertex_bindings()) {
    uint32_t vfetch_index = vertex_binding.fetch_constant;
    if (vertex_buffers_resident[vfetch_index >> 6] &
        (uint64_t(1) << (vfetch_index & 63))) {
      continue;
    }
    const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
        XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
    switch (vfetch_constant.type) {
      case xenos::FetchConstantType::kVertex:
        break;
      case xenos::FetchConstantType::kInvalidVertex:
        if (cvars::gpu_allow_invalid_fetch_constants) {
          break;
        }
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
            "This is incorrect behavior, but you can try bypassing this by "
            "launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
            vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
        return false;
      default:
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
            vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
        return false;
    }
    if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
                                      vfetch_constant.size << 2)) {
      XELOGE(
          "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
          "memory",
          vfetch_constant.address << 2, vfetch_constant.size << 2);
      return false;
    }
    vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
                                                  << (vfetch_index & 63);
  }

  // Insert the shared memory barrier if needed.
  // TODO(Triang3l): Memory export.
  shared_memory_->Use(VulkanSharedMemory::Usage::kRead);

  // After all commands that may dispatch, copy or insert barriers, submit the
  // barriers (may end the render pass), and (re)enter the render pass before
  // drawing.
  SubmitBarriersAndEnterRenderTargetCacheRenderPass(
      render_target_cache_->last_update_render_pass(),
      render_target_cache_->last_update_framebuffer());

  // Draw.
  if (primitive_processing_result.index_buffer_type ==
      PrimitiveProcessor::ProcessedIndexBufferType::kNone) {
    deferred_command_buffer_.CmdVkDraw(
        primitive_processing_result.host_draw_vertex_count, 1, 0, 0);
  } else {
    std::pair<VkBuffer, VkDeviceSize> index_buffer;
    switch (primitive_processing_result.index_buffer_type) {
      case PrimitiveProcessor::ProcessedIndexBufferType::kGuest:
        index_buffer.first = shared_memory_->buffer();
        index_buffer.second = primitive_processing_result.guest_index_base;
        break;
      case PrimitiveProcessor::ProcessedIndexBufferType::kHostConverted:
        index_buffer = primitive_processor_->GetConvertedIndexBuffer(
            primitive_processing_result.host_index_buffer_handle);
        break;
      case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin:
        index_buffer = primitive_processor_->GetBuiltinIndexBuffer(
            primitive_processing_result.host_index_buffer_handle);
        break;
      default:
        assert_unhandled_case(primitive_processing_result.index_buffer_type);
        return false;
    }
    deferred_command_buffer_.CmdVkBindIndexBuffer(
        index_buffer.first, index_buffer.second,
        primitive_processing_result.host_index_format ==
                xenos::IndexFormat::kInt16
            ? VK_INDEX_TYPE_UINT16
            : VK_INDEX_TYPE_UINT32);
    deferred_command_buffer_.CmdVkDrawIndexed(
        primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0);
  }

  return true;
}

bool VulkanCommandProcessor::IssueCopy() {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  if (!BeginSubmission(true)) {
    return false;
  }

  return true;
}

void VulkanCommandProcessor::InitializeTrace() {
  CommandProcessor::InitializeTrace();

  if (!BeginSubmission(true)) {
    return;
  }
  bool shared_memory_submitted =
      shared_memory_->InitializeTraceSubmitDownloads();
  if (!shared_memory_submitted) {
    return;
  }
  AwaitAllQueueOperationsCompletion();
  if (shared_memory_submitted) {
    shared_memory_->InitializeTraceCompleteDownloads();
  }
}

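// Checks which submissions have completed on the GPU (blocking until
// await_submission has completed when it is ahead of the last known completed
// submission), detects host GPU loss, and reclaims the fences, semaphores,
// command pools and outdated swap framebuffers of the completed submissions.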
void VulkanCommandProcessor::CheckSubmissionFenceAndDeviceLoss(
    uint64_t await_submission) {
  // Only report once, no need to retry a wait that won't succeed anyway.
  if (device_lost_) {
    return;
  }

  if (await_submission >= GetCurrentSubmission()) {
    if (submission_open_) {
      EndSubmission(false);
    }
    // A submission won't be ended if it hasn't been started, or if ending
    // has failed - clamp the index.
    await_submission = GetCurrentSubmission() - 1;
  }

  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  size_t fences_total = submissions_in_flight_fences_.size();
  size_t fences_awaited = 0;
  if (await_submission > submission_completed_) {
    // Await in a blocking way if requested.
    // TODO(Triang3l): Await only one fence. "Fence signal operations that are
    // defined by vkQueueSubmit additionally include in the first
    // synchronization scope all commands that occur earlier in submission
    // order."
    VkResult wait_result = dfn.vkWaitForFences(
        device, uint32_t(await_submission - submission_completed_),
        submissions_in_flight_fences_.data(), VK_TRUE, UINT64_MAX);
    if (wait_result == VK_SUCCESS) {
      fences_awaited += await_submission - submission_completed_;
    } else {
      XELOGE("Failed to await submission completion Vulkan fences");
      if (wait_result == VK_ERROR_DEVICE_LOST) {
        device_lost_ = true;
      }
    }
  }
  // Check how far into the submissions the GPU currently is, in order because
  // submissions themselves can be executed out of order, but Xenia serializes
  // that for simplicity.
  while (fences_awaited < fences_total) {
    VkResult fence_status = dfn.vkWaitForFences(
        device, 1, &submissions_in_flight_fences_[fences_awaited], VK_TRUE, 0);
    if (fence_status != VK_SUCCESS) {
      if (fence_status == VK_ERROR_DEVICE_LOST) {
        device_lost_ = true;
      }
      break;
    }
    ++fences_awaited;
  }
  if (device_lost_) {
    graphics_system_->OnHostGpuLossFromAnyThread(true);
    return;
  }
  if (!fences_awaited) {
    // Not updated - no need to reclaim or download things.
    return;
  }
  // Reclaim fences.
  fences_free_.reserve(fences_free_.size() + fences_awaited);
  auto submissions_in_flight_fences_awaited_end =
      submissions_in_flight_fences_.cbegin();
  std::advance(submissions_in_flight_fences_awaited_end, fences_awaited);
  fences_free_.insert(fences_free_.cend(),
                      submissions_in_flight_fences_.cbegin(),
                      submissions_in_flight_fences_awaited_end);
  submissions_in_flight_fences_.erase(submissions_in_flight_fences_.cbegin(),
                                      submissions_in_flight_fences_awaited_end);
  submission_completed_ += fences_awaited;

  // Reclaim semaphores.
  while (!submissions_in_flight_semaphores_.empty()) {
    const auto& semaphore_submission =
        submissions_in_flight_semaphores_.front();
    if (semaphore_submission.first > submission_completed_) {
      break;
    }
    semaphores_free_.push_back(semaphore_submission.second);
    submissions_in_flight_semaphores_.pop_front();
  }

  // Reclaim command pools.
  while (!command_buffers_submitted_.empty()) {
    const auto& command_buffer_pair = command_buffers_submitted_.front();
    if (command_buffer_pair.first > submission_completed_) {
      break;
    }
    command_buffers_writable_.push_back(command_buffer_pair.second);
    command_buffers_submitted_.pop_front();
  }

  shared_memory_->CompletedSubmissionUpdated();

  primitive_processor_->CompletedSubmissionUpdated();

  render_target_cache_->CompletedSubmissionUpdated();

  // Destroy outdated swap objects.
  while (!swap_framebuffers_outdated_.empty()) {
    const auto& framebuffer_pair = swap_framebuffers_outdated_.front();
    if (framebuffer_pair.first > submission_completed_) {
      break;
    }
    dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr);
    swap_framebuffers_outdated_.pop_front();
  }
}

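// Opens a submission (and, for guest commands, a frame) if one isn't open
// yet: awaits the submission that the oldest in-flight frame was closed with
// when too many frames are queued, resets the deferred command buffer and the
// cached dynamic/binding state for a new submission, and resets the
// transient-resource tracking for a new frame.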
bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  if (device_lost_) {
    return false;
  }

  bool is_opening_frame = is_guest_command && !frame_open_;
  if (submission_open_ && !is_opening_frame) {
    return true;
  }

  // Check the fence - needed for all kinds of submissions (to reclaim transient
  // resources early) and specifically for frames (not to queue too many), and
  // await the availability of the current frame. Also check whether the device
  // is still available, and whether the await was successful.
  uint64_t await_submission =
      is_opening_frame
          ? closed_frame_submissions_[frame_current_ % kMaxFramesInFlight]
          : 0;
  CheckSubmissionFenceAndDeviceLoss(await_submission);
  if (device_lost_ || submission_completed_ < await_submission) {
    return false;
  }

  if (is_opening_frame) {
    // Update the completed frame index, also obtaining the actual completed
    // frame number (since the CPU may be actually less than 3 frames behind)
    // before reclaiming resources tracked with the frame number.
    frame_completed_ = std::max(frame_current_, uint64_t(kMaxFramesInFlight)) -
                       kMaxFramesInFlight;
    for (uint64_t frame = frame_completed_ + 1; frame < frame_current_;
         ++frame) {
      if (closed_frame_submissions_[frame % kMaxFramesInFlight] >
          submission_completed_) {
        break;
      }
      frame_completed_ = frame;
    }
  }

  if (!submission_open_) {
    submission_open_ = true;

    // Start a new deferred command buffer - will submit it to the real one in
    // the end of the submission (when async pipeline object creation requests
    // are fulfilled).
    deferred_command_buffer_.Reset();

    // Reset cached state of the command buffer.
    dynamic_viewport_update_needed_ = true;
    dynamic_scissor_update_needed_ = true;
    dynamic_depth_bias_update_needed_ = true;
    dynamic_blend_constants_update_needed_ = true;
    dynamic_stencil_compare_mask_front_update_needed_ = true;
    dynamic_stencil_compare_mask_back_update_needed_ = true;
    dynamic_stencil_write_mask_front_update_needed_ = true;
    dynamic_stencil_write_mask_back_update_needed_ = true;
    dynamic_stencil_reference_front_update_needed_ = true;
    dynamic_stencil_reference_back_update_needed_ = true;
    current_render_pass_ = VK_NULL_HANDLE;
    current_framebuffer_ = nullptr;
    current_guest_graphics_pipeline_ = VK_NULL_HANDLE;
    current_external_graphics_pipeline_ = VK_NULL_HANDLE;
    current_external_compute_pipeline_ = VK_NULL_HANDLE;
    current_guest_graphics_pipeline_layout_ = nullptr;
    current_graphics_descriptor_sets_bound_up_to_date_ = 0;

    primitive_processor_->BeginSubmission();
  }

  if (is_opening_frame) {
    frame_open_ = true;

    // Reset bindings that depend on transient data.
    std::memset(current_float_constant_map_vertex_, 0,
                sizeof(current_float_constant_map_vertex_));
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
    std::memset(current_graphics_descriptor_sets_, 0,
                sizeof(current_graphics_descriptor_sets_));
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
            shared_memory_and_edram_descriptor_set_;
    current_graphics_descriptor_set_values_up_to_date_ =
        uint32_t(1)
        << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram;

    // Reclaim pool pages - no need to do this every small submission since
    // some may be reused.
    transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_);
    uniform_buffer_pool_->Reclaim(frame_completed_);

    primitive_processor_->BeginFrame();
  }

  return true;
}

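// Ends the current submission if one is open: submits any pending sparse
// memory binds on the sparse binding queue, executes the deferred command
// buffer on the graphics/compute queue with a fence, and, when closing a
// frame, advances the frame counters and performs a deferred cache clear if
// one was requested.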
bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
  ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  // Make sure everything needed for submitting exists.
  if (submission_open_) {
    if (fences_free_.empty()) {
      VkFenceCreateInfo fence_create_info;
      fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
      fence_create_info.pNext = nullptr;
      fence_create_info.flags = 0;
      VkFence fence;
      if (dfn.vkCreateFence(device, &fence_create_info, nullptr, &fence) !=
          VK_SUCCESS) {
        XELOGE("Failed to create a Vulkan fence");
        // Try to submit later. Completely dropping the submission is not
        // permitted because resources would be left in an undefined state.
        return false;
      }
      fences_free_.push_back(fence);
    }
    if (!sparse_memory_binds_.empty() && semaphores_free_.empty()) {
      VkSemaphoreCreateInfo semaphore_create_info;
      semaphore_create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
      semaphore_create_info.pNext = nullptr;
      semaphore_create_info.flags = 0;
      VkSemaphore semaphore;
      if (dfn.vkCreateSemaphore(device, &semaphore_create_info, nullptr,
                                &semaphore) != VK_SUCCESS) {
        XELOGE("Failed to create a Vulkan semaphore");
        return false;
      }
      semaphores_free_.push_back(semaphore);
    }
    if (command_buffers_writable_.empty()) {
      CommandBuffer command_buffer;
      VkCommandPoolCreateInfo command_pool_create_info;
      command_pool_create_info.sType =
          VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
      command_pool_create_info.pNext = nullptr;
      command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
      command_pool_create_info.queueFamilyIndex =
          provider.queue_family_graphics_compute();
      if (dfn.vkCreateCommandPool(device, &command_pool_create_info, nullptr,
                                  &command_buffer.pool) != VK_SUCCESS) {
        XELOGE("Failed to create a Vulkan command pool");
        return false;
      }
      VkCommandBufferAllocateInfo command_buffer_allocate_info;
      command_buffer_allocate_info.sType =
          VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
      command_buffer_allocate_info.pNext = nullptr;
      command_buffer_allocate_info.commandPool = command_buffer.pool;
      command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
      command_buffer_allocate_info.commandBufferCount = 1;
      if (dfn.vkAllocateCommandBuffers(device, &command_buffer_allocate_info,
                                       &command_buffer.buffer) != VK_SUCCESS) {
        XELOGE("Failed to allocate a Vulkan command buffer");
        dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
        return false;
      }
      command_buffers_writable_.push_back(command_buffer);
    }
  }

  bool is_closing_frame = is_swap && frame_open_;

  if (is_closing_frame) {
    primitive_processor_->EndFrame();
  }

  if (submission_open_) {
    EndRenderPass();

    render_target_cache_->EndSubmission();

    primitive_processor_->EndSubmission();

    shared_memory_->EndSubmission();

    uniform_buffer_pool_->FlushWrites();

    // Submit sparse binds earlier, before executing the deferred command
    // buffer, to reduce latency.
    if (!sparse_memory_binds_.empty()) {
      sparse_buffer_bind_infos_temp_.clear();
      sparse_buffer_bind_infos_temp_.reserve(sparse_buffer_binds_.size());
      for (const SparseBufferBind& sparse_buffer_bind : sparse_buffer_binds_) {
        VkSparseBufferMemoryBindInfo& sparse_buffer_bind_info =
            sparse_buffer_bind_infos_temp_.emplace_back();
        sparse_buffer_bind_info.buffer = sparse_buffer_bind.buffer;
        sparse_buffer_bind_info.bindCount = sparse_buffer_bind.bind_count;
        sparse_buffer_bind_info.pBinds =
            sparse_memory_binds_.data() + sparse_buffer_bind.bind_offset;
      }
      assert_false(semaphores_free_.empty());
      VkSemaphore bind_sparse_semaphore = semaphores_free_.back();
      VkBindSparseInfo bind_sparse_info;
      bind_sparse_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
      bind_sparse_info.pNext = nullptr;
      bind_sparse_info.waitSemaphoreCount = 0;
      bind_sparse_info.pWaitSemaphores = nullptr;
      bind_sparse_info.bufferBindCount =
          uint32_t(sparse_buffer_bind_infos_temp_.size());
      bind_sparse_info.pBufferBinds =
          !sparse_buffer_bind_infos_temp_.empty()
              ? sparse_buffer_bind_infos_temp_.data()
              : nullptr;
      bind_sparse_info.imageOpaqueBindCount = 0;
      bind_sparse_info.pImageOpaqueBinds = nullptr;
      bind_sparse_info.imageBindCount = 0;
      bind_sparse_info.pImageBinds = nullptr;
      bind_sparse_info.signalSemaphoreCount = 1;
      bind_sparse_info.pSignalSemaphores = &bind_sparse_semaphore;
      VkResult bind_sparse_result;
      {
        ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition(
            provider.AcquireQueue(provider.queue_family_sparse_binding(), 0));
        bind_sparse_result = dfn.vkQueueBindSparse(
            queue_acquisition.queue, 1, &bind_sparse_info, VK_NULL_HANDLE);
      }
      if (bind_sparse_result != VK_SUCCESS) {
        XELOGE("Failed to submit Vulkan sparse binds");
        return false;
      }
      current_submission_wait_semaphores_.push_back(bind_sparse_semaphore);
      semaphores_free_.pop_back();
      current_submission_wait_stage_masks_.push_back(
          sparse_bind_wait_stage_mask_);
      sparse_bind_wait_stage_mask_ = 0;
      sparse_buffer_binds_.clear();
      sparse_memory_binds_.clear();
    }

    SubmitBarriers(true);

    assert_false(command_buffers_writable_.empty());
    CommandBuffer command_buffer = command_buffers_writable_.back();
    if (dfn.vkResetCommandPool(device, command_buffer.pool, 0) != VK_SUCCESS) {
      XELOGE("Failed to reset a Vulkan command pool");
      return false;
    }
    VkCommandBufferBeginInfo command_buffer_begin_info;
    command_buffer_begin_info.sType =
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    command_buffer_begin_info.pNext = nullptr;
    command_buffer_begin_info.flags =
        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
    command_buffer_begin_info.pInheritanceInfo = nullptr;
    if (dfn.vkBeginCommandBuffer(command_buffer.buffer,
                                 &command_buffer_begin_info) != VK_SUCCESS) {
      XELOGE("Failed to begin a Vulkan command buffer");
      return false;
    }
    deferred_command_buffer_.Execute(command_buffer.buffer);
    if (dfn.vkEndCommandBuffer(command_buffer.buffer) != VK_SUCCESS) {
      XELOGE("Failed to end a Vulkan command buffer");
      return false;
    }

    VkSubmitInfo submit_info;
    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.pNext = nullptr;
    if (!current_submission_wait_semaphores_.empty()) {
      submit_info.waitSemaphoreCount =
          uint32_t(current_submission_wait_semaphores_.size());
      submit_info.pWaitSemaphores = current_submission_wait_semaphores_.data();
      submit_info.pWaitDstStageMask =
          current_submission_wait_stage_masks_.data();
    } else {
      submit_info.waitSemaphoreCount = 0;
      submit_info.pWaitSemaphores = nullptr;
      submit_info.pWaitDstStageMask = nullptr;
    }
    submit_info.commandBufferCount = 1;
    submit_info.pCommandBuffers = &command_buffer.buffer;
    submit_info.signalSemaphoreCount = 0;
    submit_info.pSignalSemaphores = nullptr;
    assert_false(fences_free_.empty());
    VkFence fence = fences_free_.back();
    if (dfn.vkResetFences(device, 1, &fence) != VK_SUCCESS) {
      XELOGE("Failed to reset a Vulkan submission fence");
      return false;
    }
    VkResult submit_result;
    {
      ui::vulkan::VulkanProvider::QueueAcquisition queue_acquisition(
          provider.AcquireQueue(provider.queue_family_graphics_compute(), 0));
      submit_result =
          dfn.vkQueueSubmit(queue_acquisition.queue, 1, &submit_info, fence);
    }
    if (submit_result != VK_SUCCESS) {
      XELOGE("Failed to submit a Vulkan command buffer");
      if (submit_result == VK_ERROR_DEVICE_LOST && !device_lost_) {
        device_lost_ = true;
        graphics_system_->OnHostGpuLossFromAnyThread(true);
      }
      return false;
    }
    uint64_t submission_current = GetCurrentSubmission();
    current_submission_wait_stage_masks_.clear();
    for (VkSemaphore semaphore : current_submission_wait_semaphores_) {
      submissions_in_flight_semaphores_.emplace_back(submission_current,
                                                     semaphore);
    }
    current_submission_wait_semaphores_.clear();
    command_buffers_submitted_.emplace_back(submission_current, command_buffer);
    command_buffers_writable_.pop_back();
    // Increments the current submission number, going to the next submission.
    submissions_in_flight_fences_.push_back(fence);
    fences_free_.pop_back();

    submission_open_ = false;
  }

  if (is_closing_frame) {
    frame_open_ = false;
    // Submission already closed now, so minus 1.
    closed_frame_submissions_[(frame_current_++) % kMaxFramesInFlight] =
        GetCurrentSubmission() - 1;

    if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) {
      cache_clear_requested_ = false;

      assert_true(command_buffers_submitted_.empty());
      for (const CommandBuffer& command_buffer : command_buffers_writable_) {
        dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
      }
      command_buffers_writable_.clear();

      uniform_buffer_pool_->ClearCache();
      transient_descriptor_pool_uniform_buffers_->ClearCache();

      pipeline_cache_->ClearCache();

      render_target_cache_->ClearCache();

      for (const auto& pipeline_layout_pair : pipeline_layouts_) {
        dfn.vkDestroyPipelineLayout(
            device, pipeline_layout_pair.second.GetPipelineLayout(), nullptr);
      }
      pipeline_layouts_.clear();
      for (const auto& descriptor_set_layout_pair :
           descriptor_set_layouts_textures_) {
        dfn.vkDestroyDescriptorSetLayout(
            device, descriptor_set_layout_pair.second, nullptr);
      }
      descriptor_set_layouts_textures_.clear();

      primitive_processor_->ClearCache();

      for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) {
        ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device,
                                               swap_framebuffer.framebuffer);
      }
    }
  }

  return true;
}

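// Finalizes the barrier that is currently being accumulated, if it actually
// contains any stage masks or buffer/image memory barriers, and starts
// gathering a new one that will reference the memory barriers added after
// this point.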
void VulkanCommandProcessor::SplitPendingBarrier() {
  size_t pending_buffer_memory_barrier_count =
      pending_barriers_buffer_memory_barriers_.size();
  size_t pending_image_memory_barrier_count =
      pending_barriers_image_memory_barriers_.size();
  if (!current_pending_barrier_.src_stage_mask &&
      !current_pending_barrier_.dst_stage_mask &&
      current_pending_barrier_.buffer_memory_barriers_offset >=
          pending_buffer_memory_barrier_count &&
      current_pending_barrier_.image_memory_barriers_offset >=
          pending_image_memory_barrier_count) {
    return;
  }
  pending_barriers_.emplace_back(current_pending_barrier_);
  current_pending_barrier_.src_stage_mask = 0;
  current_pending_barrier_.dst_stage_mask = 0;
  current_pending_barrier_.buffer_memory_barriers_offset =
      pending_buffer_memory_barrier_count;
  current_pending_barrier_.image_memory_barriers_offset =
      pending_image_memory_barrier_count;
}

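// Translates the guest viewport, scissor, depth bias, blend constant and
// stencil mask/reference registers into Vulkan dynamic state, recording the
// corresponding commands into the deferred command buffer only for values
// that have changed since they were last set.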
void VulkanCommandProcessor::UpdateDynamicState(
    const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal,
    reg::RB_DEPTHCONTROL normalized_depth_control) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;

  // Window parameters.
  // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
  // See r200UpdateWindow:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();

  // Viewport.
  VkViewport viewport;
  if (viewport_info.xy_extent[0] && viewport_info.xy_extent[1]) {
    viewport.x = float(viewport_info.xy_offset[0]);
    viewport.y = float(viewport_info.xy_offset[1]);
    viewport.width = float(viewport_info.xy_extent[0]);
    viewport.height = float(viewport_info.xy_extent[1]);
  } else {
    // Vulkan viewport width must be greater than 0.0f, but the Xenia viewport
    // may be empty for various reasons - set the viewport to outside the
    // framebuffer.
    viewport.x = -1.0f;
    viewport.y = -1.0f;
    viewport.width = 1.0f;
    viewport.height = 1.0f;
  }
  viewport.minDepth = viewport_info.z_min;
  viewport.maxDepth = viewport_info.z_max;
  SetViewport(viewport);

  // Scissor.
  draw_util::Scissor scissor;
  draw_util::GetScissor(regs, scissor);
  VkRect2D scissor_rect;
  scissor_rect.offset.x = int32_t(scissor.offset[0]);
  scissor_rect.offset.y = int32_t(scissor.offset[1]);
  scissor_rect.extent.width = scissor.extent[0];
  scissor_rect.extent.height = scissor.extent[1];
  SetScissor(scissor_rect);

  // Depth bias.
  // TODO(Triang3l): Disable the depth bias for the fragment shader interlock
  // RB implementation.
  float depth_bias_constant_factor, depth_bias_slope_factor;
  draw_util::GetPreferredFacePolygonOffset(regs, primitive_polygonal,
                                           depth_bias_slope_factor,
                                           depth_bias_constant_factor);
  depth_bias_constant_factor *= draw_util::GetD3D10PolygonOffsetFactor(
      regs.Get<reg::RB_DEPTH_INFO>().depth_format, true);
  // With non-square resolution scaling, make sure the worst-case impact is
  // reverted (slope only along the scaled axis), thus max. More bias is better
  // than less bias, because less bias means Z fighting with the background is
  // more likely.
  depth_bias_slope_factor *=
      xenos::kPolygonOffsetScaleSubpixelUnit *
      float(std::max(render_target_cache_->draw_resolution_scale_x(),
                     render_target_cache_->draw_resolution_scale_y()));
  // std::memcmp instead of != so in case of NaN, every draw won't be
  // invalidating it.
  dynamic_depth_bias_update_needed_ |=
      std::memcmp(&dynamic_depth_bias_constant_factor_,
                  &depth_bias_constant_factor, sizeof(float)) != 0;
  dynamic_depth_bias_update_needed_ |=
      std::memcmp(&dynamic_depth_bias_slope_factor_, &depth_bias_slope_factor,
                  sizeof(float)) != 0;
  if (dynamic_depth_bias_update_needed_) {
    dynamic_depth_bias_constant_factor_ = depth_bias_constant_factor;
    dynamic_depth_bias_slope_factor_ = depth_bias_slope_factor;
    deferred_command_buffer_.CmdVkSetDepthBias(
        dynamic_depth_bias_constant_factor_, 0.0f,
        dynamic_depth_bias_slope_factor_);
    dynamic_depth_bias_update_needed_ = false;
  }

  // Blend constants.
  float blend_constants[] = {
      regs[XE_GPU_REG_RB_BLEND_RED].f32,
      regs[XE_GPU_REG_RB_BLEND_GREEN].f32,
      regs[XE_GPU_REG_RB_BLEND_BLUE].f32,
      regs[XE_GPU_REG_RB_BLEND_ALPHA].f32,
  };
  dynamic_blend_constants_update_needed_ |=
      std::memcmp(dynamic_blend_constants_, blend_constants,
                  sizeof(float) * 4) != 0;
  if (dynamic_blend_constants_update_needed_) {
    std::memcpy(dynamic_blend_constants_, blend_constants, sizeof(float) * 4);
    deferred_command_buffer_.CmdVkSetBlendConstants(dynamic_blend_constants_);
    dynamic_blend_constants_update_needed_ = false;
  }

  // Stencil masks and references.
  // Due to pretty complex conditions involving registers not directly related
  // to stencil (primitive type, culling), changing the values only when stencil
  // is actually needed. However, due to the way dynamic state needs to be set
  // in Vulkan, which doesn't take into account whether the state actually has
  // effect on drawing, and because the masks and the references are always
  // dynamic in Xenia guest pipelines, they must be set in the command buffer
  // before any draw.
  if (normalized_depth_control.stencil_enable) {
    Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg;
    if (primitive_polygonal && normalized_depth_control.backface_enable) {
      const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
      const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
          device_portability_subset_features =
              provider.device_portability_subset_features();
      if (!device_portability_subset_features ||
          device_portability_subset_features->separateStencilMaskRef) {
        // Choose the back face values only if drawing only back faces.
        stencil_ref_mask_front_reg =
            regs.Get<reg::PA_SU_SC_MODE_CNTL>().cull_front
                ? XE_GPU_REG_RB_STENCILREFMASK_BF
                : XE_GPU_REG_RB_STENCILREFMASK;
        stencil_ref_mask_back_reg = stencil_ref_mask_front_reg;
      } else {
        stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
        stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
      }
    } else {
      stencil_ref_mask_front_reg = XE_GPU_REG_RB_STENCILREFMASK;
      stencil_ref_mask_back_reg = XE_GPU_REG_RB_STENCILREFMASK;
    }
    auto stencil_ref_mask_front =
        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_front_reg);
    auto stencil_ref_mask_back =
        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_back_reg);
    // Compare mask.
    dynamic_stencil_compare_mask_front_update_needed_ |=
        dynamic_stencil_compare_mask_front_ !=
        stencil_ref_mask_front.stencilmask;
    dynamic_stencil_compare_mask_front_ = stencil_ref_mask_front.stencilmask;
    dynamic_stencil_compare_mask_back_update_needed_ |=
        dynamic_stencil_compare_mask_back_ != stencil_ref_mask_back.stencilmask;
    dynamic_stencil_compare_mask_back_ = stencil_ref_mask_back.stencilmask;
    // Write mask.
    dynamic_stencil_write_mask_front_update_needed_ |=
        dynamic_stencil_write_mask_front_ !=
        stencil_ref_mask_front.stencilwritemask;
    dynamic_stencil_write_mask_front_ = stencil_ref_mask_front.stencilwritemask;
    dynamic_stencil_write_mask_back_update_needed_ |=
        dynamic_stencil_write_mask_back_ !=
        stencil_ref_mask_back.stencilwritemask;
    dynamic_stencil_write_mask_back_ = stencil_ref_mask_back.stencilwritemask;
    // Reference.
    dynamic_stencil_reference_front_update_needed_ |=
        dynamic_stencil_reference_front_ != stencil_ref_mask_front.stencilref;
    dynamic_stencil_reference_front_ = stencil_ref_mask_front.stencilref;
    dynamic_stencil_reference_back_update_needed_ |=
        dynamic_stencil_reference_back_ != stencil_ref_mask_back.stencilref;
    dynamic_stencil_reference_back_ = stencil_ref_mask_back.stencilref;
  }
  // Using VK_STENCIL_FACE_FRONT_AND_BACK for higher safety when running on the
  // Vulkan portability subset without separateStencilMaskRef.
  if (dynamic_stencil_compare_mask_front_update_needed_ ||
      dynamic_stencil_compare_mask_back_update_needed_) {
    if (dynamic_stencil_compare_mask_front_ ==
        dynamic_stencil_compare_mask_back_) {
      deferred_command_buffer_.CmdVkSetStencilCompareMask(
          VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_compare_mask_front_);
    } else {
      if (dynamic_stencil_compare_mask_front_update_needed_) {
        deferred_command_buffer_.CmdVkSetStencilCompareMask(
            VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_compare_mask_front_);
      }
      if (dynamic_stencil_compare_mask_back_update_needed_) {
        deferred_command_buffer_.CmdVkSetStencilCompareMask(
            VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_compare_mask_back_);
      }
    }
    dynamic_stencil_compare_mask_front_update_needed_ = false;
    dynamic_stencil_compare_mask_back_update_needed_ = false;
  }
  if (dynamic_stencil_write_mask_front_update_needed_ ||
      dynamic_stencil_write_mask_back_update_needed_) {
    if (dynamic_stencil_write_mask_front_ == dynamic_stencil_write_mask_back_) {
      deferred_command_buffer_.CmdVkSetStencilWriteMask(
          VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_write_mask_front_);
    } else {
      if (dynamic_stencil_write_mask_front_update_needed_) {
        deferred_command_buffer_.CmdVkSetStencilWriteMask(
            VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_write_mask_front_);
      }
      if (dynamic_stencil_write_mask_back_update_needed_) {
        deferred_command_buffer_.CmdVkSetStencilWriteMask(
            VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_write_mask_back_);
      }
    }
    dynamic_stencil_write_mask_front_update_needed_ = false;
    dynamic_stencil_write_mask_back_update_needed_ = false;
  }
  if (dynamic_stencil_reference_front_update_needed_ ||
      dynamic_stencil_reference_back_update_needed_) {
    if (dynamic_stencil_reference_front_ == dynamic_stencil_reference_back_) {
      deferred_command_buffer_.CmdVkSetStencilReference(
          VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_reference_front_);
    } else {
      if (dynamic_stencil_reference_front_update_needed_) {
        deferred_command_buffer_.CmdVkSetStencilReference(
            VK_STENCIL_FACE_FRONT_BIT, dynamic_stencil_reference_front_);
      }
      if (dynamic_stencil_reference_back_update_needed_) {
        deferred_command_buffer_.CmdVkSetStencilReference(
            VK_STENCIL_FACE_BACK_BIT, dynamic_stencil_reference_back_);
      }
    }
    dynamic_stencil_reference_front_update_needed_ = false;
    dynamic_stencil_reference_back_update_needed_ = false;
  }

  // TODO(Triang3l): VK_EXT_extended_dynamic_state and
  // VK_EXT_extended_dynamic_state2.
}

void VulkanCommandProcessor::UpdateSystemConstantValues(
    xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;
  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
  int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);

  bool dirty = false;

  // Flags.
  uint32_t flags = 0;
  // W0 division control.
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
  // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
  //    = false: multiply the X, Y coordinates by 1/W0.
  // 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //    = false: multiply the Z coordinate by 1/W0.
  // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal
  //     to get 1/W0.
  if (pa_cl_vte_cntl.vtx_xy_fmt) {
    flags |= SpirvShaderTranslator::kSysFlag_XYDividedByW;
  }
  if (pa_cl_vte_cntl.vtx_z_fmt) {
    flags |= SpirvShaderTranslator::kSysFlag_ZDividedByW;
  }
  if (pa_cl_vte_cntl.vtx_w0_fmt) {
    flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal;
  }
  dirty |= system_constants_.flags != flags;
  system_constants_.flags = flags;

  // Index or tessellation edge factor buffer endianness.
  dirty |= system_constants_.vertex_index_endian != index_endian;
  system_constants_.vertex_index_endian = index_endian;

  // Vertex index offset.
  dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
  system_constants_.vertex_base_index = vgt_indx_offset;

  // Conversion to host normalized device coordinates.
  for (uint32_t i = 0; i < 3; ++i) {
    dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
    dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
    system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
    system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
  }

  if (dirty) {
    current_graphics_descriptor_set_values_up_to_date_ &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);
  }
}

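// Uploads the bool/loop, system, float and fetch constants into transient
// uniform buffers, writes the corresponding descriptor sets, and binds the
// descriptor set ranges that are out of date for the currently bound guest
// pipeline layout.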
bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
                                            const VulkanShader* pixel_shader) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;

  // Invalidate descriptors for changed data.
  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now cause nothing seems to differ on the Xbox
  // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
  // Check if the float constant layout is still the same and get the counts.
  const Shader::ConstantRegisterMap& float_constant_map_vertex =
      vertex_shader->constant_register_map();
  uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
  for (uint32_t i = 0; i < 4; ++i) {
    if (current_float_constant_map_vertex_[i] !=
        float_constant_map_vertex.float_bitmap[i]) {
      current_float_constant_map_vertex_[i] =
          float_constant_map_vertex.float_bitmap[i];
      // If no float constants at all, any buffer can be reused for them, so not
      // invalidating.
      if (float_constant_count_vertex) {
        current_graphics_descriptor_set_values_up_to_date_ &=
            ~(uint32_t(1)
              << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
      }
    }
  }
  uint32_t float_constant_count_pixel = 0;
  if (pixel_shader != nullptr) {
    const Shader::ConstantRegisterMap& float_constant_map_pixel =
        pixel_shader->constant_register_map();
    float_constant_count_pixel = float_constant_map_pixel.float_count;
    for (uint32_t i = 0; i < 4; ++i) {
      if (current_float_constant_map_pixel_[i] !=
          float_constant_map_pixel.float_bitmap[i]) {
        current_float_constant_map_pixel_[i] =
            float_constant_map_pixel.float_bitmap[i];
        if (float_constant_count_pixel) {
          current_graphics_descriptor_set_values_up_to_date_ &=
              ~(uint32_t(1)
                << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
        }
      }
    }
  } else {
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
  }

  // Make sure new descriptor sets are bound to the command buffer.
  current_graphics_descriptor_sets_bound_up_to_date_ &=
      current_graphics_descriptor_set_values_up_to_date_;

  // Write the new descriptor sets.
  VkWriteDescriptorSet
      write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount];
  uint32_t write_descriptor_set_count = 0;
  uint32_t write_descriptor_set_bits = 0;
  assert_not_zero(
      current_graphics_descriptor_set_values_up_to_date_ &
      (uint32_t(1)
       << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram));
  VkDescriptorBufferInfo buffer_info_bool_loop_constants;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) {
    VkWriteDescriptorSet& write_bool_loop_constants =
        write_descriptor_sets[write_descriptor_set_count++];
    constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32);
    uint8_t* mapping_bool_loop_constants = WriteUniformBufferBinding(
        kBoolLoopConstantsSize,
        descriptor_set_layout_fetch_bool_loop_constants_,
        buffer_info_bool_loop_constants, write_bool_loop_constants);
    if (!mapping_bool_loop_constants) {
      return false;
    }
    std::memcpy(mapping_bool_loop_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
                kBoolLoopConstantsSize);
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
            write_bool_loop_constants.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_system_constants;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetSystemConstants))) {
    VkWriteDescriptorSet& write_system_constants =
        write_descriptor_sets[write_descriptor_set_count++];
    uint8_t* mapping_system_constants = WriteUniformBufferBinding(
        sizeof(SpirvShaderTranslator::SystemConstants),
        descriptor_set_layout_system_constants_, buffer_info_system_constants,
        write_system_constants);
    if (!mapping_system_constants) {
      return false;
    }
    std::memcpy(mapping_system_constants, &system_constants_,
                sizeof(SpirvShaderTranslator::SystemConstants));
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetSystemConstants] =
            write_system_constants.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_float_constant_pixel;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) {
    // Even if the shader doesn't need any float constants, a valid binding must
    // still be provided (the pipeline layout always has float constants, for
    // both the vertex shader and the pixel shader), so if the first draw in the
    // frame doesn't have float constants at all, still allocate an empty
    // buffer.
    VkWriteDescriptorSet& write_float_constants_pixel =
        write_descriptor_sets[write_descriptor_set_count++];
    uint8_t* mapping_float_constants_pixel = WriteUniformBufferBinding(
        sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)),
        descriptor_set_layout_float_constants_pixel_,
        buffer_info_float_constant_pixel, write_float_constants_pixel);
    if (!mapping_float_constants_pixel) {
      return false;
    }
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(mapping_float_constants_pixel,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    sizeof(float) * 4);
        mapping_float_constants_pixel += sizeof(float) * 4;
      }
    }
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
            write_float_constants_pixel.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_float_constant_vertex;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) {
    VkWriteDescriptorSet& write_float_constants_vertex =
        write_descriptor_sets[write_descriptor_set_count++];
    uint8_t* mapping_float_constants_vertex = WriteUniformBufferBinding(
        sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)),
        descriptor_set_layout_float_constants_vertex_,
        buffer_info_float_constant_vertex, write_float_constants_vertex);
    if (!mapping_float_constants_vertex) {
      return false;
    }
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(mapping_float_constants_vertex,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    sizeof(float) * 4);
        mapping_float_constants_vertex += sizeof(float) * 4;
      }
    }
    write_descriptor_set_bits |=
        uint32_t(1)
        << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
            write_float_constants_vertex.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_fetch_constants;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) {
    VkWriteDescriptorSet& write_fetch_constants =
        write_descriptor_sets[write_descriptor_set_count++];
    constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32;
    uint8_t* mapping_fetch_constants = WriteUniformBufferBinding(
        kFetchConstantsSize, descriptor_set_layout_fetch_bool_loop_constants_,
        buffer_info_fetch_constants, write_fetch_constants);
    if (!mapping_fetch_constants) {
      return false;
    }
    std::memcpy(mapping_fetch_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
                kFetchConstantsSize);
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetFetchConstants] =
            write_fetch_constants.dstSet;
  }
  if (write_descriptor_set_count) {
    const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
    VkDevice device = provider.device();
    dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count,
                               write_descriptor_sets, 0, nullptr);
  }
  // Only make valid if written successfully.
  current_graphics_descriptor_set_values_up_to_date_ |=
      write_descriptor_set_bits;

  // Bind the new descriptor sets.
  uint32_t descriptor_sets_needed =
      (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1;
  if (current_guest_graphics_pipeline_layout_
          ->descriptor_set_layout_textures_vertex_ref() ==
      descriptor_set_layout_empty_) {
    descriptor_sets_needed &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex);
  }
  if (current_guest_graphics_pipeline_layout_
          ->descriptor_set_layout_textures_pixel_ref() ==
      descriptor_set_layout_empty_) {
    descriptor_sets_needed &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel);
  }
  uint32_t descriptor_sets_remaining =
      descriptor_sets_needed &
      ~current_graphics_descriptor_sets_bound_up_to_date_;
  uint32_t descriptor_set_index;
  while (
      xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) {
    uint32_t descriptor_set_mask_tzcnt =
        xe::tzcnt(~(descriptor_sets_remaining |
                    ((uint32_t(1) << descriptor_set_index) - 1)));
    // TODO(Triang3l): Bind to compute for rectangle list emulation without
    // geometry shaders.
    deferred_command_buffer_.CmdVkBindDescriptorSets(
        VK_PIPELINE_BIND_POINT_GRAPHICS,
        current_guest_graphics_pipeline_layout_->GetPipelineLayout(),
        descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index,
        current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr);
    if (descriptor_set_mask_tzcnt >= 32) {
      break;
    }
    descriptor_sets_remaining &=
        ~((uint32_t(1) << descriptor_set_mask_tzcnt) - 1);
  }
  current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed;

  return true;
}

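// Allocates a transient descriptor set with the given layout and a uniform
// buffer range of the given size for the current frame, fills out the buffer
// info and the descriptor write structure, and returns the CPU mapping of the
// allocated range, or nullptr on failure.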
uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding(
    size_t size, VkDescriptorSetLayout descriptor_set_layout,
    VkDescriptorBufferInfo& descriptor_buffer_info_out,
    VkWriteDescriptorSet& write_descriptor_set_out) {
  VkDescriptorSet descriptor_set =
      transient_descriptor_pool_uniform_buffers_->Request(
          frame_current_, descriptor_set_layout, 1);
  if (descriptor_set == VK_NULL_HANDLE) {
    return nullptr;
  }
  const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
  uint8_t* mapping = uniform_buffer_pool_->Request(
      frame_current_, size,
      size_t(
          provider.device_properties().limits.minUniformBufferOffsetAlignment),
      descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset);
  if (!mapping) {
    return nullptr;
  }
  descriptor_buffer_info_out.range = VkDeviceSize(size);
  write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  write_descriptor_set_out.pNext = nullptr;
  write_descriptor_set_out.dstSet = descriptor_set;
  write_descriptor_set_out.dstBinding = 0;
  write_descriptor_set_out.dstArrayElement = 0;
  write_descriptor_set_out.descriptorCount = 1;
  write_descriptor_set_out.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
  write_descriptor_set_out.pImageInfo = nullptr;
  write_descriptor_set_out.pBufferInfo = &descriptor_buffer_info_out;
  write_descriptor_set_out.pTexelBufferView = nullptr;
  return mapping;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe