pcsx2/common/Vulkan/Context.cpp

2040 lines
73 KiB
C++

/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "common/Vulkan/Context.h"
#include "common/Align.h"
#include "common/Assertions.h"
#include "common/Console.h"
#include "common/General.h"
#include "common/StringUtil.h"
#include "common/Vulkan/ShaderCompiler.h"
#include "common/Vulkan/SwapChain.h"
#include "common/Vulkan/Util.h"
#include <algorithm>
#include <array>
#include <cstring>
#ifdef _WIN32
#include "common/RedtapeWindows.h"
#else
#include <time.h>
#endif
std::unique_ptr<Vulkan::Context> g_vulkan_context;
// Tweakables
enum : u32
{
MAX_DRAW_CALLS_PER_FRAME = 8192,
MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = 2 * MAX_DRAW_CALLS_PER_FRAME,
MAX_SAMPLED_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, // assume at least half our draws aren't going to be shuffle/blending
MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME = 4, // Currently used by CAS only
MAX_INPUT_ATTACHMENT_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME * 2
};
namespace Vulkan
{
Context::Context(VkInstance instance, VkPhysicalDevice physical_device)
: m_instance(instance)
, m_physical_device(physical_device)
{
// Read device physical memory properties, we need it for allocating buffers
vkGetPhysicalDeviceProperties(physical_device, &m_device_properties);
vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties);
// We need this to be at least 32 byte aligned for AVX2 stores.
m_device_properties.limits.minUniformBufferOffsetAlignment =
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
m_device_properties.limits.minTexelBufferOffsetAlignment =
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
m_device_properties.limits.optimalBufferCopyOffsetAlignment =
std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(32));
m_device_properties.limits.optimalBufferCopyRowPitchAlignment =
Common::NextPow2(std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(32)));
m_device_properties.limits.bufferImageGranularity =
std::max(m_device_properties.limits.bufferImageGranularity, static_cast<VkDeviceSize>(32));
}
Context::~Context() = default;
VkInstance Context::CreateVulkanInstance(
const WindowInfo* wi, bool enable_debug_utils, bool enable_validation_layer)
{
ExtensionList enabled_extensions;
if (!SelectInstanceExtensions(&enabled_extensions, wi, enable_debug_utils))
return VK_NULL_HANDLE;
// Remember to manually update this every release. We don't pull in svnrev.h here, because
// it's only the major/minor version, and rebuilding the file every time something else changes
// is unnecessary.
VkApplicationInfo app_info = {};
app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
app_info.pNext = nullptr;
app_info.pApplicationName = "PCSX2";
app_info.applicationVersion = VK_MAKE_VERSION(1, 7, 0);
app_info.pEngineName = "PCSX2";
app_info.engineVersion = VK_MAKE_VERSION(1, 7, 0);
app_info.apiVersion = VK_API_VERSION_1_1;
VkInstanceCreateInfo instance_create_info = {};
instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
instance_create_info.pNext = nullptr;
instance_create_info.flags = 0;
instance_create_info.pApplicationInfo = &app_info;
instance_create_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
instance_create_info.ppEnabledExtensionNames = enabled_extensions.data();
instance_create_info.enabledLayerCount = 0;
instance_create_info.ppEnabledLayerNames = nullptr;
// Enable debug layer on debug builds
if (enable_validation_layer)
{
static const char* layer_names[] = {"VK_LAYER_KHRONOS_validation"};
instance_create_info.enabledLayerCount = 1;
instance_create_info.ppEnabledLayerNames = layer_names;
}
VkInstance instance;
VkResult res = vkCreateInstance(&instance_create_info, nullptr, &instance);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateInstance failed: ");
return nullptr;
}
return instance;
}
bool Context::SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo* wi, bool enable_debug_utils)
{
u32 extension_count = 0;
VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: ");
return false;
}
if (extension_count == 0)
{
Console.Error("Vulkan: No extensions supported by instance.");
return false;
}
std::vector<VkExtensionProperties> available_extension_list(extension_count);
res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, available_extension_list.data());
pxAssert(res == VK_SUCCESS);
auto SupportsExtension = [&](const char* name, bool required) {
if (std::find_if(available_extension_list.begin(), available_extension_list.end(),
[&](const VkExtensionProperties& properties) { return !strcmp(name, properties.extensionName); }) !=
available_extension_list.end())
{
DevCon.WriteLn("Enabling extension: %s", name);
extension_list->push_back(name);
return true;
}
if (required)
Console.Error("Vulkan: Missing required extension %s.", name);
return false;
};
// Common extensions
if (wi && wi->type != WindowInfo::Type::Surfaceless && !SupportsExtension(VK_KHR_SURFACE_EXTENSION_NAME, true))
return false;
#if defined(VK_USE_PLATFORM_WIN32_KHR)
if (wi && wi->type == WindowInfo::Type::Win32 && !SupportsExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true))
return false;
#endif
#if defined(VK_USE_PLATFORM_XLIB_KHR)
if (wi && wi->type == WindowInfo::Type::X11 && !SupportsExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, true))
return false;
#endif
#if defined(VK_USE_PLATFORM_WAYLAND_KHR)
if (wi && wi->type == WindowInfo::Type::Wayland &&
!SupportsExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true))
return false;
#endif
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
if (wi && wi->type == WindowInfo::Type::Android &&
!SupportsExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true))
return false;
#endif
#if defined(VK_USE_PLATFORM_METAL_EXT)
if (wi && wi->type == WindowInfo::Type::MacOS && !SupportsExtension(VK_EXT_METAL_SURFACE_EXTENSION_NAME, true))
return false;
#endif
#if 0
if (wi && wi->type == WindowInfo::Type::Display && !SupportsExtension(VK_KHR_DISPLAY_EXTENSION_NAME, true))
return false;
#endif
// VK_EXT_debug_utils
if (enable_debug_utils && !SupportsExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false))
Console.Warning("Vulkan: Debug report requested, but extension is not available.");
return true;
}
Context::GPUList Context::EnumerateGPUs(VkInstance instance)
{
u32 gpu_count = 0;
VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr);
if (res != VK_SUCCESS || gpu_count == 0)
{
LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
return {};
}
GPUList gpus;
gpus.resize(gpu_count);
res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data());
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
return {};
}
return gpus;
}
Context::GPUNameList Context::EnumerateGPUNames(VkInstance instance)
{
u32 gpu_count = 0;
VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr);
if (res != VK_SUCCESS || gpu_count == 0)
{
LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
return {};
}
GPUList gpus;
gpus.resize(gpu_count);
res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data());
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
return {};
}
GPUNameList gpu_names;
gpu_names.reserve(gpu_count);
for (u32 i = 0; i < gpu_count; i++)
{
VkPhysicalDeviceProperties props = {};
vkGetPhysicalDeviceProperties(gpus[i], &props);
std::string gpu_name(props.deviceName);
// handle duplicate adapter names
if (std::any_of(gpu_names.begin(), gpu_names.end(),
[&gpu_name](const std::string& other) { return (gpu_name == other); }))
{
std::string original_adapter_name = std::move(gpu_name);
u32 current_extra = 2;
do
{
gpu_name = StringUtil::StdStringFromFormat("%s (%u)", original_adapter_name.c_str(), current_extra);
current_extra++;
} while (std::any_of(gpu_names.begin(), gpu_names.end(),
[&gpu_name](const std::string& other) { return (gpu_name == other); }));
}
gpu_names.push_back(std::move(gpu_name));
}
return gpu_names;
}
bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::unique_ptr<SwapChain>* out_swap_chain,
VkPresentModeKHR preferred_present_mode, bool threaded_presentation, bool enable_debug_utils,
bool enable_validation_layer)
{
pxAssertMsg(!g_vulkan_context, "Has no current context");
if (!Vulkan::LoadVulkanLibrary())
{
Console.Error("Failed to load Vulkan library");
return false;
}
const bool enable_surface = (wi && wi->type != WindowInfo::Type::Surfaceless);
VkInstance instance = CreateVulkanInstance(wi, enable_debug_utils, enable_validation_layer);
if (instance == VK_NULL_HANDLE)
{
if (enable_debug_utils || enable_validation_layer)
{
// Try again without the validation layer.
enable_debug_utils = false;
enable_validation_layer = false;
instance = CreateVulkanInstance(wi, enable_debug_utils, enable_validation_layer);
if (instance == VK_NULL_HANDLE)
{
Vulkan::UnloadVulkanLibrary();
return false;
}
Console.Error("Vulkan validation/debug layers requested but are unavailable. Creating non-debug device.");
}
}
if (!Vulkan::LoadVulkanInstanceFunctions(instance))
{
Console.Error("Failed to load Vulkan instance functions");
vkDestroyInstance(instance, nullptr);
Vulkan::UnloadVulkanLibrary();
return false;
}
GPUList gpus = EnumerateGPUs(instance);
if (gpus.empty())
{
vkDestroyInstance(instance, nullptr);
Vulkan::UnloadVulkanLibrary();
return false;
}
u32 gpu_index = 0;
GPUNameList gpu_names = EnumerateGPUNames(instance);
if (!gpu_name.empty())
{
for (; gpu_index < static_cast<u32>(gpu_names.size()); gpu_index++)
{
Console.WriteLn("GPU %u: %s", static_cast<u32>(gpu_index), gpu_names[gpu_index].c_str());
if (gpu_names[gpu_index] == gpu_name)
break;
}
if (gpu_index == static_cast<u32>(gpu_names.size()))
{
Console.Warning("Requested GPU '%s' not found, using first (%s)", std::string(gpu_name).c_str(),
gpu_names[0].c_str());
gpu_index = 0;
}
}
else
{
Console.WriteLn("No GPU requested, using first (%s)", gpu_names[0].c_str());
}
VkSurfaceKHR surface = VK_NULL_HANDLE;
WindowInfo wi_copy;
if (wi)
wi_copy = *wi;
if (enable_surface &&
(surface = SwapChain::CreateVulkanSurface(instance, gpus[gpu_index], &wi_copy)) == VK_NULL_HANDLE)
{
vkDestroyInstance(instance, nullptr);
Vulkan::UnloadVulkanLibrary();
return false;
}
g_vulkan_context.reset(new Context(instance, gpus[gpu_index]));
if (enable_debug_utils)
g_vulkan_context->EnableDebugUtils();
// Attempt to create the device.
if (!g_vulkan_context->CreateDevice(surface, enable_validation_layer, nullptr, 0, nullptr, 0, nullptr) ||
!g_vulkan_context->CreateAllocator() || !g_vulkan_context->CreateGlobalDescriptorPool() ||
!g_vulkan_context->CreateCommandBuffers() || !g_vulkan_context->CreateTextureStreamBuffer() ||
!g_vulkan_context->InitSpinResources() ||
(enable_surface && (*out_swap_chain = SwapChain::Create(wi_copy, surface, preferred_present_mode)) == nullptr))
{
// Since we are destroying the instance, we're also responsible for destroying the surface.
if (surface != VK_NULL_HANDLE)
vkDestroySurfaceKHR(instance, surface, nullptr);
g_vulkan_context.reset();
return false;
}
if (threaded_presentation)
g_vulkan_context->StartPresentThread();
return true;
}
void Context::Destroy()
{
pxAssertMsg(g_vulkan_context, "Has context");
g_vulkan_context->StopPresentThread();
if (g_vulkan_context->m_device != VK_NULL_HANDLE)
g_vulkan_context->WaitForGPUIdle();
g_vulkan_context->m_texture_upload_buffer.Destroy(false);
g_vulkan_context->DestroySpinResources();
g_vulkan_context->DestroyRenderPassCache();
g_vulkan_context->DestroyGlobalDescriptorPool();
g_vulkan_context->DestroyCommandBuffers();
g_vulkan_context->DestroyAllocator();
if (g_vulkan_context->m_device != VK_NULL_HANDLE)
vkDestroyDevice(g_vulkan_context->m_device, nullptr);
if (g_vulkan_context->m_debug_messenger_callback != VK_NULL_HANDLE)
g_vulkan_context->DisableDebugUtils();
if (g_vulkan_context->m_instance != VK_NULL_HANDLE)
vkDestroyInstance(g_vulkan_context->m_instance, nullptr);
Vulkan::UnloadVulkanLibrary();
g_vulkan_context.reset();
}
bool Context::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface)
{
u32 extension_count = 0;
VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: ");
return false;
}
if (extension_count == 0)
{
Console.Error("Vulkan: No extensions supported by device.");
return false;
}
std::vector<VkExtensionProperties> available_extension_list(extension_count);
res = vkEnumerateDeviceExtensionProperties(
m_physical_device, nullptr, &extension_count, available_extension_list.data());
pxAssert(res == VK_SUCCESS);
auto SupportsExtension = [&](const char* name, bool required) {
if (std::find_if(available_extension_list.begin(), available_extension_list.end(),
[&](const VkExtensionProperties& properties) { return !strcmp(name, properties.extensionName); }) !=
available_extension_list.end())
{
if (std::none_of(extension_list->begin(), extension_list->end(),
[&](const char* existing_name) { return (std::strcmp(existing_name, name) == 0); }))
{
DevCon.WriteLn("Enabling extension: %s", name);
extension_list->push_back(name);
}
return true;
}
if (required)
Console.Error("Vulkan: Missing required extension %s.", name);
return false;
};
if (enable_surface && !SupportsExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true))
return false;
m_optional_extensions.vk_ext_provoking_vertex =
SupportsExtension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
m_optional_extensions.vk_ext_memory_budget =
SupportsExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false);
m_optional_extensions.vk_ext_calibrated_timestamps =
SupportsExtension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_driver_properties =
SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false);
m_optional_extensions.vk_arm_rasterization_order_attachment_access =
SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false);
m_optional_extensions.vk_khr_fragment_shader_barycentric =
SupportsExtension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, false);
return true;
}
bool Context::SelectDeviceFeatures(const VkPhysicalDeviceFeatures* required_features)
{
VkPhysicalDeviceFeatures available_features;
vkGetPhysicalDeviceFeatures(m_physical_device, &available_features);
if (required_features)
std::memcpy(&m_device_features, required_features, sizeof(m_device_features));
// Enable the features we use.
m_device_features.dualSrcBlend = available_features.dualSrcBlend;
m_device_features.geometryShader = available_features.geometryShader;
m_device_features.largePoints = available_features.largePoints;
m_device_features.wideLines = available_features.wideLines;
m_device_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
m_device_features.textureCompressionBC = available_features.textureCompressionBC;
m_device_features.samplerAnisotropy = available_features.samplerAnisotropy;
return true;
}
bool Context::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer,
const char** required_device_extensions, u32 num_required_device_extensions,
const char** required_device_layers, u32 num_required_device_layers,
const VkPhysicalDeviceFeatures* required_features)
{
u32 queue_family_count;
vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr);
if (queue_family_count == 0)
{
Console.Error("No queue families found on specified vulkan physical device.");
return false;
}
std::vector<VkQueueFamilyProperties> queue_family_properties(queue_family_count);
vkGetPhysicalDeviceQueueFamilyProperties(
m_physical_device, &queue_family_count, queue_family_properties.data());
Console.WriteLn("%u vulkan queue families", queue_family_count);
// Find graphics and present queues.
m_graphics_queue_family_index = queue_family_count;
m_present_queue_family_index = queue_family_count;
m_spin_queue_family_index = queue_family_count;
u32 spin_queue_index = 0;
for (uint32_t i = 0; i < queue_family_count; i++)
{
VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT;
if (graphics_supported)
{
m_graphics_queue_family_index = i;
// Quit now, no need for a present queue.
if (!surface)
{
break;
}
}
if (surface)
{
VkBool32 present_supported;
VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ");
return false;
}
if (present_supported)
{
m_present_queue_family_index = i;
}
// Prefer one queue family index that does both graphics and present.
if (graphics_supported && present_supported)
{
break;
}
}
}
for (uint32_t i = 0; i < queue_family_count; i++)
{
// Pick a queue for spinning
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
continue; // We need compute
if (queue_family_properties[i].timestampValidBits == 0)
continue; // We need timing
const bool queue_is_used = i == m_graphics_queue_family_index || i == m_present_queue_family_index;
if (queue_is_used && m_spin_queue_family_index != queue_family_count)
continue; // Found a non-graphics queue to use
spin_queue_index = 0;
m_spin_queue_family_index = i;
if (queue_is_used && queue_family_properties[i].queueCount > 1)
spin_queue_index = 1;
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
break; // Async compute queue, definitely pick this one
}
if (m_graphics_queue_family_index == queue_family_count)
{
Console.Error("Vulkan: Failed to find an acceptable graphics queue.");
return false;
}
if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count)
{
Console.Error("Vulkan: Failed to find an acceptable present queue.");
return false;
}
VkDeviceCreateInfo device_info = {};
device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
device_info.pNext = nullptr;
device_info.flags = 0;
device_info.queueCreateInfoCount = 0;
static constexpr float queue_priorities[] = {1.0f, 0.0f}; // Low priority for the spin queue
std::array<VkDeviceQueueCreateInfo, 3> queue_infos;
VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++];
graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
graphics_queue_info.pNext = nullptr;
graphics_queue_info.flags = 0;
graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index;
graphics_queue_info.queueCount = 1;
graphics_queue_info.pQueuePriorities = queue_priorities;
if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index)
{
VkDeviceQueueCreateInfo& present_queue_info = queue_infos[device_info.queueCreateInfoCount++];
present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
present_queue_info.pNext = nullptr;
present_queue_info.flags = 0;
present_queue_info.queueFamilyIndex = m_present_queue_family_index;
present_queue_info.queueCount = 1;
present_queue_info.pQueuePriorities = queue_priorities;
}
if (m_spin_queue_family_index == m_graphics_queue_family_index)
{
if (spin_queue_index != 0)
graphics_queue_info.queueCount = 2;
}
else if (m_spin_queue_family_index == m_present_queue_family_index)
{
if (spin_queue_index != 0)
queue_infos[1].queueCount = 2; // present queue
}
else
{
VkDeviceQueueCreateInfo& spin_queue_info = queue_infos[device_info.queueCreateInfoCount++];
spin_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
spin_queue_info.pNext = nullptr;
spin_queue_info.flags = 0;
spin_queue_info.queueFamilyIndex = m_spin_queue_family_index;
spin_queue_info.queueCount = 1;
spin_queue_info.pQueuePriorities = queue_priorities + 1;
}
device_info.pQueueCreateInfos = queue_infos.data();
ExtensionList enabled_extensions;
for (u32 i = 0; i < num_required_device_extensions; i++)
enabled_extensions.emplace_back(required_device_extensions[i]);
if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE))
return false;
device_info.enabledLayerCount = num_required_device_layers;
device_info.ppEnabledLayerNames = required_device_layers;
device_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
device_info.ppEnabledExtensionNames = enabled_extensions.data();
// Check for required features before creating.
if (!SelectDeviceFeatures(required_features))
return false;
device_info.pEnabledFeatures = &m_device_features;
// Enable debug layer on debug builds
if (enable_validation_layer)
{
static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"};
device_info.enabledLayerCount = 1;
device_info.ppEnabledLayerNames = layer_names;
}
// provoking vertex
VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT};
VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM rasterization_order_access_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM};
if (m_optional_extensions.vk_ext_provoking_vertex)
{
provoking_vertex_feature.provokingVertexLast = VK_TRUE;
Util::AddPointerToChain(&device_info, &provoking_vertex_feature);
}
if (m_optional_extensions.vk_arm_rasterization_order_attachment_access)
{
rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess = VK_TRUE;
Util::AddPointerToChain(&device_info, &rasterization_order_access_feature);
}
VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateDevice failed: ");
return false;
}
// With the device created, we can fill the remaining entry points.
if (!LoadVulkanDeviceFunctions(m_device))
return false;
// Grab the graphics and present queues.
vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue);
if (surface)
{
vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue);
}
m_spinning_supported = m_spin_queue_family_index != queue_family_count &&
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
m_device_properties.limits.timestampPeriod > 0;
m_spin_queue_is_graphics_queue = m_spin_queue_family_index == m_graphics_queue_family_index && spin_queue_index == 0;
m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 &&
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
m_device_properties.limits.timestampPeriod > 0);
DevCon.WriteLn("GPU timing is %s (TS=%u TS valid bits=%u, TS period=%f)",
m_gpu_timing_supported ? "supported" : "not supported",
static_cast<u32>(m_device_properties.limits.timestampComputeAndGraphics),
queue_family_properties[m_graphics_queue_family_index].timestampValidBits,
m_device_properties.limits.timestampPeriod);
ProcessDeviceExtensions();
if (m_spinning_supported)
{
vkGetDeviceQueue(m_device, m_spin_queue_family_index, spin_queue_index, &m_spin_queue);
m_spin_timestamp_scale = m_device_properties.limits.timestampPeriod;
if (m_optional_extensions.vk_ext_calibrated_timestamps)
{
#ifdef _WIN32
LARGE_INTEGER Freq;
QueryPerformanceFrequency(&Freq);
m_queryperfcounter_to_ns = 1000000000.0 / static_cast < double > (Freq.QuadPart);
#endif
CalibrateSpinTimestamp();
}
}
return true;
}
void Context::ProcessDeviceExtensions()
{
// advanced feature checks
VkPhysicalDeviceFeatures2 features2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT};
VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM rasterization_order_access_feature = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM};
// add in optional feature structs
if (m_optional_extensions.vk_ext_provoking_vertex)
Util::AddPointerToChain(&features2, &provoking_vertex_features);
if (m_optional_extensions.vk_arm_rasterization_order_attachment_access)
Util::AddPointerToChain(&features2, &rasterization_order_access_feature);
// query
vkGetPhysicalDeviceFeatures2(m_physical_device, &features2);
// confirm we actually support it
m_optional_extensions.vk_ext_provoking_vertex &= (provoking_vertex_features.provokingVertexLast == VK_TRUE);
m_optional_extensions.vk_arm_rasterization_order_attachment_access &= (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE);
VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
void** pNext = &properties2.pNext;
if (m_optional_extensions.vk_khr_driver_properties)
{
m_device_driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
*pNext = &m_device_driver_properties;
pNext = &m_device_driver_properties.pNext;
}
// query
vkGetPhysicalDeviceProperties2(m_physical_device, &properties2);
// VK_EXT_calibrated_timestamps checking
if (m_optional_extensions.vk_ext_calibrated_timestamps)
{
u32 count = 0;
vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(m_physical_device, &count, nullptr);
std::unique_ptr<VkTimeDomainEXT[]> time_domains = std::make_unique<VkTimeDomainEXT[]>(count);
vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(m_physical_device, &count, time_domains.get());
const VkTimeDomainEXT* begin = &time_domains[0];
const VkTimeDomainEXT* end = &time_domains[count];
if (std::find(begin, end, VK_TIME_DOMAIN_DEVICE_EXT) == end)
m_optional_extensions.vk_ext_calibrated_timestamps = false;
VkTimeDomainEXT preferred_types[] = {
#ifdef _WIN32
VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,
#else
#ifdef CLOCK_MONOTONIC_RAW
VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#endif
};
m_calibrated_timestamp_type = VK_TIME_DOMAIN_DEVICE_EXT;
for (VkTimeDomainEXT type : preferred_types)
{
if (std::find(begin, end, type) != end)
{
m_calibrated_timestamp_type = type;
break;
}
}
if (m_calibrated_timestamp_type == VK_TIME_DOMAIN_DEVICE_EXT)
m_optional_extensions.vk_ext_calibrated_timestamps = false;
}
Console.WriteLn("VK_EXT_provoking_vertex is %s",
m_optional_extensions.vk_ext_provoking_vertex ? "supported" : "NOT supported");
Console.WriteLn("VK_EXT_calibrated_timestamps is %s",
m_optional_extensions.vk_ext_calibrated_timestamps ? "supported" : "NOT supported");
Console.WriteLn("VK_ARM_rasterization_order_attachment_access is %s",
m_optional_extensions.vk_arm_rasterization_order_attachment_access ? "supported" : "NOT supported");
}
bool Context::CreateAllocator()
{
VmaAllocatorCreateInfo ci = {};
ci.vulkanApiVersion = VK_API_VERSION_1_1;
ci.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
ci.physicalDevice = m_physical_device;
ci.device = m_device;
ci.instance = m_instance;
if (m_optional_extensions.vk_ext_memory_budget)
ci.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
VkResult res = vmaCreateAllocator(&ci, &m_allocator);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vmaCreateAllocator failed: ");
return false;
}
return true;
}
void Context::DestroyAllocator()
{
if (m_allocator == VK_NULL_HANDLE)
return;
vmaDestroyAllocator(m_allocator);
m_allocator = VK_NULL_HANDLE;
}
bool Context::CreateCommandBuffers()
{
VkResult res;
uint32_t frame_index = 0;
for (FrameResources& resources : m_frame_resources)
{
resources.needs_fence_wait = false;
VkCommandPoolCreateInfo pool_info = {
VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, m_graphics_queue_family_index};
res = vkCreateCommandPool(m_device, &pool_info, nullptr, &resources.command_pool);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: ");
return false;
}
Vulkan::Util::SetObjectName(
g_vulkan_context->GetDevice(), resources.command_pool, "Frame Command Pool %u", frame_index);
VkCommandBufferAllocateInfo buffer_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
resources.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY,
static_cast<u32>(resources.command_buffers.size())};
res = vkAllocateCommandBuffers(m_device, &buffer_info, resources.command_buffers.data());
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: ");
return false;
}
for (u32 i = 0; i < resources.command_buffers.size(); i++)
{
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.command_buffers[i],
"Frame %u %sCommand Buffer", frame_index, (i == 0) ? "Init" : "");
}
VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT};
res = vkCreateFence(m_device, &fence_info, nullptr, &resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateFence failed: ");
return false;
}
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.fence, "Frame Fence %u", frame_index);
// TODO: A better way to choose the number of descriptors.
VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME},
{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, MAX_SAMPLED_IMAGE_DESCRIPTORS_PER_FRAME},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME},
{VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_IMAGE_DESCRIPTORS_PER_FRAME},
};
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0,
MAX_DESCRIPTOR_SETS_PER_FRAME, static_cast<u32>(std::size(pool_sizes)), pool_sizes};
res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
return false;
}
Vulkan::Util::SetObjectName(
g_vulkan_context->GetDevice(), resources.descriptor_pool, "Frame Descriptor Pool %u", frame_index);
++frame_index;
}
ActivateCommandBuffer(0);
return true;
}
void Context::DestroyCommandBuffers()
{
for (FrameResources& resources : m_frame_resources)
{
for (auto& it : resources.cleanup_resources)
it();
resources.cleanup_resources.clear();
if (resources.fence != VK_NULL_HANDLE)
{
vkDestroyFence(m_device, resources.fence, nullptr);
resources.fence = VK_NULL_HANDLE;
}
if (resources.descriptor_pool != VK_NULL_HANDLE)
{
vkDestroyDescriptorPool(m_device, resources.descriptor_pool, nullptr);
resources.descriptor_pool = VK_NULL_HANDLE;
}
if (resources.command_buffers[0] != VK_NULL_HANDLE)
{
vkFreeCommandBuffers(m_device, resources.command_pool,
static_cast<u32>(resources.command_buffers.size()), resources.command_buffers.data());
resources.command_buffers.fill(VK_NULL_HANDLE);
}
if (resources.command_pool != VK_NULL_HANDLE)
{
vkDestroyCommandPool(m_device, resources.command_pool, nullptr);
resources.command_pool = VK_NULL_HANDLE;
}
}
}
bool Context::CreateGlobalDescriptorPool()
{
// TODO: A better way to choose the number of descriptors.
VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
};
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
1024, // TODO: tweak this
static_cast<u32>(std::size(pool_sizes)), pool_sizes};
VkResult res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &m_global_descriptor_pool);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
return false;
}
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_global_descriptor_pool, "Global Descriptor Pool");
if (m_gpu_timing_supported)
{
const VkQueryPoolCreateInfo query_create_info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr,
0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 4, 0};
res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: ");
m_gpu_timing_supported = false;
return false;
}
}
return true;
}
void Context::DestroyGlobalDescriptorPool()
{
if (m_timestamp_query_pool != VK_NULL_HANDLE)
{
vkDestroyQueryPool(m_device, m_timestamp_query_pool, nullptr);
m_timestamp_query_pool = VK_NULL_HANDLE;
}
if (m_global_descriptor_pool != VK_NULL_HANDLE)
{
vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr);
m_global_descriptor_pool = VK_NULL_HANDLE;
}
}
bool Context::CreateTextureStreamBuffer()
{
if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE))
{
Console.Error("Failed to allocate texture upload buffer");
return false;
}
return true;
}
VkCommandBuffer Context::GetCurrentInitCommandBuffer()
{
FrameResources& res = m_frame_resources[m_current_frame];
VkCommandBuffer buf = res.command_buffers[0];
if (res.init_buffer_used)
return buf;
VkCommandBufferBeginInfo bi{
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr};
vkBeginCommandBuffer(buf, &bi);
res.init_buffer_used = true;
return buf;
}
VkDescriptorSet Context::AllocateDescriptorSet(VkDescriptorSetLayout set_layout)
{
VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout};
VkDescriptorSet descriptor_set;
VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set);
if (res != VK_SUCCESS)
{
// Failing to allocate a descriptor set is not a fatal error, we can
// recover by moving to the next command buffer.
return VK_NULL_HANDLE;
}
return descriptor_set;
}
VkDescriptorSet Context::AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout)
{
VkDescriptorSetAllocateInfo allocate_info = {
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, m_global_descriptor_pool, 1, &set_layout};
VkDescriptorSet descriptor_set;
VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set);
if (res != VK_SUCCESS)
return VK_NULL_HANDLE;
return descriptor_set;
}
void Context::FreeGlobalDescriptorSet(VkDescriptorSet set)
{
vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &set);
}
void Context::WaitForFenceCounter(u64 fence_counter)
{
if (m_completed_fence_counter >= fence_counter)
return;
// Find the first command buffer which covers this counter value.
u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
while (index != m_current_frame)
{
if (m_frame_resources[index].fence_counter >= fence_counter)
break;
index = (index + 1) % NUM_COMMAND_BUFFERS;
}
pxAssert(index != m_current_frame);
WaitForCommandBufferCompletion(index);
}
void Context::WaitForGPUIdle()
{
WaitForPresentComplete();
vkDeviceWaitIdle(m_device);
}
float Context::GetAndResetAccumulatedGPUTime()
{
const float time = m_accumulated_gpu_time;
m_accumulated_gpu_time = 0.0f;
return time;
}
bool Context::SetEnableGPUTiming(bool enabled)
{
m_gpu_timing_enabled = enabled && m_gpu_timing_supported;
return (enabled == m_gpu_timing_enabled);
}
void Context::ScanForCommandBufferCompletion()
{
for (u32 check_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; check_index != m_current_frame; check_index = (check_index + 1) % NUM_COMMAND_BUFFERS)
{
FrameResources& resources = m_frame_resources[check_index];
if (resources.fence_counter <= m_completed_fence_counter)
continue; // Already completed
if (vkGetFenceStatus(m_device, resources.fence) != VK_SUCCESS)
break; // Fence not signaled, later fences won't be either
CommandBufferCompleted(check_index);
m_completed_fence_counter = resources.fence_counter;
}
for (SpinResources& resources : m_spin_resources)
{
if (!resources.in_progress)
continue;
if (vkGetFenceStatus(m_device, resources.fence) != VK_SUCCESS)
continue;
SpinCommandCompleted(&resources - &m_spin_resources[0]);
}
}
void Context::WaitForCommandBufferCompletion(u32 index)
{
// Wait for this command buffer to be completed.
VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
// Clean up any resources for command buffers between the last known completed buffer and this
// now-completed command buffer. If we use >2 buffers, this may be more than one buffer.
const u64 now_completed_counter = m_frame_resources[index].fence_counter;
u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
while (cleanup_index != m_current_frame)
{
FrameResources& resources = m_frame_resources[cleanup_index];
if (resources.fence_counter > now_completed_counter)
break;
if (resources.fence_counter > m_completed_fence_counter)
CommandBufferCompleted(cleanup_index);
cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS;
}
m_completed_fence_counter = now_completed_counter;
}
void Context::SubmitCommandBuffer(SwapChain* present_swap_chain /* = nullptr */, bool submit_on_thread /* = false */)
{
FrameResources& resources = m_frame_resources[m_current_frame];
// End the current command buffer.
VkResult res;
if (resources.init_buffer_used)
{
res = vkEndCommandBuffer(resources.command_buffers[0]);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
pxFailRel("Failed to end command buffer");
}
}
bool wants_timestamp = m_gpu_timing_enabled || m_spin_timer;
if (wants_timestamp && resources.timestamp_written)
{
vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, m_current_frame * 2 + 1);
}
res = vkEndCommandBuffer(resources.command_buffers[1]);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
pxFailRel("Failed to end command buffer");
}
// This command buffer now has commands, so can't be re-used without waiting.
resources.needs_fence_wait = true;
u32 spin_cycles = 0;
const bool spin_enabled = m_spin_timer;
if (spin_enabled)
{
ScanForCommandBufferCompletion();
auto draw = m_spin_manager.DrawSubmitted(m_command_buffer_render_passes);
u32 constant_offset = 400000 * m_spin_manager.SpinsPerUnitTime(); // 400µs, just to be safe since going over gets really bad
if (m_optional_extensions.vk_ext_calibrated_timestamps)
constant_offset /= 2; // Safety factor isn't as important here, going over just hurts this one submission a bit
u32 minimum_spin = 200000 * m_spin_manager.SpinsPerUnitTime();
u32 maximum_spin = std::max<u32>(1024, 16000000 * m_spin_manager.SpinsPerUnitTime()); // 16ms
if (draw.recommended_spin > minimum_spin + constant_offset)
spin_cycles = std::min(draw.recommended_spin - constant_offset, maximum_spin);
resources.spin_id = draw.id;
}
else
{
resources.spin_id = -1;
}
m_command_buffer_render_passes = 0;
if (present_swap_chain != VK_NULL_HANDLE && m_spinning_supported)
{
m_spin_manager.NextFrame();
if (m_spin_timer)
m_spin_timer--;
// Calibrate a max of once per frame
m_wants_new_timestamp_calibration = m_optional_extensions.vk_ext_calibrated_timestamps;
}
if (spin_cycles != 0)
WaitForSpinCompletion(m_current_frame);
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
if (spin_enabled && m_optional_extensions.vk_ext_calibrated_timestamps)
resources.submit_timestamp = GetCPUTimestamp();
if (!submit_on_thread || !m_present_thread.joinable())
{
DoSubmitCommandBuffer(m_current_frame, present_swap_chain, spin_cycles);
if (present_swap_chain)
DoPresent(present_swap_chain);
return;
}
m_queued_present.command_buffer_index = m_current_frame;
m_queued_present.swap_chain = present_swap_chain;
m_queued_present.spin_cycles = spin_cycles;
m_present_done.store(false);
m_present_queued_cv.notify_one();
}
void Context::DoSubmitCommandBuffer(u32 index, SwapChain* present_swap_chain, u32 spin_cycles)
{
FrameResources& resources = m_frame_resources[index];
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSemaphore semas[2];
VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
submit_info.commandBufferCount = resources.init_buffer_used ? 2u : 1u;
submit_info.pCommandBuffers = resources.init_buffer_used ? resources.command_buffers.data() : &resources.command_buffers[1];
if (present_swap_chain)
{
submit_info.pWaitSemaphores = present_swap_chain->GetImageAvailableSemaphorePtr();
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitDstStageMask = &wait_bits;
if (spin_cycles != 0)
{
semas[0] = present_swap_chain->GetRenderingFinishedSemaphore();
semas[1] = m_spin_resources[index].semaphore;
submit_info.signalSemaphoreCount = 2;
submit_info.pSignalSemaphores = semas;
}
else
{
submit_info.pSignalSemaphores = present_swap_chain->GetRenderingFinishedSemaphorePtr();
submit_info.signalSemaphoreCount = 1;
}
}
else if (spin_cycles != 0)
{
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &m_spin_resources[index].semaphore;
}
VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
pxFailRel("Failed to submit command buffer.");
}
if (spin_cycles != 0)
SubmitSpinCommand(index, spin_cycles);
}
void Context::DoPresent(SwapChain* present_swap_chain)
{
const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr,
1, present_swap_chain->GetRenderingFinishedSemaphorePtr(),
1, present_swap_chain->GetSwapChainPtr(), present_swap_chain->GetCurrentImageIndexPtr(),
nullptr};
present_swap_chain->ReleaseCurrentImage();
VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
if (res != VK_SUCCESS)
{
// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
m_last_present_failed.store(true);
return;
}
// Grab the next image as soon as possible, that way we spend less time blocked on the next
// submission. Don't care if it fails, we'll deal with that at the presentation call site.
// Credit to dxvk for the idea.
present_swap_chain->AcquireNextImage();
}
void Context::WaitForPresentComplete()
{
if (m_present_done.load())
return;
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
}
void Context::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
{
if (m_present_done.load())
return;
m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); });
}
void Context::PresentThread()
{
std::unique_lock<std::mutex> lock(m_present_mutex);
while (!m_present_thread_done.load())
{
m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); });
if (m_present_done.load())
continue;
DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.swap_chain, m_queued_present.spin_cycles);
if (m_queued_present.swap_chain)
DoPresent(m_queued_present.swap_chain);
m_present_done.store(true);
m_present_done_cv.notify_one();
}
}
void Context::StartPresentThread()
{
pxAssert(!m_present_thread.joinable());
m_present_thread_done.store(false);
m_present_thread = std::thread(&Context::PresentThread, this);
}
void Context::StopPresentThread()
{
if (!m_present_thread.joinable())
return;
{
std::unique_lock<std::mutex> lock(m_present_mutex);
WaitForPresentComplete(lock);
m_present_thread_done.store(true);
m_present_queued_cv.notify_one();
}
m_present_thread.join();
}
void Context::CommandBufferCompleted(u32 index)
{
FrameResources& resources = m_frame_resources[index];
for (auto& it : resources.cleanup_resources)
it();
resources.cleanup_resources.clear();
bool wants_timestamps = m_gpu_timing_enabled || resources.spin_id >= 0;
if (wants_timestamps && resources.timestamp_written)
{
std::array<u64, 2> timestamps;
VkResult res = vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast<u32>(timestamps.size()),
sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT);
if (res == VK_SUCCESS)
{
// if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be zero
if (timestamps[0] > 0 && m_gpu_timing_enabled)
{
const double ns_diff = (timestamps[1] - timestamps[0]) * static_cast<double>(m_device_properties.limits.timestampPeriod);
m_accumulated_gpu_time += ns_diff / 1000000.0;
}
if (resources.spin_id >= 0)
{
if (m_optional_extensions.vk_ext_calibrated_timestamps && timestamps[1] > 0)
{
u64 end = timestamps[1] * m_spin_timestamp_scale + m_spin_timestamp_offset;
m_spin_manager.DrawCompleted(resources.spin_id, resources.submit_timestamp, end);
}
else if (!m_optional_extensions.vk_ext_calibrated_timestamps && timestamps[0] > 0)
{
u64 begin = timestamps[0] * m_spin_timestamp_scale;
u64 end = timestamps[1] * m_spin_timestamp_scale;
m_spin_manager.DrawCompleted(resources.spin_id, begin, end);
}
}
}
else
{
LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
}
}
}
void Context::MoveToNextCommandBuffer() { ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS); }
void Context::ActivateCommandBuffer(u32 index)
{
FrameResources& resources = m_frame_resources[index];
if (!m_present_done.load() && m_queued_present.command_buffer_index == index)
WaitForPresentComplete();
// Wait for the GPU to finish with all resources for this command buffer.
if (resources.fence_counter > m_completed_fence_counter)
WaitForCommandBufferCompletion(index);
// Reset fence to unsignaled before starting.
VkResult res = vkResetFences(m_device, 1, &resources.fence);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkResetFences failed: ");
// Reset command pools to beginning since we can re-use the memory now
res = vkResetCommandPool(m_device, resources.command_pool, 0);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: ");
// Enable commands to be recorded to the two buffers again.
VkCommandBufferBeginInfo begin_info = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr};
res = vkBeginCommandBuffer(resources.command_buffers[1], &begin_info);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: ");
// Also can do the same for the descriptor pools
res = vkResetDescriptorPool(m_device, resources.descriptor_pool, 0);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: ");
bool wants_timestamp = m_gpu_timing_enabled || m_spin_timer;
if (wants_timestamp)
{
vkCmdResetQueryPool(resources.command_buffers[1], m_timestamp_query_pool, index * 2, 2);
vkCmdWriteTimestamp(resources.command_buffers[1], VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, index * 2);
}
resources.fence_counter = m_next_fence_counter++;
resources.init_buffer_used = false;
resources.timestamp_written = wants_timestamp;
m_current_frame = index;
m_current_command_buffer = resources.command_buffers[1];
// using the lower 32 bits of the fence index should be sufficient here, I hope...
vmaSetCurrentFrameIndex(m_allocator, static_cast<u32>(m_next_fence_counter));
}
void Context::ExecuteCommandBuffer(WaitType wait_for_completion)
{
// If we're waiting for completion, don't bother waking the worker thread.
const u32 current_frame = m_current_frame;
SubmitCommandBuffer();
MoveToNextCommandBuffer();
if (wait_for_completion != WaitType::None)
{
// Calibrate while we wait
if (m_wants_new_timestamp_calibration)
CalibrateSpinTimestamp();
if (wait_for_completion == WaitType::Spin)
{
while (vkGetFenceStatus(m_device, m_frame_resources[current_frame].fence) == VK_NOT_READY)
ShortSpin();
}
WaitForCommandBufferCompletion(current_frame);
}
}
bool Context::CheckLastPresentFail()
{
bool res = m_last_present_failed;
m_last_present_failed = false;
return res;
}
void Context::DeferBufferDestruction(VkBuffer object)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, object]() { vkDestroyBuffer(m_device, object, nullptr); });
}
void Context::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back(
[this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); });
}
void Context::DeferBufferViewDestruction(VkBufferView object)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, object]() { vkDestroyBufferView(m_device, object, nullptr); });
}
void Context::DeferDeviceMemoryDestruction(VkDeviceMemory object)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, object]() { vkFreeMemory(m_device, object, nullptr); });
}
void Context::DeferFramebufferDestruction(VkFramebuffer object)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, object]() { vkDestroyFramebuffer(m_device, object, nullptr); });
}
void Context::DeferImageDestruction(VkImage object)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, object]() { vkDestroyImage(m_device, object, nullptr); });
}
void Context::DeferImageDestruction(VkImage object, VmaAllocation allocation)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back(
[this, object, allocation]() { vmaDestroyImage(m_allocator, object, allocation); });
}
void Context::DeferImageViewDestruction(VkImageView object)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, object]() { vkDestroyImageView(m_device, object, nullptr); });
}
void Context::DeferPipelineDestruction(VkPipeline pipeline)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, pipeline]() { vkDestroyPipeline(m_device, pipeline, nullptr); });
}
void Context::DeferSamplerDestruction(VkSampler sampler)
{
FrameResources& resources = m_frame_resources[m_current_frame];
resources.cleanup_resources.push_back([this, sampler]() { vkDestroySampler(m_device, sampler, nullptr); });
}
VKAPI_ATTR VkBool32 VKAPI_CALL DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
void* pUserData)
{
if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
{
Console.Error("Vulkan debug report: (%s) %s",
pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
}
else if (severity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT))
{
Console.Warning("Vulkan debug report: (%s) %s",
pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
}
else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT)
{
Console.WriteLn("Vulkan debug report: (%s) %s",
pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
}
else
{
DevCon.WriteLn("Vulkan debug report: (%s) %s",
pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
}
return VK_FALSE;
}
bool Context::EnableDebugUtils()
{
// Already enabled?
if (m_debug_messenger_callback != VK_NULL_HANDLE)
return true;
// Check for presence of the functions before calling
if (!vkCreateDebugUtilsMessengerEXT || !vkDestroyDebugUtilsMessengerEXT || !vkSubmitDebugUtilsMessageEXT)
{
return false;
}
VkDebugUtilsMessengerCreateInfoEXT messenger_info = {VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
nullptr, 0,
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT,
VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
DebugMessengerCallback, nullptr};
VkResult res =
vkCreateDebugUtilsMessengerEXT(m_instance, &messenger_info, nullptr, &m_debug_messenger_callback);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateDebugUtilsMessengerEXT failed: ");
return false;
}
return true;
}
void Context::DisableDebugUtils()
{
if (m_debug_messenger_callback != VK_NULL_HANDLE)
{
vkDestroyDebugUtilsMessengerEXT(m_instance, m_debug_messenger_callback, nullptr);
m_debug_messenger_callback = VK_NULL_HANDLE;
}
}
VkRenderPass Context::CreateCachedRenderPass(RenderPassCacheKey key)
{
VkAttachmentReference color_reference;
VkAttachmentReference* color_reference_ptr = nullptr;
VkAttachmentReference depth_reference;
VkAttachmentReference* depth_reference_ptr = nullptr;
VkAttachmentReference input_reference;
VkAttachmentReference* input_reference_ptr = nullptr;
VkSubpassDependency subpass_dependency;
VkSubpassDependency* subpass_dependency_ptr = nullptr;
std::array<VkAttachmentDescription, 2> attachments;
u32 num_attachments = 0;
if (key.color_format != VK_FORMAT_UNDEFINED)
{
attachments[num_attachments] = {0, static_cast<VkFormat>(key.color_format), VK_SAMPLE_COUNT_1_BIT,
static_cast<VkAttachmentLoadOp>(key.color_load_op),
static_cast<VkAttachmentStoreOp>(key.color_store_op), VK_ATTACHMENT_LOAD_OP_DONT_CARE,
VK_ATTACHMENT_STORE_OP_DONT_CARE,
key.color_feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
key.color_feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
color_reference.attachment = num_attachments;
color_reference.layout =
key.color_feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
color_reference_ptr = &color_reference;
if (key.color_feedback_loop)
{
input_reference.attachment = num_attachments;
input_reference.layout = VK_IMAGE_LAYOUT_GENERAL;
input_reference_ptr = &input_reference;
if (!g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access)
{
// don't need the framebuffer-local dependency when we have rasterization order attachment access
subpass_dependency.srcSubpass = 0;
subpass_dependency.dstSubpass = 0;
subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
subpass_dependency.srcAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
subpass_dependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
subpass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
subpass_dependency_ptr = &subpass_dependency;
}
}
num_attachments++;
}
if (key.depth_format != VK_FORMAT_UNDEFINED)
{
attachments[num_attachments] = {0, static_cast<VkFormat>(key.depth_format), VK_SAMPLE_COUNT_1_BIT,
static_cast<VkAttachmentLoadOp>(key.depth_load_op),
static_cast<VkAttachmentStoreOp>(key.depth_store_op),
static_cast<VkAttachmentLoadOp>(key.stencil_load_op),
static_cast<VkAttachmentStoreOp>(key.stencil_store_op),
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL};
depth_reference.attachment = num_attachments;
depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
depth_reference_ptr = &depth_reference;
num_attachments++;
}
const VkSubpassDescriptionFlags subpass_flags =
(key.color_feedback_loop && g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access) ? VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM : 0;
const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, input_reference_ptr ? 1u : 0u,
input_reference_ptr ? input_reference_ptr : nullptr, color_reference_ptr ? 1u : 0u,
color_reference_ptr ? color_reference_ptr : nullptr, nullptr, depth_reference_ptr, 0, nullptr};
const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, 0u,
num_attachments, attachments.data(), 1u, &subpass, subpass_dependency_ptr ? 1u : 0u,
subpass_dependency_ptr};
VkRenderPass pass;
VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: ");
return VK_NULL_HANDLE;
}
m_render_pass_cache.emplace(key.key, pass);
return pass;
}
void Context::DestroyRenderPassCache()
{
for (auto& it : m_render_pass_cache)
vkDestroyRenderPass(m_device, it.second, nullptr);
m_render_pass_cache.clear();
}
static constexpr std::string_view SPIN_SHADER = R"(
#version 460 core
layout(std430, set=0, binding=0) buffer SpinBuffer { uint spin[]; };
layout(push_constant) uniform constants { uint cycles; };
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
uint value = spin[0];
// The compiler doesn't know, but spin[0] == 0, so this loop won't actually go anywhere
for (uint i = 0; i < cycles; i++)
value = spin[value];
// Store the result back to the buffer so the compiler can't optimize it away
spin[0] = value;
}
)";
bool Context::InitSpinResources()
{
if (!m_spinning_supported)
return true;
auto spirv = ShaderCompiler::CompileComputeShader(SPIN_SHADER);
if (!spirv.has_value())
return false;
VkResult res;
#define CHECKED_CREATE(create_fn, create_struct, output_struct) \
do { \
if ((res = create_fn(m_device, create_struct, nullptr, output_struct)) != VK_SUCCESS) \
{ \
LOG_VULKAN_ERROR(res, #create_fn " failed: "); \
return false; \
} \
} while (0)
VkDescriptorSetLayoutBinding set_layout_binding = {};
set_layout_binding.binding = 0;
set_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
set_layout_binding.descriptorCount = 1;
set_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo desc_set_layout_create = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
desc_set_layout_create.bindingCount = 1;
desc_set_layout_create.pBindings = &set_layout_binding;
CHECKED_CREATE(vkCreateDescriptorSetLayout, &desc_set_layout_create, &m_spin_descriptor_set_layout);
const VkPushConstantRange push_constant_range = { VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(u32) };
VkPipelineLayoutCreateInfo pl_layout_create = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
pl_layout_create.setLayoutCount = 1;
pl_layout_create.pSetLayouts = &m_spin_descriptor_set_layout;
pl_layout_create.pushConstantRangeCount = 1;
pl_layout_create.pPushConstantRanges = &push_constant_range;
CHECKED_CREATE(vkCreatePipelineLayout, &pl_layout_create, &m_spin_pipeline_layout);
VkShaderModuleCreateInfo module_create = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO };
module_create.codeSize = spirv->size() * sizeof(ShaderCompiler::SPIRVCodeType);
module_create.pCode = spirv->data();
VkShaderModule shader_module;
CHECKED_CREATE(vkCreateShaderModule, &module_create, &shader_module);
Util::SetObjectName(m_device, shader_module, "Spin Shader");
VkComputePipelineCreateInfo pl_create = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO };
pl_create.layout = m_spin_pipeline_layout;
pl_create.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
pl_create.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
pl_create.stage.pName = "main";
pl_create.stage.module = shader_module;
res = vkCreateComputePipelines(m_device, VK_NULL_HANDLE, 1, &pl_create, nullptr, &m_spin_pipeline);
vkDestroyShaderModule(m_device, shader_module, nullptr);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: ");
return false;
}
Util::SetObjectName(m_device, m_spin_pipeline, "Spin Pipeline");
VmaAllocationCreateInfo buf_vma_create = {};
buf_vma_create.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VkBufferCreateInfo buf_create = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
buf_create.size = 4;
buf_create.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
if ((res = vmaCreateBuffer(m_allocator, &buf_create, &buf_vma_create, &m_spin_buffer, &m_spin_buffer_allocation, nullptr)) != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vmaCreateBuffer failed: ");
return false;
}
Util::SetObjectName(m_device, m_spin_buffer, "Spin Buffer");
VkDescriptorSetAllocateInfo desc_set_allocate = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
desc_set_allocate.descriptorPool = m_global_descriptor_pool;
desc_set_allocate.descriptorSetCount = 1;
desc_set_allocate.pSetLayouts = &m_spin_descriptor_set_layout;
if ((res = vkAllocateDescriptorSets(m_device, &desc_set_allocate, &m_spin_descriptor_set)) != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkAllocateDescriptorSets failed: ");
return false;
}
const VkDescriptorBufferInfo desc_buffer_info = { m_spin_buffer, 0, VK_WHOLE_SIZE };
VkWriteDescriptorSet desc_set_write = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
desc_set_write.dstSet = m_spin_descriptor_set;
desc_set_write.dstBinding = 0;
desc_set_write.descriptorCount = 1;
desc_set_write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
desc_set_write.pBufferInfo = &desc_buffer_info;
vkUpdateDescriptorSets(m_device, 1, &desc_set_write, 0, nullptr);
for (SpinResources& resources : m_spin_resources)
{
u32 index = &resources - &m_spin_resources[0];
VkCommandPoolCreateInfo pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
pool_info.queueFamilyIndex = m_spin_queue_family_index;
CHECKED_CREATE(vkCreateCommandPool, &pool_info, &resources.command_pool);
Vulkan::Util::SetObjectName(m_device, resources.command_pool, "Spin Command Pool %u", index);
VkCommandBufferAllocateInfo buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
buffer_info.commandPool = resources.command_pool;
buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
buffer_info.commandBufferCount = 1;
res = vkAllocateCommandBuffers(m_device, &buffer_info, &resources.command_buffer);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: ");
return false;
}
Vulkan::Util::SetObjectName(m_device, resources.command_buffer, "Spin Command Buffer %u", index);
VkFenceCreateInfo fence_info = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
CHECKED_CREATE(vkCreateFence, &fence_info, &resources.fence);
Vulkan::Util::SetObjectName(m_device, resources.fence, "Spin Fence %u", index);
if (!m_spin_queue_is_graphics_queue)
{
VkSemaphoreCreateInfo sem_info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
CHECKED_CREATE(vkCreateSemaphore, &sem_info, &resources.semaphore);
Vulkan::Util::SetObjectName(m_device, resources.semaphore, "Draw to Spin Semaphore %u", index);
}
}
#undef CHECKED_CREATE
return true;
}
void Context::DestroySpinResources()
{
#define CHECKED_DESTROY(destructor, obj) \
do { \
if (obj != VK_NULL_HANDLE) \
{ \
destructor(m_device, obj, nullptr); \
obj = VK_NULL_HANDLE; \
} \
} while (0)
if (m_spin_buffer)
{
vmaDestroyBuffer(m_allocator, m_spin_buffer, m_spin_buffer_allocation);
m_spin_buffer = VK_NULL_HANDLE;
m_spin_buffer_allocation = VK_NULL_HANDLE;
}
CHECKED_DESTROY(vkDestroyPipeline, m_spin_pipeline);
CHECKED_DESTROY(vkDestroyPipelineLayout, m_spin_pipeline_layout);
CHECKED_DESTROY(vkDestroyDescriptorSetLayout, m_spin_descriptor_set_layout);
if (m_spin_descriptor_set != VK_NULL_HANDLE)
{
vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &m_spin_descriptor_set);
m_spin_descriptor_set = VK_NULL_HANDLE;
}
for (SpinResources& resources : m_spin_resources)
{
CHECKED_DESTROY(vkDestroySemaphore, resources.semaphore);
CHECKED_DESTROY(vkDestroyFence, resources.fence);
if (resources.command_buffer != VK_NULL_HANDLE)
{
vkFreeCommandBuffers(m_device, resources.command_pool, 1, &resources.command_buffer);
resources.command_buffer = VK_NULL_HANDLE;
}
CHECKED_DESTROY(vkDestroyCommandPool, resources.command_pool);
}
#undef CHECKED_DESTROY
}
void Context::WaitForSpinCompletion(u32 index)
{
SpinResources& resources = m_spin_resources[index];
if (!resources.in_progress)
return;
VkResult res = vkWaitForFences(m_device, 1, &resources.fence, VK_TRUE, UINT64_MAX);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
SpinCommandCompleted(index);
}
void Context::SpinCommandCompleted(u32 index)
{
SpinResources& resources = m_spin_resources[index];
resources.in_progress = false;
const u32 timestamp_base = (index + NUM_COMMAND_BUFFERS) * 2;
std::array<u64, 2> timestamps;
VkResult res = vkGetQueryPoolResults(m_device, m_timestamp_query_pool, timestamp_base, static_cast<u32>(timestamps.size()),
sizeof(timestamps), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT);
if (res == VK_SUCCESS)
{
u64 begin, end;
if (m_optional_extensions.vk_ext_calibrated_timestamps)
{
begin = timestamps[0] * m_spin_timestamp_scale + m_spin_timestamp_offset;
end = timestamps[1] * m_spin_timestamp_scale + m_spin_timestamp_offset;
}
else
{
begin = timestamps[0] * m_spin_timestamp_scale;
end = timestamps[1] * m_spin_timestamp_scale;
}
m_spin_manager.SpinCompleted(resources.cycles, begin, end);
}
else
{
LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
}
}
void Context::SubmitSpinCommand(u32 index, u32 cycles)
{
SpinResources& resources = m_spin_resources[index];
VkResult res;
// Reset fence to unsignaled before starting.
if ((res = vkResetFences(m_device, 1, &resources.fence)) != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkResetFences failed: ");
// Reset command pools to beginning since we can re-use the memory now
if ((res = vkResetCommandPool(m_device, resources.command_pool, 0)) != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: ");
// Enable commands to be recorded to the two buffers again.
VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
if ((res = vkBeginCommandBuffer(resources.command_buffer, &begin_info)) != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: ");
if (!m_spin_buffer_initialized)
{
m_spin_buffer_initialized = true;
vkCmdFillBuffer(resources.command_buffer, m_spin_buffer, 0, VK_WHOLE_SIZE, 0);
VkBufferMemoryBarrier barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.srcQueueFamilyIndex = m_spin_queue_family_index;
barrier.dstQueueFamilyIndex = m_spin_queue_family_index;
barrier.buffer = m_spin_buffer;
barrier.offset = 0;
barrier.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(resources.command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
}
if (m_spin_queue_is_graphics_queue)
vkCmdPipelineBarrier(resources.command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);
const u32 timestamp_base = (index + NUM_COMMAND_BUFFERS) * 2;
vkCmdResetQueryPool(resources.command_buffer, m_timestamp_query_pool, timestamp_base, 2);
vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, timestamp_base);
vkCmdPushConstants(resources.command_buffer, m_spin_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(u32), &cycles);
vkCmdBindPipeline(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_spin_pipeline);
vkCmdBindDescriptorSets(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_spin_pipeline_layout, 0, 1, &m_spin_descriptor_set, 0, nullptr);
vkCmdDispatch(resources.command_buffer, 1, 1, 1);
vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, m_timestamp_query_pool, timestamp_base + 1);
if ((res = vkEndCommandBuffer(resources.command_buffer)) != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &resources.command_buffer;
VkPipelineStageFlags sema_waits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
if (!m_spin_queue_is_graphics_queue)
{
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &resources.semaphore;
submit_info.pWaitDstStageMask = sema_waits;
}
vkQueueSubmit(m_spin_queue, 1, &submit_info, resources.fence);
resources.in_progress = true;
resources.cycles = cycles;
}
void Context::NotifyOfReadback()
{
if (!m_spinning_supported)
return;
m_spin_timer = 30;
m_spin_manager.ReadbackRequested();
}
void Context::CalibrateSpinTimestamp()
{
if (!m_optional_extensions.vk_ext_calibrated_timestamps)
return;
VkCalibratedTimestampInfoEXT infos[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_calibrated_timestamp_type },
};
u64 timestamps[2];
u64 maxDeviation;
constexpr u64 MAX_MAX_DEVIATION = 100000; // 100µs
for (int i = 0; i < 4; i++) // 4 tries to get under MAX_MAX_DEVIATION
{
VkResult res = vkGetCalibratedTimestampsEXT(m_device, std::size(infos), infos, timestamps, &maxDeviation);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkGetCalibratedTimestampsEXT failed: ");
return;
}
if (maxDeviation < MAX_MAX_DEVIATION)
break;
}
if (maxDeviation >= MAX_MAX_DEVIATION)
Console.Warning("vkGetCalibratedTimestampsEXT returned high max deviation of %lluµs", maxDeviation / 1000);
const double gpu_time = timestamps[0] * m_spin_timestamp_scale;
#ifdef _WIN32
const double cpu_time = timestamps[1] * m_queryperfcounter_to_ns;
#else
const double cpu_time = timestamps[1];
#endif
m_spin_timestamp_offset = cpu_time - gpu_time;
}
u64 Context::GetCPUTimestamp()
{
#ifdef _WIN32
LARGE_INTEGER value = {};
QueryPerformanceCounter(&value);
return static_cast<u64>(static_cast<double>(value.QuadPart) * m_queryperfcounter_to_ns);
#else
#ifdef CLOCK_MONOTONIC_RAW
const bool use_raw = m_calibrated_timestamp_type == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
const clockid_t clock = use_raw ? CLOCK_MONOTONIC_RAW : CLOCK_MONOTONIC;
#else
const clockid_t clock = CLOCK_MONOTONIC;
#endif
timespec ts = {};
clock_gettime(clock, &ts);
return static_cast<u64>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
}
} // namespace Vulkan