/*  PCSX2 - PS2 Emulator for PCs
 *  Copyright (C) 2002-2021  PCSX2 Dev Team
 *
 *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU Lesser General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with PCSX2.
 *  If not, see <http://www.gnu.org/licenses/>.
 */

#include "common/Vulkan/Context.h"
#include "common/Align.h"
#include "common/Assertions.h"
#include "common/Console.h"
#include "common/General.h"
#include "common/StringUtil.h"
#include "common/Vulkan/ShaderCompiler.h"
#include "common/Vulkan/SwapChain.h"
#include "common/Vulkan/Util.h"
#include <algorithm>
#include <array>
#include <cstring>

#ifdef _WIN32
#include "common/RedtapeWindows.h"
#else
#include <time.h>
#endif

std::unique_ptr<Vulkan::Context> g_vulkan_context;

// Tweakables
enum : u32
{
	MAX_DRAW_CALLS_PER_FRAME = 8192,
	MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = 2 * MAX_DRAW_CALLS_PER_FRAME,
	MAX_SAMPLED_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, // assume at least half our draws aren't going to be shuffle/blending
	MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME = 4, // Currently used by CAS only
	MAX_INPUT_ATTACHMENT_IMAGE_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
	MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME * 2
};

namespace Vulkan
{
	Context::Context(VkInstance instance, VkPhysicalDevice physical_device)
		: m_instance(instance)
		, m_physical_device(physical_device)
	{
		// Read device physical memory properties, we need it for allocating buffers
		vkGetPhysicalDeviceProperties(physical_device, &m_device_properties);
		vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties);

		// We need this to be at least 32 byte aligned for AVX2 stores.
		m_device_properties.limits.minUniformBufferOffsetAlignment =
			std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
		m_device_properties.limits.minTexelBufferOffsetAlignment =
			std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
		m_device_properties.limits.optimalBufferCopyOffsetAlignment =
			std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(32));
		m_device_properties.limits.optimalBufferCopyRowPitchAlignment =
			Common::NextPow2(std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(32)));
		m_device_properties.limits.bufferImageGranularity =
			std::max(m_device_properties.limits.bufferImageGranularity, static_cast<VkDeviceSize>(32));
	}

	Context::~Context() = default;

	VkInstance Context::CreateVulkanInstance(
		const WindowInfo* wi, bool enable_debug_utils, bool enable_validation_layer)
	{
		ExtensionList enabled_extensions;
		if (!SelectInstanceExtensions(&enabled_extensions, wi, enable_debug_utils))
			return VK_NULL_HANDLE;

		// Remember to manually update this every release. We don't pull in svnrev.h here, because
		// it's only the major/minor version, and rebuilding the file every time something else changes
		// is unnecessary.
		VkApplicationInfo app_info = {};
		app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
		app_info.pNext = nullptr;
		app_info.pApplicationName = "PCSX2";
		app_info.applicationVersion = VK_MAKE_VERSION(1, 7, 0);
		app_info.pEngineName = "PCSX2";
		app_info.engineVersion = VK_MAKE_VERSION(1, 7, 0);
		app_info.apiVersion = VK_API_VERSION_1_1;

		VkInstanceCreateInfo instance_create_info = {};
		instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
		instance_create_info.pNext = nullptr;
		instance_create_info.flags = 0;
		instance_create_info.pApplicationInfo = &app_info;
		instance_create_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
		instance_create_info.ppEnabledExtensionNames = enabled_extensions.data();
		instance_create_info.enabledLayerCount = 0;
		instance_create_info.ppEnabledLayerNames = nullptr;

		// Enable debug layer on debug builds
		if (enable_validation_layer)
		{
			static const char* layer_names[] = {"VK_LAYER_KHRONOS_validation"};
			instance_create_info.enabledLayerCount = 1;
			instance_create_info.ppEnabledLayerNames = layer_names;
		}

		VkInstance instance;
		VkResult res = vkCreateInstance(&instance_create_info, nullptr, &instance);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkCreateInstance failed: ");
			return nullptr;
		}

		return instance;
	}

	bool Context::SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo* wi, bool enable_debug_utils)
	{
		u32 extension_count = 0;
		VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: ");
			return false;
		}

		if (extension_count == 0)
		{
			Console.Error("Vulkan: No extensions supported by instance.");
			return false;
		}

		std::vector<VkExtensionProperties> available_extension_list(extension_count);
		res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, available_extension_list.data());
		pxAssert(res == VK_SUCCESS);

		auto SupportsExtension = [&](const char* name, bool required) {
			if (std::find_if(available_extension_list.begin(), available_extension_list.end(),
					[&](const VkExtensionProperties& properties) { return !strcmp(name, properties.extensionName); }) !=
				available_extension_list.end())
			{
				DevCon.WriteLn("Enabling extension: %s", name);
				extension_list->push_back(name);
				return true;
			}

			if (required)
				Console.Error("Vulkan: Missing required extension %s.", name);

			return false;
		};

		// Common extensions
		if (wi && wi->type != WindowInfo::Type::Surfaceless && !SupportsExtension(VK_KHR_SURFACE_EXTENSION_NAME, true))
			return false;

#if defined(VK_USE_PLATFORM_WIN32_KHR)
		if (wi && wi->type == WindowInfo::Type::Win32 && !SupportsExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true))
			return false;
#endif
#if defined(VK_USE_PLATFORM_XLIB_KHR)
		if (wi && wi->type == WindowInfo::Type::X11 && !SupportsExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, true))
			return false;
#endif
#if defined(VK_USE_PLATFORM_WAYLAND_KHR)
		if (wi && wi->type == WindowInfo::Type::Wayland &&
			!SupportsExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true))
			return false;
#endif
#if defined(VK_USE_PLATFORM_ANDROID_KHR)
		if (wi && wi->type == WindowInfo::Type::Android &&
			!SupportsExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true))
			return false;
#endif
#if defined(VK_USE_PLATFORM_METAL_EXT)
		if (wi && wi->type == WindowInfo::Type::MacOS && !SupportsExtension(VK_EXT_METAL_SURFACE_EXTENSION_NAME, true))
			return false;
#endif

#if 0
	if (wi && wi->type == WindowInfo::Type::Display && !SupportsExtension(VK_KHR_DISPLAY_EXTENSION_NAME, true))
		return false;
#endif

		// VK_EXT_debug_utils
		if (enable_debug_utils && !SupportsExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false))
			Console.Warning("Vulkan: Debug report requested, but extension is not available.");

		return true;
	}

	Context::GPUList Context::EnumerateGPUs(VkInstance instance)
	{
		u32 gpu_count = 0;
		VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr);
		if (res != VK_SUCCESS || gpu_count == 0)
		{
			LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
			return {};
		}

		GPUList gpus;
		gpus.resize(gpu_count);

		res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data());
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
			return {};
		}

		return gpus;
	}

	Context::GPUNameList Context::EnumerateGPUNames(VkInstance instance)
	{
		u32 gpu_count = 0;
		VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr);
		if (res != VK_SUCCESS || gpu_count == 0)
		{
			LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
			return {};
		}

		GPUList gpus;
		gpus.resize(gpu_count);

		res = vkEnumeratePhysicalDevices(instance, &gpu_count, gpus.data());
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices failed: ");
			return {};
		}

		GPUNameList gpu_names;
		gpu_names.reserve(gpu_count);
		for (u32 i = 0; i < gpu_count; i++)
		{
			VkPhysicalDeviceProperties props = {};
			vkGetPhysicalDeviceProperties(gpus[i], &props);

			std::string gpu_name(props.deviceName);

			// handle duplicate adapter names
			if (std::any_of(gpu_names.begin(), gpu_names.end(),
					[&gpu_name](const std::string& other) { return (gpu_name == other); }))
			{
				std::string original_adapter_name = std::move(gpu_name);

				u32 current_extra = 2;
				do
				{
					gpu_name = StringUtil::StdStringFromFormat("%s (%u)", original_adapter_name.c_str(), current_extra);
					current_extra++;
				} while (std::any_of(gpu_names.begin(), gpu_names.end(),
					[&gpu_name](const std::string& other) { return (gpu_name == other); }));
			}

			gpu_names.push_back(std::move(gpu_name));
		}

		return gpu_names;
	}

	bool Context::Create(std::string_view gpu_name, const WindowInfo* wi, std::unique_ptr<SwapChain>* out_swap_chain,
		VkPresentModeKHR preferred_present_mode, bool threaded_presentation, bool enable_debug_utils,
		bool enable_validation_layer)
	{
		pxAssertMsg(!g_vulkan_context, "Has no current context");

		if (!Vulkan::LoadVulkanLibrary())
		{
			Console.Error("Failed to load Vulkan library");
			return false;
		}

		const bool enable_surface = (wi && wi->type != WindowInfo::Type::Surfaceless);
		VkInstance instance = CreateVulkanInstance(wi, enable_debug_utils, enable_validation_layer);
		if (instance == VK_NULL_HANDLE)
		{
			if (enable_debug_utils || enable_validation_layer)
			{
				// Try again without the validation layer.
				enable_debug_utils = false;
				enable_validation_layer = false;
				instance = CreateVulkanInstance(wi, enable_debug_utils, enable_validation_layer);
				if (instance == VK_NULL_HANDLE)
				{
					Vulkan::UnloadVulkanLibrary();
					return false;
				}

				Console.Error("Vulkan validation/debug layers requested but are unavailable. Creating non-debug device.");
			}
		}

		if (!Vulkan::LoadVulkanInstanceFunctions(instance))
		{
			Console.Error("Failed to load Vulkan instance functions");
			vkDestroyInstance(instance, nullptr);
			Vulkan::UnloadVulkanLibrary();
			return false;
		}

		GPUList gpus = EnumerateGPUs(instance);
		if (gpus.empty())
		{
			vkDestroyInstance(instance, nullptr);
			Vulkan::UnloadVulkanLibrary();
			return false;
		}

		u32 gpu_index = 0;
		GPUNameList gpu_names = EnumerateGPUNames(instance);
		if (!gpu_name.empty())
		{
			for (; gpu_index < static_cast<u32>(gpu_names.size()); gpu_index++)
			{
				Console.WriteLn("GPU %u: %s", static_cast<u32>(gpu_index), gpu_names[gpu_index].c_str());
				if (gpu_names[gpu_index] == gpu_name)
					break;
			}

			if (gpu_index == static_cast<u32>(gpu_names.size()))
			{
				Console.Warning("Requested GPU '%s' not found, using first (%s)", std::string(gpu_name).c_str(),
					gpu_names[0].c_str());
				gpu_index = 0;
			}
		}
		else
		{
			Console.WriteLn("No GPU requested, using first (%s)", gpu_names[0].c_str());
		}

		VkSurfaceKHR surface = VK_NULL_HANDLE;
		WindowInfo wi_copy;
		if (wi)
			wi_copy = *wi;

		if (enable_surface &&
			(surface = SwapChain::CreateVulkanSurface(instance, gpus[gpu_index], &wi_copy)) == VK_NULL_HANDLE)
		{
			vkDestroyInstance(instance, nullptr);
			Vulkan::UnloadVulkanLibrary();
			return false;
		}

		g_vulkan_context.reset(new Context(instance, gpus[gpu_index]));

		if (enable_debug_utils)
			g_vulkan_context->EnableDebugUtils();

		// Attempt to create the device.
		if (!g_vulkan_context->CreateDevice(surface, enable_validation_layer, nullptr, 0, nullptr, 0, nullptr) ||
			!g_vulkan_context->CreateAllocator() || !g_vulkan_context->CreateGlobalDescriptorPool() ||
			!g_vulkan_context->CreateCommandBuffers() || !g_vulkan_context->CreateTextureStreamBuffer() ||
			!g_vulkan_context->InitSpinResources() ||
			(enable_surface && (*out_swap_chain = SwapChain::Create(wi_copy, surface, preferred_present_mode)) == nullptr))
		{
			// Since we are destroying the instance, we're also responsible for destroying the surface.
			if (surface != VK_NULL_HANDLE)
				vkDestroySurfaceKHR(instance, surface, nullptr);

			g_vulkan_context.reset();
			return false;
		}

		if (threaded_presentation)
			g_vulkan_context->StartPresentThread();

		return true;
	}

	void Context::Destroy()
	{
		pxAssertMsg(g_vulkan_context, "Has context");

		g_vulkan_context->StopPresentThread();

		if (g_vulkan_context->m_device != VK_NULL_HANDLE)
			g_vulkan_context->WaitForGPUIdle();

		g_vulkan_context->m_texture_upload_buffer.Destroy(false);

		g_vulkan_context->DestroySpinResources();
		g_vulkan_context->DestroyRenderPassCache();
		g_vulkan_context->DestroyGlobalDescriptorPool();
		g_vulkan_context->DestroyCommandBuffers();
		g_vulkan_context->DestroyAllocator();

		if (g_vulkan_context->m_device != VK_NULL_HANDLE)
			vkDestroyDevice(g_vulkan_context->m_device, nullptr);

		if (g_vulkan_context->m_debug_messenger_callback != VK_NULL_HANDLE)
			g_vulkan_context->DisableDebugUtils();

		if (g_vulkan_context->m_instance != VK_NULL_HANDLE)
			vkDestroyInstance(g_vulkan_context->m_instance, nullptr);

		Vulkan::UnloadVulkanLibrary();

		g_vulkan_context.reset();
	}

	bool Context::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface)
	{
		u32 extension_count = 0;
		VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: ");
			return false;
		}

		if (extension_count == 0)
		{
			Console.Error("Vulkan: No extensions supported by device.");
			return false;
		}

		std::vector<VkExtensionProperties> available_extension_list(extension_count);
		res = vkEnumerateDeviceExtensionProperties(
			m_physical_device, nullptr, &extension_count, available_extension_list.data());
		pxAssert(res == VK_SUCCESS);

		auto SupportsExtension = [&](const char* name, bool required) {
			if (std::find_if(available_extension_list.begin(), available_extension_list.end(),
					[&](const VkExtensionProperties& properties) { return !strcmp(name, properties.extensionName); }) !=
				available_extension_list.end())
			{
				if (std::none_of(extension_list->begin(), extension_list->end(),
						[&](const char* existing_name) { return (std::strcmp(existing_name, name) == 0); }))
				{
					DevCon.WriteLn("Enabling extension: %s", name);
					extension_list->push_back(name);
				}

				return true;
			}

			if (required)
				Console.Error("Vulkan: Missing required extension %s.", name);

			return false;
		};

		if (enable_surface && !SupportsExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true))
			return false;

		m_optional_extensions.vk_ext_provoking_vertex =
			SupportsExtension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
		m_optional_extensions.vk_ext_memory_budget =
			SupportsExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false);
		m_optional_extensions.vk_ext_calibrated_timestamps =
			SupportsExtension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, false);
		m_optional_extensions.vk_ext_line_rasterization =
			SupportsExtension(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
		m_optional_extensions.vk_khr_driver_properties =
			SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false);
		m_optional_extensions.vk_arm_rasterization_order_attachment_access =
			SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false);
		m_optional_extensions.vk_khr_fragment_shader_barycentric =
			SupportsExtension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, false);

		return true;
	}

	bool Context::SelectDeviceFeatures(const VkPhysicalDeviceFeatures* required_features)
	{
		VkPhysicalDeviceFeatures available_features;
		vkGetPhysicalDeviceFeatures(m_physical_device, &available_features);

		if (required_features)
			std::memcpy(&m_device_features, required_features, sizeof(m_device_features));

		// Enable the features we use.
		m_device_features.dualSrcBlend = available_features.dualSrcBlend;
		m_device_features.geometryShader = available_features.geometryShader;
		m_device_features.largePoints = available_features.largePoints;
		m_device_features.wideLines = available_features.wideLines;
		m_device_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
		m_device_features.textureCompressionBC = available_features.textureCompressionBC;
		m_device_features.samplerAnisotropy = available_features.samplerAnisotropy;

		return true;
	}

	bool Context::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer,
		const char** required_device_extensions, u32 num_required_device_extensions,
		const char** required_device_layers, u32 num_required_device_layers,
		const VkPhysicalDeviceFeatures* required_features)
	{
		u32 queue_family_count;
		vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr);
		if (queue_family_count == 0)
		{
			Console.Error("No queue families found on specified vulkan physical device.");
			return false;
		}

		std::vector<VkQueueFamilyProperties> queue_family_properties(queue_family_count);
		vkGetPhysicalDeviceQueueFamilyProperties(
			m_physical_device, &queue_family_count, queue_family_properties.data());
		Console.WriteLn("%u vulkan queue families", queue_family_count);

		// Find graphics and present queues.
		m_graphics_queue_family_index = queue_family_count;
		m_present_queue_family_index = queue_family_count;
		m_spin_queue_family_index = queue_family_count;
		u32 spin_queue_index = 0;
		for (uint32_t i = 0; i < queue_family_count; i++)
		{
			VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT;
			if (graphics_supported)
			{
				m_graphics_queue_family_index = i;
				// Quit now, no need for a present queue.
				if (!surface)
				{
					break;
				}
			}

			if (surface)
			{
				VkBool32 present_supported;
				VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported);
				if (res != VK_SUCCESS)
				{
					LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ");
					return false;
				}

				if (present_supported)
				{
					m_present_queue_family_index = i;
				}

				// Prefer one queue family index that does both graphics and present.
				if (graphics_supported && present_supported)
				{
					break;
				}
			}
		}
		for (uint32_t i = 0; i < queue_family_count; i++)
		{
			// Pick a queue for spinning
			if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
				continue; // We need compute
			if (queue_family_properties[i].timestampValidBits == 0)
				continue; // We need timing
			const bool queue_is_used = i == m_graphics_queue_family_index || i == m_present_queue_family_index;
			if (queue_is_used && m_spin_queue_family_index != queue_family_count)
				continue; // Found a non-graphics queue to use
			spin_queue_index = 0;
			m_spin_queue_family_index = i;
			if (queue_is_used && queue_family_properties[i].queueCount > 1)
				spin_queue_index = 1;
			if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
				break; // Async compute queue, definitely pick this one
		}
		if (m_graphics_queue_family_index == queue_family_count)
		{
			Console.Error("Vulkan: Failed to find an acceptable graphics queue.");
			return false;
		}
		if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count)
		{
			Console.Error("Vulkan: Failed to find an acceptable present queue.");
			return false;
		}

		VkDeviceCreateInfo device_info = {};
		device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
		device_info.pNext = nullptr;
		device_info.flags = 0;
		device_info.queueCreateInfoCount = 0;

		static constexpr float queue_priorities[] = {1.0f, 0.0f}; // Low priority for the spin queue
		std::array<VkDeviceQueueCreateInfo, 3> queue_infos;
		VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++];
		graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
		graphics_queue_info.pNext = nullptr;
		graphics_queue_info.flags = 0;
		graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index;
		graphics_queue_info.queueCount = 1;
		graphics_queue_info.pQueuePriorities = queue_priorities;

		if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index)
		{
			VkDeviceQueueCreateInfo& present_queue_info = queue_infos[device_info.queueCreateInfoCount++];
			present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
			present_queue_info.pNext = nullptr;
			present_queue_info.flags = 0;
			present_queue_info.queueFamilyIndex = m_present_queue_family_index;
			present_queue_info.queueCount = 1;
			present_queue_info.pQueuePriorities = queue_priorities;
		}

		if (m_spin_queue_family_index == m_graphics_queue_family_index)
		{
			if (spin_queue_index != 0)
				graphics_queue_info.queueCount = 2;
		}
		else if (m_spin_queue_family_index == m_present_queue_family_index)
		{
			if (spin_queue_index != 0)
				queue_infos[1].queueCount = 2; // present queue
		}
		else
		{
			VkDeviceQueueCreateInfo& spin_queue_info = queue_infos[device_info.queueCreateInfoCount++];
			spin_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
			spin_queue_info.pNext = nullptr;
			spin_queue_info.flags = 0;
			spin_queue_info.queueFamilyIndex = m_spin_queue_family_index;
			spin_queue_info.queueCount = 1;
			spin_queue_info.pQueuePriorities = queue_priorities + 1;
		}

		device_info.pQueueCreateInfos = queue_infos.data();

		ExtensionList enabled_extensions;
		for (u32 i = 0; i < num_required_device_extensions; i++)
			enabled_extensions.emplace_back(required_device_extensions[i]);
		if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE))
			return false;

		device_info.enabledLayerCount = num_required_device_layers;
		device_info.ppEnabledLayerNames = required_device_layers;
		device_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
		device_info.ppEnabledExtensionNames = enabled_extensions.data();

		// Check for required features before creating.
		if (!SelectDeviceFeatures(required_features))
			return false;

		device_info.pEnabledFeatures = &m_device_features;

		// Enable debug layer on debug builds
		if (enable_validation_layer)
		{
			static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"};
			device_info.enabledLayerCount = 1;
			device_info.ppEnabledLayerNames = layer_names;
		}

		// provoking vertex
		VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_feature = {
			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT};
		VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM rasterization_order_access_feature = {
			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM};
		VkPhysicalDeviceLineRasterizationFeaturesEXT line_rasterization_feature = {
			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT};

		if (m_optional_extensions.vk_ext_provoking_vertex)
		{
			provoking_vertex_feature.provokingVertexLast = VK_TRUE;
			Util::AddPointerToChain(&device_info, &provoking_vertex_feature);
		}
		if (m_optional_extensions.vk_ext_line_rasterization)
		{
			line_rasterization_feature.bresenhamLines = VK_TRUE;
			Util::AddPointerToChain(&device_info, &line_rasterization_feature);
		}
		if (m_optional_extensions.vk_arm_rasterization_order_attachment_access)
		{
			rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess = VK_TRUE;
			Util::AddPointerToChain(&device_info, &rasterization_order_access_feature);
		}

		VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkCreateDevice failed: ");
			return false;
		}

		// With the device created, we can fill the remaining entry points.
		if (!LoadVulkanDeviceFunctions(m_device))
			return false;

		// Grab the graphics and present queues.
		vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue);
		if (surface)
		{
			vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue);
		}
		m_spinning_supported = m_spin_queue_family_index != queue_family_count &&
		                       queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
		                       m_device_properties.limits.timestampPeriod > 0;
		m_spin_queue_is_graphics_queue = m_spin_queue_family_index == m_graphics_queue_family_index && spin_queue_index == 0;

		m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 &&
		                          queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
		                          m_device_properties.limits.timestampPeriod > 0);
		DevCon.WriteLn("GPU timing is %s (TS=%u TS valid bits=%u, TS period=%f)",
			m_gpu_timing_supported ? "supported" : "not supported",
			static_cast<u32>(m_device_properties.limits.timestampComputeAndGraphics),
			queue_family_properties[m_graphics_queue_family_index].timestampValidBits,
			m_device_properties.limits.timestampPeriod);

		ProcessDeviceExtensions();

		if (m_spinning_supported)
		{
			vkGetDeviceQueue(m_device, m_spin_queue_family_index, spin_queue_index, &m_spin_queue);

			m_spin_timestamp_scale = m_device_properties.limits.timestampPeriod;
			if (m_optional_extensions.vk_ext_calibrated_timestamps)
			{
#ifdef _WIN32
				LARGE_INTEGER Freq;
				QueryPerformanceFrequency(&Freq);
				m_queryperfcounter_to_ns = 1000000000.0 / static_cast < double > (Freq.QuadPart);
#endif
				CalibrateSpinTimestamp();
			}
		}

		return true;
	}

	void Context::ProcessDeviceExtensions()
	{
		// advanced feature checks
		VkPhysicalDeviceFeatures2 features2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
		VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features = {
			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT};
		VkPhysicalDeviceLineRasterizationFeaturesEXT line_rasterization_feature = {
			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT};
		VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM rasterization_order_access_feature = {
			VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM};

		// add in optional feature structs
		if (m_optional_extensions.vk_ext_provoking_vertex)
			Util::AddPointerToChain(&features2, &provoking_vertex_features);
		if (m_optional_extensions.vk_ext_line_rasterization)
			Util::AddPointerToChain(&features2, &line_rasterization_feature);
		if (m_optional_extensions.vk_arm_rasterization_order_attachment_access)
			Util::AddPointerToChain(&features2, &rasterization_order_access_feature);

		// query
		vkGetPhysicalDeviceFeatures2(m_physical_device, &features2);

		// confirm we actually support it
		m_optional_extensions.vk_ext_provoking_vertex &= (provoking_vertex_features.provokingVertexLast == VK_TRUE);
		m_optional_extensions.vk_arm_rasterization_order_attachment_access &= (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE);
		m_optional_extensions.vk_ext_line_rasterization &= (line_rasterization_feature.bresenhamLines == VK_TRUE);

		VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
		void** pNext = &properties2.pNext;

		if (m_optional_extensions.vk_khr_driver_properties)
		{
			m_device_driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
			*pNext = &m_device_driver_properties;
			pNext = &m_device_driver_properties.pNext;
		}

		// query
		vkGetPhysicalDeviceProperties2(m_physical_device, &properties2);

		// VK_EXT_calibrated_timestamps checking
		if (m_optional_extensions.vk_ext_calibrated_timestamps)
		{
			u32 count = 0;
			vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(m_physical_device, &count, nullptr);
			std::unique_ptr<VkTimeDomainEXT[]> time_domains = std::make_unique<VkTimeDomainEXT[]>(count);
			vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(m_physical_device, &count, time_domains.get());
			const VkTimeDomainEXT* begin = &time_domains[0];
			const VkTimeDomainEXT* end = &time_domains[count];
			if (std::find(begin, end, VK_TIME_DOMAIN_DEVICE_EXT) == end)
				m_optional_extensions.vk_ext_calibrated_timestamps = false;
			VkTimeDomainEXT preferred_types[] = {
#ifdef _WIN32
				VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT,
#else
#ifdef CLOCK_MONOTONIC_RAW
				VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
				VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#endif
			};
			m_calibrated_timestamp_type = VK_TIME_DOMAIN_DEVICE_EXT;
			for (VkTimeDomainEXT type : preferred_types)
			{
				if (std::find(begin, end, type) != end)
				{
					m_calibrated_timestamp_type = type;
					break;
				}
			}
			if (m_calibrated_timestamp_type == VK_TIME_DOMAIN_DEVICE_EXT)
				m_optional_extensions.vk_ext_calibrated_timestamps = false;
		}

		Console.WriteLn("VK_EXT_provoking_vertex is %s",
			m_optional_extensions.vk_ext_provoking_vertex ? "supported" : "NOT supported");
		Console.WriteLn("VK_EXT_line_rasterization is %s",
			m_optional_extensions.vk_ext_line_rasterization ? "supported" : "NOT supported");
		Console.WriteLn("VK_EXT_calibrated_timestamps is %s",
			m_optional_extensions.vk_ext_calibrated_timestamps ? "supported" : "NOT supported");
		Console.WriteLn("VK_ARM_rasterization_order_attachment_access is %s",
			m_optional_extensions.vk_arm_rasterization_order_attachment_access ? "supported" : "NOT supported");
	}

	bool Context::CreateAllocator()
	{
		VmaAllocatorCreateInfo ci = {};
		ci.vulkanApiVersion = VK_API_VERSION_1_1;
		ci.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
		ci.physicalDevice = m_physical_device;
		ci.device = m_device;
		ci.instance = m_instance;

		if (m_optional_extensions.vk_ext_memory_budget)
			ci.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;

		VkResult res = vmaCreateAllocator(&ci, &m_allocator);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vmaCreateAllocator failed: ");
			return false;
		}

		return true;
	}

	void Context::DestroyAllocator()
	{
		if (m_allocator == VK_NULL_HANDLE)
			return;

		vmaDestroyAllocator(m_allocator);
		m_allocator = VK_NULL_HANDLE;
	}

	bool Context::CreateCommandBuffers()
	{
		VkResult res;

		uint32_t frame_index = 0;
		for (FrameResources& resources : m_frame_resources)
		{
			resources.needs_fence_wait = false;

			VkCommandPoolCreateInfo pool_info = {
				VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, m_graphics_queue_family_index};
			res = vkCreateCommandPool(m_device, &pool_info, nullptr, &resources.command_pool);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: ");
				return false;
			}
			Vulkan::Util::SetObjectName(
				g_vulkan_context->GetDevice(), resources.command_pool, "Frame Command Pool %u", frame_index);

			VkCommandBufferAllocateInfo buffer_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
				resources.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY,
				static_cast<u32>(resources.command_buffers.size())};

			res = vkAllocateCommandBuffers(m_device, &buffer_info, resources.command_buffers.data());
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: ");
				return false;
			}
			for (u32 i = 0; i < resources.command_buffers.size(); i++)
			{
				Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.command_buffers[i],
					"Frame %u %sCommand Buffer", frame_index, (i == 0) ? "Init" : "");
			}

			VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT};

			res = vkCreateFence(m_device, &fence_info, nullptr, &resources.fence);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkCreateFence failed: ");
				return false;
			}
			Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), resources.fence, "Frame Fence %u", frame_index);
			// TODO: A better way to choose the number of descriptors.
			VkDescriptorPoolSize pool_sizes[] = {
				{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME},
				{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, MAX_SAMPLED_IMAGE_DESCRIPTORS_PER_FRAME},
				{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_STORAGE_IMAGE_DESCRIPTORS_PER_FRAME},
				{VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_IMAGE_DESCRIPTORS_PER_FRAME},
			};

			VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0,
				MAX_DESCRIPTOR_SETS_PER_FRAME, static_cast<u32>(std::size(pool_sizes)), pool_sizes};

			res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
				return false;
			}
			Vulkan::Util::SetObjectName(
				g_vulkan_context->GetDevice(), resources.descriptor_pool, "Frame Descriptor Pool %u", frame_index);

			++frame_index;
		}

		ActivateCommandBuffer(0);
		return true;
	}

	void Context::DestroyCommandBuffers()
	{
		for (FrameResources& resources : m_frame_resources)
		{
			for (auto& it : resources.cleanup_resources)
				it();
			resources.cleanup_resources.clear();

			if (resources.fence != VK_NULL_HANDLE)
			{
				vkDestroyFence(m_device, resources.fence, nullptr);
				resources.fence = VK_NULL_HANDLE;
			}
			if (resources.descriptor_pool != VK_NULL_HANDLE)
			{
				vkDestroyDescriptorPool(m_device, resources.descriptor_pool, nullptr);
				resources.descriptor_pool = VK_NULL_HANDLE;
			}
			if (resources.command_buffers[0] != VK_NULL_HANDLE)
			{
				vkFreeCommandBuffers(m_device, resources.command_pool,
					static_cast<u32>(resources.command_buffers.size()), resources.command_buffers.data());
				resources.command_buffers.fill(VK_NULL_HANDLE);
			}
			if (resources.command_pool != VK_NULL_HANDLE)
			{
				vkDestroyCommandPool(m_device, resources.command_pool, nullptr);
				resources.command_pool = VK_NULL_HANDLE;
			}
		}
	}

	bool Context::CreateGlobalDescriptorPool()
	{
		// TODO: A better way to choose the number of descriptors.
		VkDescriptorPoolSize pool_sizes[] = {
			{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024},
			{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024},
			{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
		};

		VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
			1024, // TODO: tweak this
			static_cast<u32>(std::size(pool_sizes)), pool_sizes};

		VkResult res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &m_global_descriptor_pool);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
			return false;
		}
		Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_global_descriptor_pool, "Global Descriptor Pool");

		if (m_gpu_timing_supported)
		{
			const VkQueryPoolCreateInfo query_create_info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr,
				0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 4, 0};
			res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: ");
				m_gpu_timing_supported = false;
				return false;
			}
		}

		return true;
	}

	void Context::DestroyGlobalDescriptorPool()
	{
		if (m_timestamp_query_pool != VK_NULL_HANDLE)
		{
			vkDestroyQueryPool(m_device, m_timestamp_query_pool, nullptr);
			m_timestamp_query_pool = VK_NULL_HANDLE;
		}

		if (m_global_descriptor_pool != VK_NULL_HANDLE)
		{
			vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr);
			m_global_descriptor_pool = VK_NULL_HANDLE;
		}
	}

	bool Context::CreateTextureStreamBuffer()
	{
		if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE))
		{
			Console.Error("Failed to allocate texture upload buffer");
			return false;
		}

		return true;
	}

	VkCommandBuffer Context::GetCurrentInitCommandBuffer()
	{
		FrameResources& res = m_frame_resources[m_current_frame];
		VkCommandBuffer buf = res.command_buffers[0];
		if (res.init_buffer_used)
			return buf;

		VkCommandBufferBeginInfo bi{
			VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr};
		vkBeginCommandBuffer(buf, &bi);
		res.init_buffer_used = true;
		return buf;
	}

	VkDescriptorSet Context::AllocateDescriptorSet(VkDescriptorSetLayout set_layout)
	{
		VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
			m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout};

		VkDescriptorSet descriptor_set;
		VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set);
		if (res != VK_SUCCESS)
		{
			// Failing to allocate a descriptor set is not a fatal error, we can
			// recover by moving to the next command buffer.
			return VK_NULL_HANDLE;
		}

		return descriptor_set;
	}

	VkDescriptorSet Context::AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout)
	{
		VkDescriptorSetAllocateInfo allocate_info = {
			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, m_global_descriptor_pool, 1, &set_layout};

		VkDescriptorSet descriptor_set;
		VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set);
		if (res != VK_SUCCESS)
			return VK_NULL_HANDLE;

		return descriptor_set;
	}

	void Context::FreeGlobalDescriptorSet(VkDescriptorSet set)
	{
		vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &set);
	}

	void Context::WaitForFenceCounter(u64 fence_counter)
	{
		if (m_completed_fence_counter >= fence_counter)
			return;

		// Find the first command buffer which covers this counter value.
		u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
		while (index != m_current_frame)
		{
			if (m_frame_resources[index].fence_counter >= fence_counter)
				break;

			index = (index + 1) % NUM_COMMAND_BUFFERS;
		}

		pxAssert(index != m_current_frame);
		WaitForCommandBufferCompletion(index);
	}

	void Context::WaitForGPUIdle()
	{
		WaitForPresentComplete();
		vkDeviceWaitIdle(m_device);
	}

	float Context::GetAndResetAccumulatedGPUTime()
	{
		const float time = m_accumulated_gpu_time;
		m_accumulated_gpu_time = 0.0f;
		return time;
	}

	bool Context::SetEnableGPUTiming(bool enabled)
	{
		m_gpu_timing_enabled = enabled && m_gpu_timing_supported;
		return (enabled == m_gpu_timing_enabled);
	}

	void Context::ScanForCommandBufferCompletion()
	{
		for (u32 check_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; check_index != m_current_frame; check_index = (check_index + 1) % NUM_COMMAND_BUFFERS)
		{
			FrameResources& resources = m_frame_resources[check_index];
			if (resources.fence_counter <= m_completed_fence_counter)
				continue; // Already completed
			if (vkGetFenceStatus(m_device, resources.fence) != VK_SUCCESS)
				break; // Fence not signaled, later fences won't be either
			CommandBufferCompleted(check_index);
			m_completed_fence_counter = resources.fence_counter;
		}
		for (SpinResources& resources : m_spin_resources)
		{
			if (!resources.in_progress)
				continue;
			if (vkGetFenceStatus(m_device, resources.fence) != VK_SUCCESS)
				continue;
			SpinCommandCompleted(&resources - &m_spin_resources[0]);
		}
	}

	void Context::WaitForCommandBufferCompletion(u32 index)
	{
		// Wait for this command buffer to be completed.
		VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX);
		if (res != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");

		// Clean up any resources for command buffers between the last known completed buffer and this
		// now-completed command buffer. If we use >2 buffers, this may be more than one buffer.
		const u64 now_completed_counter = m_frame_resources[index].fence_counter;
		u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
		while (cleanup_index != m_current_frame)
		{
			FrameResources& resources = m_frame_resources[cleanup_index];
			if (resources.fence_counter > now_completed_counter)
				break;

			if (resources.fence_counter > m_completed_fence_counter)
				CommandBufferCompleted(cleanup_index);

			cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS;
		}

		m_completed_fence_counter = now_completed_counter;
	}

	void Context::SubmitCommandBuffer(SwapChain* present_swap_chain /* = nullptr */, bool submit_on_thread /* = false */)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];

		// End the current command buffer.
		VkResult res;
		if (resources.init_buffer_used)
		{
			res = vkEndCommandBuffer(resources.command_buffers[0]);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
				pxFailRel("Failed to end command buffer");
			}
		}

		bool wants_timestamp = m_gpu_timing_enabled || m_spin_timer;
		if (wants_timestamp && resources.timestamp_written)
		{
			vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, m_current_frame * 2 + 1);
		}

		res = vkEndCommandBuffer(resources.command_buffers[1]);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
			pxFailRel("Failed to end command buffer");
		}

		// This command buffer now has commands, so can't be re-used without waiting.
		resources.needs_fence_wait = true;

		u32 spin_cycles = 0;
		const bool spin_enabled = m_spin_timer;
		if (spin_enabled)
		{
			ScanForCommandBufferCompletion();
			auto draw = m_spin_manager.DrawSubmitted(m_command_buffer_render_passes);
			u32 constant_offset = 400000 * m_spin_manager.SpinsPerUnitTime(); // 400µs, just to be safe since going over gets really bad
			if (m_optional_extensions.vk_ext_calibrated_timestamps)
				constant_offset /= 2; // Safety factor isn't as important here, going over just hurts this one submission a bit
			u32 minimum_spin = 200000 * m_spin_manager.SpinsPerUnitTime();
			u32 maximum_spin = std::max<u32>(1024, 16000000 * m_spin_manager.SpinsPerUnitTime()); // 16ms
			if (draw.recommended_spin > minimum_spin + constant_offset)
				spin_cycles = std::min(draw.recommended_spin - constant_offset, maximum_spin);
			resources.spin_id = draw.id;
		}
		else
		{
			resources.spin_id = -1;
		}
		m_command_buffer_render_passes = 0;

		if (present_swap_chain != VK_NULL_HANDLE && m_spinning_supported)
		{
			m_spin_manager.NextFrame();
			if (m_spin_timer)
				m_spin_timer--;
			// Calibrate a max of once per frame
			m_wants_new_timestamp_calibration = m_optional_extensions.vk_ext_calibrated_timestamps;
		}

		if (spin_cycles != 0)
			WaitForSpinCompletion(m_current_frame);

		std::unique_lock<std::mutex> lock(m_present_mutex);
		WaitForPresentComplete(lock);

		if (spin_enabled && m_optional_extensions.vk_ext_calibrated_timestamps)
			resources.submit_timestamp = GetCPUTimestamp();

		if (!submit_on_thread || !m_present_thread.joinable())
		{
			DoSubmitCommandBuffer(m_current_frame, present_swap_chain, spin_cycles);
			if (present_swap_chain)
				DoPresent(present_swap_chain);
			return;
		}

		m_queued_present.command_buffer_index = m_current_frame;
		m_queued_present.swap_chain = present_swap_chain;
		m_queued_present.spin_cycles = spin_cycles;
		m_present_done.store(false);
		m_present_queued_cv.notify_one();
	}

	void Context::DoSubmitCommandBuffer(u32 index, SwapChain* present_swap_chain, u32 spin_cycles)
	{
		FrameResources& resources = m_frame_resources[index];

		uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		VkSemaphore semas[2];
		VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
		submit_info.commandBufferCount = resources.init_buffer_used ? 2u : 1u;
		submit_info.pCommandBuffers = resources.init_buffer_used ? resources.command_buffers.data() : &resources.command_buffers[1];

		if (present_swap_chain)
		{
			submit_info.pWaitSemaphores = present_swap_chain->GetImageAvailableSemaphorePtr();
			submit_info.waitSemaphoreCount = 1;
			submit_info.pWaitDstStageMask = &wait_bits;

			if (spin_cycles != 0)
			{
				semas[0] = present_swap_chain->GetRenderingFinishedSemaphore();
				semas[1] = m_spin_resources[index].semaphore;
				submit_info.signalSemaphoreCount = 2;
				submit_info.pSignalSemaphores = semas;
			}
			else
			{
				submit_info.pSignalSemaphores = present_swap_chain->GetRenderingFinishedSemaphorePtr();
				submit_info.signalSemaphoreCount = 1;
			}
		}
		else if (spin_cycles != 0)
		{
			submit_info.signalSemaphoreCount = 1;
			submit_info.pSignalSemaphores = &m_spin_resources[index].semaphore;
		}

		VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
			pxFailRel("Failed to submit command buffer.");
		}

		if (spin_cycles != 0)
			SubmitSpinCommand(index, spin_cycles);
	}

	void Context::DoPresent(SwapChain* present_swap_chain)
	{
		const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr,
			1, present_swap_chain->GetRenderingFinishedSemaphorePtr(),
			1, present_swap_chain->GetSwapChainPtr(), present_swap_chain->GetCurrentImageIndexPtr(),
			nullptr};

		present_swap_chain->ReleaseCurrentImage();

		VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
		if (res != VK_SUCCESS)
		{
			// VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
			if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR)
				LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");

			m_last_present_failed.store(true);
			return;
		}

		// Grab the next image as soon as possible, that way we spend less time blocked on the next
		// submission. Don't care if it fails, we'll deal with that at the presentation call site.
		// Credit to dxvk for the idea.
		present_swap_chain->AcquireNextImage();
	}

	void Context::WaitForPresentComplete()
	{
		if (m_present_done.load())
			return;

		std::unique_lock<std::mutex> lock(m_present_mutex);
		WaitForPresentComplete(lock);
	}

	void Context::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
	{
		if (m_present_done.load())
			return;

		m_present_done_cv.wait(lock, [this]() { return m_present_done.load(); });
	}

	void Context::PresentThread()
	{
		std::unique_lock<std::mutex> lock(m_present_mutex);
		while (!m_present_thread_done.load())
		{
			m_present_queued_cv.wait(lock, [this]() { return !m_present_done.load() || m_present_thread_done.load(); });

			if (m_present_done.load())
				continue;

			DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.swap_chain, m_queued_present.spin_cycles);
			if (m_queued_present.swap_chain)
				DoPresent(m_queued_present.swap_chain);
			m_present_done.store(true);
			m_present_done_cv.notify_one();
		}
	}

	void Context::StartPresentThread()
	{
		pxAssert(!m_present_thread.joinable());
		m_present_thread_done.store(false);
		m_present_thread = std::thread(&Context::PresentThread, this);
	}

	void Context::StopPresentThread()
	{
		if (!m_present_thread.joinable())
			return;

		{
			std::unique_lock<std::mutex> lock(m_present_mutex);
			WaitForPresentComplete(lock);
			m_present_thread_done.store(true);
			m_present_queued_cv.notify_one();
		}

		m_present_thread.join();
	}

	void Context::CommandBufferCompleted(u32 index)
	{
		FrameResources& resources = m_frame_resources[index];

		for (auto& it : resources.cleanup_resources)
			it();
		resources.cleanup_resources.clear();

		bool wants_timestamps = m_gpu_timing_enabled || resources.spin_id >= 0;

		if (wants_timestamps && resources.timestamp_written)
		{
			std::array<u64, 2> timestamps;
			VkResult res = vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast<u32>(timestamps.size()),
				sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT);
			if (res == VK_SUCCESS)
			{
				// if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be zero
				if (timestamps[0] > 0 && m_gpu_timing_enabled)
				{
					const double ns_diff = (timestamps[1] - timestamps[0]) * static_cast<double>(m_device_properties.limits.timestampPeriod);
					m_accumulated_gpu_time += ns_diff / 1000000.0;
				}
				if (resources.spin_id >= 0)
				{
					if (m_optional_extensions.vk_ext_calibrated_timestamps && timestamps[1] > 0)
					{
						u64 end = timestamps[1] * m_spin_timestamp_scale + m_spin_timestamp_offset;
						m_spin_manager.DrawCompleted(resources.spin_id, resources.submit_timestamp, end);
					}
					else if (!m_optional_extensions.vk_ext_calibrated_timestamps && timestamps[0] > 0)
					{
						u64 begin = timestamps[0] * m_spin_timestamp_scale;
						u64 end = timestamps[1] * m_spin_timestamp_scale;
						m_spin_manager.DrawCompleted(resources.spin_id, begin, end);
					}
				}
			}
			else
			{
				LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
			}
		}
	}

	void Context::MoveToNextCommandBuffer() { ActivateCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS); }

	void Context::ActivateCommandBuffer(u32 index)
	{
		FrameResources& resources = m_frame_resources[index];

		if (!m_present_done.load() && m_queued_present.command_buffer_index == index)
			WaitForPresentComplete();

		// Wait for the GPU to finish with all resources for this command buffer.
		if (resources.fence_counter > m_completed_fence_counter)
			WaitForCommandBufferCompletion(index);

		// Reset fence to unsignaled before starting.
		VkResult res = vkResetFences(m_device, 1, &resources.fence);
		if (res != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkResetFences failed: ");

		// Reset command pools to beginning since we can re-use the memory now
		res = vkResetCommandPool(m_device, resources.command_pool, 0);
		if (res != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: ");

		// Enable commands to be recorded to the two buffers again.
		VkCommandBufferBeginInfo begin_info = {
			VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr};
		res = vkBeginCommandBuffer(resources.command_buffers[1], &begin_info);
		if (res != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: ");

		// Also can do the same for the descriptor pools
		res = vkResetDescriptorPool(m_device, resources.descriptor_pool, 0);
		if (res != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: ");

		bool wants_timestamp = m_gpu_timing_enabled || m_spin_timer;
		if (wants_timestamp)
		{
			vkCmdResetQueryPool(resources.command_buffers[1], m_timestamp_query_pool, index * 2, 2);
			vkCmdWriteTimestamp(resources.command_buffers[1], VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, index * 2);
		}

		resources.fence_counter = m_next_fence_counter++;
		resources.init_buffer_used = false;
		resources.timestamp_written = wants_timestamp;

		m_current_frame = index;
		m_current_command_buffer = resources.command_buffers[1];

		// using the lower 32 bits of the fence index should be sufficient here, I hope...
		vmaSetCurrentFrameIndex(m_allocator, static_cast<u32>(m_next_fence_counter));
	}

	void Context::ExecuteCommandBuffer(WaitType wait_for_completion)
	{
		// If we're waiting for completion, don't bother waking the worker thread.
		const u32 current_frame = m_current_frame;
		SubmitCommandBuffer();
		MoveToNextCommandBuffer();

		if (wait_for_completion != WaitType::None)
		{
			// Calibrate while we wait
			if (m_wants_new_timestamp_calibration)
				CalibrateSpinTimestamp();
			if (wait_for_completion == WaitType::Spin)
			{
				while (vkGetFenceStatus(m_device, m_frame_resources[current_frame].fence) == VK_NOT_READY)
					ShortSpin();
			}
			WaitForCommandBufferCompletion(current_frame);
		}
	}

	bool Context::CheckLastPresentFail()
	{
		bool res = m_last_present_failed;
		m_last_present_failed = false;
		return res;
	}

	void Context::DeferBufferDestruction(VkBuffer object)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, object]() { vkDestroyBuffer(m_device, object, nullptr); });
	}

	void Context::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back(
			[this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); });
	}

	void Context::DeferBufferViewDestruction(VkBufferView object)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, object]() { vkDestroyBufferView(m_device, object, nullptr); });
	}

	void Context::DeferDeviceMemoryDestruction(VkDeviceMemory object)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, object]() { vkFreeMemory(m_device, object, nullptr); });
	}

	void Context::DeferFramebufferDestruction(VkFramebuffer object)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, object]() { vkDestroyFramebuffer(m_device, object, nullptr); });
	}

	void Context::DeferImageDestruction(VkImage object)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, object]() { vkDestroyImage(m_device, object, nullptr); });
	}

	void Context::DeferImageDestruction(VkImage object, VmaAllocation allocation)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back(
			[this, object, allocation]() { vmaDestroyImage(m_allocator, object, allocation); });
	}

	void Context::DeferImageViewDestruction(VkImageView object)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, object]() { vkDestroyImageView(m_device, object, nullptr); });
	}

	void Context::DeferPipelineDestruction(VkPipeline pipeline)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, pipeline]() { vkDestroyPipeline(m_device, pipeline, nullptr); });
	}

	void Context::DeferSamplerDestruction(VkSampler sampler)
	{
		FrameResources& resources = m_frame_resources[m_current_frame];
		resources.cleanup_resources.push_back([this, sampler]() { vkDestroySampler(m_device, sampler, nullptr); });
	}

	VKAPI_ATTR VkBool32 VKAPI_CALL DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
		VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
		void* pUserData)
	{
		if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
		{
			Console.Error("Vulkan debug report: (%s) %s",
				pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
		}
		else if (severity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT))
		{
			Console.Warning("Vulkan debug report: (%s) %s",
				pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
		}
		else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT)
		{
			Console.WriteLn("Vulkan debug report: (%s) %s",
				pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
		}
		else
		{
			DevCon.WriteLn("Vulkan debug report: (%s) %s",
				pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", pCallbackData->pMessage);
		}

		return VK_FALSE;
	}

	bool Context::EnableDebugUtils()
	{
		// Already enabled?
		if (m_debug_messenger_callback != VK_NULL_HANDLE)
			return true;

		// Check for presence of the functions before calling
		if (!vkCreateDebugUtilsMessengerEXT || !vkDestroyDebugUtilsMessengerEXT || !vkSubmitDebugUtilsMessageEXT)
		{
			return false;
		}

		VkDebugUtilsMessengerCreateInfoEXT messenger_info = {VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
			nullptr, 0,
			VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
				VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT,
			VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT |
				VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
			DebugMessengerCallback, nullptr};

		VkResult res =
			vkCreateDebugUtilsMessengerEXT(m_instance, &messenger_info, nullptr, &m_debug_messenger_callback);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkCreateDebugUtilsMessengerEXT failed: ");
			return false;
		}

		return true;
	}

	void Context::DisableDebugUtils()
	{
		if (m_debug_messenger_callback != VK_NULL_HANDLE)
		{
			vkDestroyDebugUtilsMessengerEXT(m_instance, m_debug_messenger_callback, nullptr);
			m_debug_messenger_callback = VK_NULL_HANDLE;
		}
	}

	VkRenderPass Context::CreateCachedRenderPass(RenderPassCacheKey key)
	{
		VkAttachmentReference color_reference;
		VkAttachmentReference* color_reference_ptr = nullptr;
		VkAttachmentReference depth_reference;
		VkAttachmentReference* depth_reference_ptr = nullptr;
		VkAttachmentReference input_reference;
		VkAttachmentReference* input_reference_ptr = nullptr;
		VkSubpassDependency subpass_dependency;
		VkSubpassDependency* subpass_dependency_ptr = nullptr;
		std::array<VkAttachmentDescription, 2> attachments;
		u32 num_attachments = 0;
		if (key.color_format != VK_FORMAT_UNDEFINED)
		{
			attachments[num_attachments] = {0, static_cast<VkFormat>(key.color_format), VK_SAMPLE_COUNT_1_BIT,
				static_cast<VkAttachmentLoadOp>(key.color_load_op),
				static_cast<VkAttachmentStoreOp>(key.color_store_op), VK_ATTACHMENT_LOAD_OP_DONT_CARE,
				VK_ATTACHMENT_STORE_OP_DONT_CARE,
				key.color_feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				key.color_feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
			color_reference.attachment = num_attachments;
			color_reference.layout =
				key.color_feedback_loop ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
			color_reference_ptr = &color_reference;

			if (key.color_feedback_loop)
			{
				input_reference.attachment = num_attachments;
				input_reference.layout = VK_IMAGE_LAYOUT_GENERAL;
				input_reference_ptr = &input_reference;

				if (!g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access)
				{
					// don't need the framebuffer-local dependency when we have rasterization order attachment access
					subpass_dependency.srcSubpass = 0;
					subpass_dependency.dstSubpass = 0;
					subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
					subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
					subpass_dependency.srcAccessMask =
						VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
					subpass_dependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
					subpass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
					subpass_dependency_ptr = &subpass_dependency;
				}
			}

			num_attachments++;
		}
		if (key.depth_format != VK_FORMAT_UNDEFINED)
		{
			attachments[num_attachments] = {0, static_cast<VkFormat>(key.depth_format), VK_SAMPLE_COUNT_1_BIT,
				static_cast<VkAttachmentLoadOp>(key.depth_load_op),
				static_cast<VkAttachmentStoreOp>(key.depth_store_op),
				static_cast<VkAttachmentLoadOp>(key.stencil_load_op),
				static_cast<VkAttachmentStoreOp>(key.stencil_store_op),
				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL};
			depth_reference.attachment = num_attachments;
			depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
			depth_reference_ptr = &depth_reference;
			num_attachments++;
		}

		const VkSubpassDescriptionFlags subpass_flags =
			(key.color_feedback_loop && g_vulkan_context->GetOptionalExtensions().vk_arm_rasterization_order_attachment_access) ? VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM : 0;
		const VkSubpassDescription subpass = {subpass_flags, VK_PIPELINE_BIND_POINT_GRAPHICS, input_reference_ptr ? 1u : 0u,
			input_reference_ptr ? input_reference_ptr : nullptr, color_reference_ptr ? 1u : 0u,
			color_reference_ptr ? color_reference_ptr : nullptr, nullptr, depth_reference_ptr, 0, nullptr};
		const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, nullptr, 0u,
			num_attachments, attachments.data(), 1u, &subpass, subpass_dependency_ptr ? 1u : 0u,
			subpass_dependency_ptr};

		VkRenderPass pass;
		VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: ");
			return VK_NULL_HANDLE;
		}

		m_render_pass_cache.emplace(key.key, pass);
		return pass;
	}

	void Context::DestroyRenderPassCache()
	{
		for (auto& it : m_render_pass_cache)
			vkDestroyRenderPass(m_device, it.second, nullptr);

		m_render_pass_cache.clear();
	}

	static constexpr std::string_view SPIN_SHADER = R"(
#version 460 core

layout(std430, set=0, binding=0) buffer SpinBuffer { uint spin[]; };
layout(push_constant) uniform constants { uint cycles; };
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

void main()
{
	uint value = spin[0];
	// The compiler doesn't know, but spin[0] == 0, so this loop won't actually go anywhere
	for (uint i = 0; i < cycles; i++)
		value = spin[value];
	// Store the result back to the buffer so the compiler can't optimize it away
	spin[0] = value;
}
)";

	bool Context::InitSpinResources()
	{
		if (!m_spinning_supported)
			return true;
		auto spirv = ShaderCompiler::CompileComputeShader(SPIN_SHADER);
		if (!spirv.has_value())
			return false;

		VkResult res;
#define CHECKED_CREATE(create_fn, create_struct, output_struct) \
	do { \
		if ((res = create_fn(m_device, create_struct, nullptr, output_struct)) != VK_SUCCESS) \
		{ \
			LOG_VULKAN_ERROR(res, #create_fn " failed: "); \
			return false; \
		} \
	} while (0)

		VkDescriptorSetLayoutBinding set_layout_binding = {};
		set_layout_binding.binding = 0;
		set_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		set_layout_binding.descriptorCount = 1;
		set_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
		VkDescriptorSetLayoutCreateInfo desc_set_layout_create = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
		desc_set_layout_create.bindingCount = 1;
		desc_set_layout_create.pBindings = &set_layout_binding;
		CHECKED_CREATE(vkCreateDescriptorSetLayout, &desc_set_layout_create, &m_spin_descriptor_set_layout);

		const VkPushConstantRange push_constant_range = { VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(u32) };
		VkPipelineLayoutCreateInfo pl_layout_create = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
		pl_layout_create.setLayoutCount = 1;
		pl_layout_create.pSetLayouts = &m_spin_descriptor_set_layout;
		pl_layout_create.pushConstantRangeCount = 1;
		pl_layout_create.pPushConstantRanges = &push_constant_range;
		CHECKED_CREATE(vkCreatePipelineLayout, &pl_layout_create, &m_spin_pipeline_layout);

		VkShaderModuleCreateInfo module_create = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO };
		module_create.codeSize = spirv->size() * sizeof(ShaderCompiler::SPIRVCodeType);
		module_create.pCode = spirv->data();
		VkShaderModule shader_module;
		CHECKED_CREATE(vkCreateShaderModule, &module_create, &shader_module);
		Util::SetObjectName(m_device, shader_module, "Spin Shader");

		VkComputePipelineCreateInfo pl_create = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO };
		pl_create.layout = m_spin_pipeline_layout;
		pl_create.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		pl_create.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
		pl_create.stage.pName = "main";
		pl_create.stage.module = shader_module;
		res = vkCreateComputePipelines(m_device, VK_NULL_HANDLE, 1, &pl_create, nullptr, &m_spin_pipeline);
		vkDestroyShaderModule(m_device, shader_module, nullptr);
		if (res != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: ");
			return false;
		}
		Util::SetObjectName(m_device, m_spin_pipeline, "Spin Pipeline");

		VmaAllocationCreateInfo buf_vma_create = {};
		buf_vma_create.usage = VMA_MEMORY_USAGE_GPU_ONLY;
		VkBufferCreateInfo buf_create = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
		buf_create.size = 4;
		buf_create.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
		if ((res = vmaCreateBuffer(m_allocator, &buf_create, &buf_vma_create, &m_spin_buffer, &m_spin_buffer_allocation, nullptr)) != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vmaCreateBuffer failed: ");
			return false;
		}
		Util::SetObjectName(m_device, m_spin_buffer, "Spin Buffer");

		VkDescriptorSetAllocateInfo desc_set_allocate = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
		desc_set_allocate.descriptorPool = m_global_descriptor_pool;
		desc_set_allocate.descriptorSetCount = 1;
		desc_set_allocate.pSetLayouts = &m_spin_descriptor_set_layout;
		if ((res = vkAllocateDescriptorSets(m_device, &desc_set_allocate, &m_spin_descriptor_set)) != VK_SUCCESS)
		{
			LOG_VULKAN_ERROR(res, "vkAllocateDescriptorSets failed: ");
			return false;
		}
		const VkDescriptorBufferInfo desc_buffer_info = { m_spin_buffer, 0, VK_WHOLE_SIZE };
		VkWriteDescriptorSet desc_set_write = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
		desc_set_write.dstSet = m_spin_descriptor_set;
		desc_set_write.dstBinding = 0;
		desc_set_write.descriptorCount = 1;
		desc_set_write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		desc_set_write.pBufferInfo = &desc_buffer_info;
		vkUpdateDescriptorSets(m_device, 1, &desc_set_write, 0, nullptr);

		for (SpinResources& resources : m_spin_resources)
		{
			u32 index = &resources - &m_spin_resources[0];
			VkCommandPoolCreateInfo pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
			pool_info.queueFamilyIndex = m_spin_queue_family_index;
			CHECKED_CREATE(vkCreateCommandPool, &pool_info, &resources.command_pool);
			Vulkan::Util::SetObjectName(m_device, resources.command_pool, "Spin Command Pool %u", index);

			VkCommandBufferAllocateInfo buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
			buffer_info.commandPool = resources.command_pool;
			buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
			buffer_info.commandBufferCount = 1;
			res = vkAllocateCommandBuffers(m_device, &buffer_info, &resources.command_buffer);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: ");
				return false;
			}
			Vulkan::Util::SetObjectName(m_device, resources.command_buffer, "Spin Command Buffer %u", index);

			VkFenceCreateInfo fence_info = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
			fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
			CHECKED_CREATE(vkCreateFence, &fence_info, &resources.fence);
			Vulkan::Util::SetObjectName(m_device, resources.fence, "Spin Fence %u", index);

			if (!m_spin_queue_is_graphics_queue)
			{
				VkSemaphoreCreateInfo sem_info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
				CHECKED_CREATE(vkCreateSemaphore, &sem_info, &resources.semaphore);
				Vulkan::Util::SetObjectName(m_device, resources.semaphore, "Draw to Spin Semaphore %u", index);
			}
		}

#undef CHECKED_CREATE
		return true;
	}

	void Context::DestroySpinResources()
	{
#define CHECKED_DESTROY(destructor, obj) \
		do { \
			if (obj != VK_NULL_HANDLE) \
			{ \
				destructor(m_device, obj, nullptr); \
				obj = VK_NULL_HANDLE; \
			} \
		} while (0)

		if (m_spin_buffer)
		{
			vmaDestroyBuffer(m_allocator, m_spin_buffer, m_spin_buffer_allocation);
			m_spin_buffer = VK_NULL_HANDLE;
			m_spin_buffer_allocation = VK_NULL_HANDLE;
		}
		CHECKED_DESTROY(vkDestroyPipeline, m_spin_pipeline);
		CHECKED_DESTROY(vkDestroyPipelineLayout, m_spin_pipeline_layout);
		CHECKED_DESTROY(vkDestroyDescriptorSetLayout, m_spin_descriptor_set_layout);
		if (m_spin_descriptor_set != VK_NULL_HANDLE)
		{
			vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &m_spin_descriptor_set);
			m_spin_descriptor_set = VK_NULL_HANDLE;
		}
		for (SpinResources& resources : m_spin_resources)
		{
			CHECKED_DESTROY(vkDestroySemaphore, resources.semaphore);
			CHECKED_DESTROY(vkDestroyFence, resources.fence);
			if (resources.command_buffer != VK_NULL_HANDLE)
			{
				vkFreeCommandBuffers(m_device, resources.command_pool, 1, &resources.command_buffer);
				resources.command_buffer = VK_NULL_HANDLE;
			}
			CHECKED_DESTROY(vkDestroyCommandPool, resources.command_pool);
		}
#undef CHECKED_DESTROY
	}

	void Context::WaitForSpinCompletion(u32 index)
	{
		SpinResources& resources = m_spin_resources[index];
		if (!resources.in_progress)
			return;
		VkResult res = vkWaitForFences(m_device, 1, &resources.fence, VK_TRUE, UINT64_MAX);
		if (res != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkWaitForFences failed: ");
		SpinCommandCompleted(index);
	}

	void Context::SpinCommandCompleted(u32 index)
	{
		SpinResources& resources = m_spin_resources[index];
		resources.in_progress = false;
		const u32 timestamp_base = (index + NUM_COMMAND_BUFFERS) * 2;
		std::array<u64, 2> timestamps;
		VkResult res = vkGetQueryPoolResults(m_device, m_timestamp_query_pool, timestamp_base, static_cast<u32>(timestamps.size()),
			sizeof(timestamps), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT);
		if (res == VK_SUCCESS)
		{
			u64 begin, end;
			if (m_optional_extensions.vk_ext_calibrated_timestamps)
			{
				begin = timestamps[0] * m_spin_timestamp_scale + m_spin_timestamp_offset;
				end = timestamps[1] * m_spin_timestamp_scale + m_spin_timestamp_offset;
			}
			else
			{
				begin = timestamps[0] * m_spin_timestamp_scale;
				end = timestamps[1] * m_spin_timestamp_scale;
			}
			m_spin_manager.SpinCompleted(resources.cycles, begin, end);
		}
		else
		{
			LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
		}
	}

	void Context::SubmitSpinCommand(u32 index, u32 cycles)
	{
		SpinResources& resources = m_spin_resources[index];
		VkResult res;

		// Reset fence to unsignaled before starting.
		if ((res = vkResetFences(m_device, 1, &resources.fence)) != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkResetFences failed: ");

		// Reset command pools to beginning since we can re-use the memory now
		if ((res = vkResetCommandPool(m_device, resources.command_pool, 0)) != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: ");

		// Enable commands to be recorded to the two buffers again.
		VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
		begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
		if ((res = vkBeginCommandBuffer(resources.command_buffer, &begin_info)) != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: ");

		if (!m_spin_buffer_initialized)
		{
			m_spin_buffer_initialized = true;
			vkCmdFillBuffer(resources.command_buffer, m_spin_buffer, 0, VK_WHOLE_SIZE, 0);
			VkBufferMemoryBarrier barrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER };
			barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
			barrier.srcQueueFamilyIndex = m_spin_queue_family_index;
			barrier.dstQueueFamilyIndex = m_spin_queue_family_index;
			barrier.buffer = m_spin_buffer;
			barrier.offset = 0;
			barrier.size = VK_WHOLE_SIZE;
			vkCmdPipelineBarrier(resources.command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
		}

		if (m_spin_queue_is_graphics_queue)
			vkCmdPipelineBarrier(resources.command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);

		const u32 timestamp_base = (index + NUM_COMMAND_BUFFERS) * 2;
		vkCmdResetQueryPool(resources.command_buffer, m_timestamp_query_pool, timestamp_base, 2);
		vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, timestamp_base);
		vkCmdPushConstants(resources.command_buffer, m_spin_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(u32), &cycles);
		vkCmdBindPipeline(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_spin_pipeline);
		vkCmdBindDescriptorSets(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_spin_pipeline_layout, 0, 1, &m_spin_descriptor_set, 0, nullptr);
		vkCmdDispatch(resources.command_buffer, 1, 1, 1);
		vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, m_timestamp_query_pool, timestamp_base + 1);

		if ((res = vkEndCommandBuffer(resources.command_buffer)) != VK_SUCCESS)
			LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");

		VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
		submit_info.commandBufferCount = 1;
		submit_info.pCommandBuffers = &resources.command_buffer;
		VkPipelineStageFlags sema_waits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
		if (!m_spin_queue_is_graphics_queue)
		{
			submit_info.waitSemaphoreCount = 1;
			submit_info.pWaitSemaphores = &resources.semaphore;
			submit_info.pWaitDstStageMask = sema_waits;
		}
		vkQueueSubmit(m_spin_queue, 1, &submit_info, resources.fence);
		resources.in_progress = true;
		resources.cycles = cycles;
	}

	void Context::NotifyOfReadback()
	{
		if (!m_spinning_supported)
			return;
		m_spin_timer = 30;
		m_spin_manager.ReadbackRequested();
	}

	void Context::CalibrateSpinTimestamp()
	{
		if (!m_optional_extensions.vk_ext_calibrated_timestamps)
			return;
		VkCalibratedTimestampInfoEXT infos[2] = {
			{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
			{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_calibrated_timestamp_type },
		};
		u64 timestamps[2];
		u64 maxDeviation;
		constexpr u64 MAX_MAX_DEVIATION = 100000; // 100µs
		for (int i = 0; i < 4; i++) // 4 tries to get under MAX_MAX_DEVIATION
		{
			VkResult res = vkGetCalibratedTimestampsEXT(m_device, std::size(infos), infos, timestamps, &maxDeviation);
			if (res != VK_SUCCESS)
			{
				LOG_VULKAN_ERROR(res, "vkGetCalibratedTimestampsEXT failed: ");
				return;
			}
			if (maxDeviation < MAX_MAX_DEVIATION)
				break;
		}
		if (maxDeviation >= MAX_MAX_DEVIATION)
			Console.Warning("vkGetCalibratedTimestampsEXT returned high max deviation of %lluµs", maxDeviation / 1000);
		const double gpu_time = timestamps[0] * m_spin_timestamp_scale;
#ifdef _WIN32
		const double cpu_time = timestamps[1] * m_queryperfcounter_to_ns;
#else
		const double cpu_time = timestamps[1];
#endif
		m_spin_timestamp_offset = cpu_time - gpu_time;
	}

	u64 Context::GetCPUTimestamp()
	{
#ifdef _WIN32
		LARGE_INTEGER value = {};
		QueryPerformanceCounter(&value);
		return static_cast<u64>(static_cast<double>(value.QuadPart) * m_queryperfcounter_to_ns);
#else
#ifdef CLOCK_MONOTONIC_RAW
		const bool use_raw = m_calibrated_timestamp_type == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
		const clockid_t clock = use_raw ? CLOCK_MONOTONIC_RAW : CLOCK_MONOTONIC;
#else
		const clockid_t clock = CLOCK_MONOTONIC;
#endif
		timespec ts = {};
		clock_gettime(clock, &ts);
		return static_cast<u64>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
	}
} // namespace Vulkan