mirror of https://github.com/PCSX2/pcsx2.git
GS:Vk: Add option to spin GPU during readbacks
This commit is contained in:
parent
4c9981f3c8
commit
923b0817ce
|
@ -18,12 +18,19 @@
|
|||
#include "common/Assertions.h"
|
||||
#include "common/Console.h"
|
||||
#include "common/StringUtil.h"
|
||||
#include "common/Vulkan/ShaderCompiler.h"
|
||||
#include "common/Vulkan/SwapChain.h"
|
||||
#include "common/Vulkan/Util.h"
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "common/RedtapeWindows.h"
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
std::unique_ptr<Vulkan::Context> g_vulkan_context;
|
||||
|
||||
// Tweakables
|
||||
|
@ -351,6 +358,7 @@ namespace Vulkan
|
|||
if (!g_vulkan_context->CreateDevice(surface, enable_validation_layer, nullptr, 0, nullptr, 0, nullptr) ||
|
||||
!g_vulkan_context->CreateAllocator() || !g_vulkan_context->CreateGlobalDescriptorPool() ||
|
||||
!g_vulkan_context->CreateCommandBuffers() || !g_vulkan_context->CreateTextureStreamBuffer() ||
|
||||
!g_vulkan_context->InitSpinResources() ||
|
||||
(enable_surface && (*out_swap_chain = SwapChain::Create(wi_copy, surface, preferred_present_mode)) == nullptr))
|
||||
{
|
||||
// Since we are destroying the instance, we're also responsible for destroying the surface.
|
||||
|
@ -378,6 +386,7 @@ namespace Vulkan
|
|||
|
||||
g_vulkan_context->m_texture_upload_buffer.Destroy(false);
|
||||
|
||||
g_vulkan_context->DestroySpinResources();
|
||||
g_vulkan_context->DestroyRenderPassCache();
|
||||
g_vulkan_context->DestroyGlobalDescriptorPool();
|
||||
g_vulkan_context->DestroyCommandBuffers();
|
||||
|
@ -498,6 +507,8 @@ namespace Vulkan
|
|||
// Find graphics and present queues.
|
||||
m_graphics_queue_family_index = queue_family_count;
|
||||
m_present_queue_family_index = queue_family_count;
|
||||
m_spin_queue_family_index = queue_family_count;
|
||||
u32 spin_queue_index = 0;
|
||||
for (uint32_t i = 0; i < queue_family_count; i++)
|
||||
{
|
||||
VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT;
|
||||
|
@ -533,6 +544,23 @@ namespace Vulkan
|
|||
}
|
||||
}
|
||||
}
|
||||
for (uint32_t i = 0; i < queue_family_count; i++)
|
||||
{
|
||||
// Pick a queue for spinning
|
||||
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_COMPUTE_BIT))
|
||||
continue; // We need compute
|
||||
if (queue_family_properties[i].timestampValidBits == 0)
|
||||
continue; // We need timing
|
||||
const bool queue_is_used = i == m_graphics_queue_family_index || i == m_present_queue_family_index;
|
||||
if (queue_is_used && m_spin_queue_family_index != queue_family_count)
|
||||
continue; // Found a non-graphics queue to use
|
||||
spin_queue_index = 0;
|
||||
m_spin_queue_family_index = i;
|
||||
if (queue_is_used && queue_family_properties[i].queueCount > 1)
|
||||
spin_queue_index = 1;
|
||||
if (!(queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
|
||||
break; // Async compute queue, definitely pick this one
|
||||
}
|
||||
if (m_graphics_queue_family_index == queue_family_count)
|
||||
{
|
||||
Console.Error("Vulkan: Failed to find an acceptable graphics queue.");
|
||||
|
@ -548,9 +576,11 @@ namespace Vulkan
|
|||
device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
device_info.pNext = nullptr;
|
||||
device_info.flags = 0;
|
||||
device_info.queueCreateInfoCount = 0;
|
||||
|
||||
static constexpr float queue_priorities[] = {1.0f};
|
||||
VkDeviceQueueCreateInfo graphics_queue_info = {};
|
||||
static constexpr float queue_priorities[] = {1.0f, 0.0f}; // Low priority for the spin queue
|
||||
std::array<VkDeviceQueueCreateInfo, 3> queue_infos;
|
||||
VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++];
|
||||
graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
graphics_queue_info.pNext = nullptr;
|
||||
graphics_queue_info.flags = 0;
|
||||
|
@ -558,24 +588,38 @@ namespace Vulkan
|
|||
graphics_queue_info.queueCount = 1;
|
||||
graphics_queue_info.pQueuePriorities = queue_priorities;
|
||||
|
||||
VkDeviceQueueCreateInfo present_queue_info = {};
|
||||
present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
present_queue_info.pNext = nullptr;
|
||||
present_queue_info.flags = 0;
|
||||
present_queue_info.queueFamilyIndex = m_present_queue_family_index;
|
||||
present_queue_info.queueCount = 1;
|
||||
present_queue_info.pQueuePriorities = queue_priorities;
|
||||
|
||||
std::array<VkDeviceQueueCreateInfo, 2> queue_infos = {{
|
||||
graphics_queue_info,
|
||||
present_queue_info,
|
||||
}};
|
||||
|
||||
device_info.queueCreateInfoCount = 1;
|
||||
if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index)
|
||||
{
|
||||
device_info.queueCreateInfoCount = 2;
|
||||
VkDeviceQueueCreateInfo& present_queue_info = queue_infos[device_info.queueCreateInfoCount++];
|
||||
present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
present_queue_info.pNext = nullptr;
|
||||
present_queue_info.flags = 0;
|
||||
present_queue_info.queueFamilyIndex = m_present_queue_family_index;
|
||||
present_queue_info.queueCount = 1;
|
||||
present_queue_info.pQueuePriorities = queue_priorities;
|
||||
}
|
||||
|
||||
if (m_spin_queue_family_index == m_graphics_queue_family_index)
|
||||
{
|
||||
if (spin_queue_index != 0)
|
||||
graphics_queue_info.queueCount = 2;
|
||||
}
|
||||
else if (m_spin_queue_family_index == m_present_queue_family_index)
|
||||
{
|
||||
if (spin_queue_index != 0)
|
||||
queue_infos[1].queueCount = 2; // present queue
|
||||
}
|
||||
else
|
||||
{
|
||||
VkDeviceQueueCreateInfo& spin_queue_info = queue_infos[device_info.queueCreateInfoCount++];
|
||||
spin_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
spin_queue_info.pNext = nullptr;
|
||||
spin_queue_info.flags = 0;
|
||||
spin_queue_info.queueFamilyIndex = m_spin_queue_family_index;
|
||||
spin_queue_info.queueCount = 1;
|
||||
spin_queue_info.pQueuePriorities = queue_priorities + 1;
|
||||
}
|
||||
|
||||
device_info.pQueueCreateInfos = queue_infos.data();
|
||||
|
||||
ExtensionList enabled_extensions;
|
||||
|
@ -637,6 +681,10 @@ namespace Vulkan
|
|||
{
|
||||
vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue);
|
||||
}
|
||||
m_spinning_supported = m_spin_queue_family_index != queue_family_count &&
|
||||
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
|
||||
m_device_properties.limits.timestampPeriod > 0;
|
||||
m_spin_queue_is_graphics_queue = m_spin_queue_family_index == m_graphics_queue_family_index && spin_queue_index == 0;
|
||||
|
||||
m_gpu_timing_supported = (m_device_properties.limits.timestampComputeAndGraphics != 0 &&
|
||||
queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
|
||||
|
@ -648,6 +696,23 @@ namespace Vulkan
|
|||
m_device_properties.limits.timestampPeriod);
|
||||
|
||||
ProcessDeviceExtensions();
|
||||
|
||||
if (m_spinning_supported)
|
||||
{
|
||||
vkGetDeviceQueue(m_device, m_spin_queue_family_index, spin_queue_index, &m_spin_queue);
|
||||
|
||||
m_spin_timestamp_scale = m_device_properties.limits.timestampPeriod;
|
||||
if (m_optional_extensions.vk_ext_calibrated_timestamps)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER Freq;
|
||||
QueryPerformanceFrequency(&Freq);
|
||||
m_queryperfcounter_to_ns = 1000000000.0 / static_cast < double > (Freq.QuadPart);
|
||||
#endif
|
||||
CalibrateSpinTimestamp();
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -868,6 +933,7 @@ namespace Vulkan
|
|||
VkDescriptorPoolSize pool_sizes[] = {
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1024},
|
||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1024},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1},
|
||||
};
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr,
|
||||
|
@ -886,7 +952,7 @@ namespace Vulkan
|
|||
if (m_gpu_timing_supported)
|
||||
{
|
||||
const VkQueryPoolCreateInfo query_create_info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr,
|
||||
0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 2, 0};
|
||||
0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 4, 0};
|
||||
res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
|
@ -1024,6 +1090,14 @@ namespace Vulkan
|
|||
CommandBufferCompleted(check_index);
|
||||
m_completed_fence_counter = resources.fence_counter;
|
||||
}
|
||||
for (SpinResources& resources : m_spin_resources)
|
||||
{
|
||||
if (!resources.in_progress)
|
||||
continue;
|
||||
if (vkGetFenceStatus(m_device, resources.fence) != VK_SUCCESS)
|
||||
continue;
|
||||
SpinCommandCompleted(&resources - &m_spin_resources[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void Context::WaitForCommandBufferCompletion(u32 index)
|
||||
|
@ -1070,7 +1144,8 @@ namespace Vulkan
|
|||
}
|
||||
}
|
||||
|
||||
if (m_gpu_timing_enabled && resources.timestamp_written)
|
||||
bool wants_timestamp = m_gpu_timing_enabled || m_spin_timer;
|
||||
if (wants_timestamp && resources.timestamp_written)
|
||||
{
|
||||
vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, m_current_frame * 2 + 1);
|
||||
}
|
||||
|
@ -1085,12 +1160,48 @@ namespace Vulkan
|
|||
// This command buffer now has commands, so can't be re-used without waiting.
|
||||
resources.needs_fence_wait = true;
|
||||
|
||||
u32 spin_cycles = 0;
|
||||
const bool spin_enabled = m_spin_timer;
|
||||
if (spin_enabled)
|
||||
{
|
||||
ScanForCommandBufferCompletion();
|
||||
auto draw = m_spin_manager.DrawSubmitted(m_command_buffer_render_passes);
|
||||
u32 constant_offset = 400000 * m_spin_manager.SpinsPerUnitTime(); // 400µs, just to be safe since going over gets really bad
|
||||
if (m_optional_extensions.vk_ext_calibrated_timestamps)
|
||||
constant_offset /= 2; // Safety factor isn't as important here, going over just hurts this one submission a bit
|
||||
u32 minimum_spin = 200000 * m_spin_manager.SpinsPerUnitTime();
|
||||
u32 maximum_spin = std::max<u32>(1024, 16000000 * m_spin_manager.SpinsPerUnitTime()); // 16ms
|
||||
if (draw.recommended_spin > minimum_spin + constant_offset)
|
||||
spin_cycles = std::min(draw.recommended_spin - constant_offset, maximum_spin);
|
||||
resources.spin_id = draw.id;
|
||||
}
|
||||
else
|
||||
{
|
||||
resources.spin_id = -1;
|
||||
}
|
||||
m_command_buffer_render_passes = 0;
|
||||
|
||||
if (present_swap_chain != VK_NULL_HANDLE && m_spinning_supported)
|
||||
{
|
||||
m_spin_manager.NextFrame();
|
||||
if (m_spin_timer)
|
||||
m_spin_timer--;
|
||||
// Calibrate a max of once per frame
|
||||
m_wants_new_timestamp_calibration = m_optional_extensions.vk_ext_calibrated_timestamps;
|
||||
}
|
||||
|
||||
if (spin_cycles != 0)
|
||||
WaitForSpinCompletion(m_current_frame);
|
||||
|
||||
std::unique_lock<std::mutex> lock(m_present_mutex);
|
||||
WaitForPresentComplete(lock);
|
||||
|
||||
if (spin_enabled && m_optional_extensions.vk_ext_calibrated_timestamps)
|
||||
resources.submit_timestamp = GetCPUTimestamp();
|
||||
|
||||
if (!submit_on_thread || !m_present_thread.joinable())
|
||||
{
|
||||
DoSubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore);
|
||||
DoSubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore, spin_cycles);
|
||||
if (present_swap_chain != VK_NULL_HANDLE)
|
||||
DoPresent(signal_semaphore, present_swap_chain, present_image_index);
|
||||
return;
|
||||
|
@ -1101,30 +1212,45 @@ namespace Vulkan
|
|||
m_queued_present.present_image_index = present_image_index;
|
||||
m_queued_present.wait_semaphore = wait_semaphore;
|
||||
m_queued_present.signal_semaphore = signal_semaphore;
|
||||
m_queued_present.spin_cycles = spin_cycles;
|
||||
m_present_done.store(false);
|
||||
m_present_queued_cv.notify_one();
|
||||
}
|
||||
|
||||
void Context::DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore)
|
||||
void Context::DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, u32 spin_cycles)
|
||||
{
|
||||
FrameResources& resources = m_frame_resources[index];
|
||||
|
||||
uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, &wait_bits,
|
||||
resources.init_buffer_used ? 2u : 1u,
|
||||
resources.init_buffer_used ? resources.command_buffers.data() : &resources.command_buffers[1], 0, nullptr};
|
||||
VkSemaphore semas[2];
|
||||
VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO };
|
||||
submit_info.commandBufferCount = resources.init_buffer_used ? 2u : 1u;
|
||||
submit_info.pCommandBuffers = resources.init_buffer_used ? resources.command_buffers.data() : &resources.command_buffers[1];
|
||||
|
||||
if (wait_semaphore != VK_NULL_HANDLE)
|
||||
{
|
||||
submit_info.pWaitSemaphores = &wait_semaphore;
|
||||
submit_info.waitSemaphoreCount = 1;
|
||||
submit_info.pWaitDstStageMask = &wait_bits;
|
||||
}
|
||||
|
||||
if (signal_semaphore != VK_NULL_HANDLE)
|
||||
if (signal_semaphore != VK_NULL_HANDLE && spin_cycles != 0)
|
||||
{
|
||||
semas[0] = signal_semaphore;
|
||||
semas[1] = m_spin_resources[index].semaphore;
|
||||
submit_info.signalSemaphoreCount = 2;
|
||||
submit_info.pSignalSemaphores = semas;
|
||||
}
|
||||
else if (signal_semaphore != VK_NULL_HANDLE)
|
||||
{
|
||||
submit_info.signalSemaphoreCount = 1;
|
||||
submit_info.pSignalSemaphores = &signal_semaphore;
|
||||
}
|
||||
else if (spin_cycles != 0)
|
||||
{
|
||||
submit_info.signalSemaphoreCount = 1;
|
||||
submit_info.pSignalSemaphores = &m_spin_resources[index].semaphore;
|
||||
}
|
||||
|
||||
VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
|
||||
if (res != VK_SUCCESS)
|
||||
|
@ -1132,6 +1258,9 @@ namespace Vulkan
|
|||
LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
|
||||
pxFailRel("Failed to submit command buffer.");
|
||||
}
|
||||
|
||||
if (spin_cycles != 0)
|
||||
SubmitSpinCommand(index, spin_cycles);
|
||||
}
|
||||
|
||||
void Context::DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index)
|
||||
|
@ -1180,7 +1309,7 @@ namespace Vulkan
|
|||
continue;
|
||||
|
||||
DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.wait_semaphore,
|
||||
m_queued_present.signal_semaphore);
|
||||
m_queued_present.signal_semaphore, m_queued_present.spin_cycles);
|
||||
DoPresent(m_queued_present.signal_semaphore, m_queued_present.present_swap_chain,
|
||||
m_queued_present.present_image_index);
|
||||
m_present_done.store(true);
|
||||
|
@ -1218,7 +1347,9 @@ namespace Vulkan
|
|||
it();
|
||||
resources.cleanup_resources.clear();
|
||||
|
||||
if (m_gpu_timing_enabled && resources.timestamp_written)
|
||||
bool wants_timestamps = m_gpu_timing_enabled || resources.spin_id >= 0;
|
||||
|
||||
if (wants_timestamps && resources.timestamp_written)
|
||||
{
|
||||
std::array<u64, 2> timestamps;
|
||||
VkResult res = vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast<u32>(timestamps.size()),
|
||||
|
@ -1226,11 +1357,25 @@ namespace Vulkan
|
|||
if (res == VK_SUCCESS)
|
||||
{
|
||||
// if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be zero
|
||||
if (timestamps[0] > 0)
|
||||
if (timestamps[0] > 0 && m_gpu_timing_enabled)
|
||||
{
|
||||
const double ns_diff = (timestamps[1] - timestamps[0]) * static_cast<double>(m_device_properties.limits.timestampPeriod);
|
||||
m_accumulated_gpu_time += ns_diff / 1000000.0;
|
||||
}
|
||||
if (resources.spin_id >= 0)
|
||||
{
|
||||
if (m_optional_extensions.vk_ext_calibrated_timestamps && timestamps[1] > 0)
|
||||
{
|
||||
u64 end = timestamps[1] * m_spin_timestamp_scale + m_spin_timestamp_offset;
|
||||
m_spin_manager.DrawCompleted(resources.spin_id, resources.submit_timestamp, end);
|
||||
}
|
||||
else if (!m_optional_extensions.vk_ext_calibrated_timestamps && timestamps[0] > 0)
|
||||
{
|
||||
u64 begin = timestamps[0] * m_spin_timestamp_scale;
|
||||
u64 end = timestamps[1] * m_spin_timestamp_scale;
|
||||
m_spin_manager.DrawCompleted(resources.spin_id, begin, end);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1274,7 +1419,8 @@ namespace Vulkan
|
|||
if (res != VK_SUCCESS)
|
||||
LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: ");
|
||||
|
||||
if (m_gpu_timing_enabled)
|
||||
bool wants_timestamp = m_gpu_timing_enabled || m_spin_timer;
|
||||
if (wants_timestamp)
|
||||
{
|
||||
vkCmdResetQueryPool(resources.command_buffers[1], m_timestamp_query_pool, index * 2, 2);
|
||||
vkCmdWriteTimestamp(resources.command_buffers[1], VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, index * 2);
|
||||
|
@ -1282,7 +1428,7 @@ namespace Vulkan
|
|||
|
||||
resources.fence_counter = m_next_fence_counter++;
|
||||
resources.init_buffer_used = false;
|
||||
resources.timestamp_written = m_gpu_timing_enabled;
|
||||
resources.timestamp_written = wants_timestamp;
|
||||
|
||||
m_current_frame = index;
|
||||
m_current_command_buffer = resources.command_buffers[1];
|
||||
|
@ -1299,7 +1445,12 @@ namespace Vulkan
|
|||
MoveToNextCommandBuffer();
|
||||
|
||||
if (wait_for_completion)
|
||||
{
|
||||
// Calibrate while we wait
|
||||
if (m_wants_new_timestamp_calibration)
|
||||
CalibrateSpinTimestamp();
|
||||
WaitForCommandBufferCompletion(current_frame);
|
||||
}
|
||||
}
|
||||
|
||||
bool Context::CheckLastPresentFail()
|
||||
|
@ -1529,4 +1680,350 @@ namespace Vulkan
|
|||
|
||||
m_render_pass_cache.clear();
|
||||
}
|
||||
|
||||
// GLSL source of the compute shader that InitSpinResources() compiles into the spin pipeline.
// A single invocation (local size 1x1x1) performs `cycles` dependent loads from the SpinBuffer
// storage buffer at set 0, binding 0; `cycles` arrives as a push constant. The final store keeps
// the loop from being optimized out (see the comments inside the shader).
static constexpr std::string_view SPIN_SHADER = R"(
|
||||
#version 460 core
|
||||
|
||||
layout(std430, set=0, binding=0) buffer SpinBuffer { uint spin[]; };
|
||||
layout(push_constant) uniform constants { uint cycles; };
|
||||
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uint value = spin[0];
|
||||
// The compiler doesn't know, but spin[0] == 0, so this loop won't actually go anywhere
|
||||
for (uint i = 0; i < cycles; i++)
|
||||
value = spin[value];
|
||||
// Store the result back to the buffer so the compiler can't optimize it away
|
||||
spin[0] = value;
|
||||
}
|
||||
)";
|
||||
|
||||
bool Context::InitSpinResources()
|
||||
{
|
||||
if (!m_spinning_supported)
|
||||
return true;
|
||||
auto spirv = ShaderCompiler::CompileComputeShader(SPIN_SHADER);
|
||||
if (!spirv.has_value())
|
||||
return false;
|
||||
|
||||
VkResult res;
|
||||
#define CHECKED_CREATE(create_fn, create_struct, output_struct) \
|
||||
do { \
|
||||
if ((res = create_fn(m_device, create_struct, nullptr, output_struct)) != VK_SUCCESS) \
|
||||
{ \
|
||||
LOG_VULKAN_ERROR(res, #create_fn " failed: "); \
|
||||
return false; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
VkDescriptorSetLayoutBinding set_layout_binding = {};
|
||||
set_layout_binding.binding = 0;
|
||||
set_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
set_layout_binding.descriptorCount = 1;
|
||||
set_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
VkDescriptorSetLayoutCreateInfo desc_set_layout_create = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };
|
||||
desc_set_layout_create.bindingCount = 1;
|
||||
desc_set_layout_create.pBindings = &set_layout_binding;
|
||||
CHECKED_CREATE(vkCreateDescriptorSetLayout, &desc_set_layout_create, &m_spin_descriptor_set_layout);
|
||||
|
||||
const VkPushConstantRange push_constant_range = { VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(u32) };
|
||||
VkPipelineLayoutCreateInfo pl_layout_create = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };
|
||||
pl_layout_create.setLayoutCount = 1;
|
||||
pl_layout_create.pSetLayouts = &m_spin_descriptor_set_layout;
|
||||
pl_layout_create.pushConstantRangeCount = 1;
|
||||
pl_layout_create.pPushConstantRanges = &push_constant_range;
|
||||
CHECKED_CREATE(vkCreatePipelineLayout, &pl_layout_create, &m_spin_pipeline_layout);
|
||||
|
||||
VkShaderModuleCreateInfo module_create = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO };
|
||||
module_create.codeSize = spirv->size() * sizeof(ShaderCompiler::SPIRVCodeType);
|
||||
module_create.pCode = spirv->data();
|
||||
VkShaderModule shader_module;
|
||||
CHECKED_CREATE(vkCreateShaderModule, &module_create, &shader_module);
|
||||
Util::SetObjectName(m_device, shader_module, "Spin Shader");
|
||||
|
||||
VkComputePipelineCreateInfo pl_create = { VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO };
|
||||
pl_create.layout = m_spin_pipeline_layout;
|
||||
pl_create.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
|
||||
pl_create.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
pl_create.stage.pName = "main";
|
||||
pl_create.stage.module = shader_module;
|
||||
res = vkCreateComputePipelines(m_device, VK_NULL_HANDLE, 1, &pl_create, nullptr, &m_spin_pipeline);
|
||||
vkDestroyShaderModule(m_device, shader_module, nullptr);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: ");
|
||||
return false;
|
||||
}
|
||||
Util::SetObjectName(m_device, m_spin_pipeline, "Spin Pipeline");
|
||||
|
||||
VmaAllocationCreateInfo buf_vma_create = {};
|
||||
buf_vma_create.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||
VkBufferCreateInfo buf_create = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
|
||||
buf_create.size = 4;
|
||||
buf_create.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
|
||||
if ((res = vmaCreateBuffer(m_allocator, &buf_create, &buf_vma_create, &m_spin_buffer, &m_spin_buffer_allocation, nullptr)) != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vmaCreateBuffer failed: ");
|
||||
return false;
|
||||
}
|
||||
Util::SetObjectName(m_device, m_spin_buffer, "Spin Buffer");
|
||||
|
||||
VkDescriptorSetAllocateInfo desc_set_allocate = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO };
|
||||
desc_set_allocate.descriptorPool = m_global_descriptor_pool;
|
||||
desc_set_allocate.descriptorSetCount = 1;
|
||||
desc_set_allocate.pSetLayouts = &m_spin_descriptor_set_layout;
|
||||
if ((res = vkAllocateDescriptorSets(m_device, &desc_set_allocate, &m_spin_descriptor_set)) != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vkAllocateDescriptorSets failed: ");
|
||||
return false;
|
||||
}
|
||||
const VkDescriptorBufferInfo desc_buffer_info = { m_spin_buffer, 0, VK_WHOLE_SIZE };
|
||||
VkWriteDescriptorSet desc_set_write = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET };
|
||||
desc_set_write.dstSet = m_spin_descriptor_set;
|
||||
desc_set_write.dstBinding = 0;
|
||||
desc_set_write.descriptorCount = 1;
|
||||
desc_set_write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
desc_set_write.pBufferInfo = &desc_buffer_info;
|
||||
vkUpdateDescriptorSets(m_device, 1, &desc_set_write, 0, nullptr);
|
||||
|
||||
for (SpinResources& resources : m_spin_resources)
|
||||
{
|
||||
u32 index = &resources - &m_spin_resources[0];
|
||||
VkCommandPoolCreateInfo pool_info = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO };
|
||||
pool_info.queueFamilyIndex = m_spin_queue_family_index;
|
||||
CHECKED_CREATE(vkCreateCommandPool, &pool_info, &resources.command_pool);
|
||||
Vulkan::Util::SetObjectName(m_device, resources.command_pool, "Spin Command Pool %u", index);
|
||||
|
||||
VkCommandBufferAllocateInfo buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO };
|
||||
buffer_info.commandPool = resources.command_pool;
|
||||
buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
buffer_info.commandBufferCount = 1;
|
||||
res = vkAllocateCommandBuffers(m_device, &buffer_info, &resources.command_buffer);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: ");
|
||||
return false;
|
||||
}
|
||||
Vulkan::Util::SetObjectName(m_device, resources.command_buffer, "Spin Command Buffer %u", index);
|
||||
|
||||
VkFenceCreateInfo fence_info = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
|
||||
fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
|
||||
CHECKED_CREATE(vkCreateFence, &fence_info, &resources.fence);
|
||||
Vulkan::Util::SetObjectName(m_device, resources.fence, "Spin Fence %u", index);
|
||||
|
||||
if (!m_spin_queue_is_graphics_queue)
|
||||
{
|
||||
VkSemaphoreCreateInfo sem_info = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
|
||||
CHECKED_CREATE(vkCreateSemaphore, &sem_info, &resources.semaphore);
|
||||
Vulkan::Util::SetObjectName(m_device, resources.semaphore, "Draw to Spin Semaphore %u", index);
|
||||
}
|
||||
}
|
||||
|
||||
#undef CHECKED_CREATE
|
||||
return true;
|
||||
}
|
||||
|
||||
void Context::DestroySpinResources()
|
||||
{
|
||||
#define CHECKED_DESTROY(destructor, obj) \
|
||||
do { \
|
||||
if (obj != VK_NULL_HANDLE) \
|
||||
{ \
|
||||
destructor(m_device, obj, nullptr); \
|
||||
obj = VK_NULL_HANDLE; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
if (m_spin_buffer)
|
||||
{
|
||||
vmaDestroyBuffer(m_allocator, m_spin_buffer, m_spin_buffer_allocation);
|
||||
m_spin_buffer = VK_NULL_HANDLE;
|
||||
m_spin_buffer_allocation = VK_NULL_HANDLE;
|
||||
}
|
||||
CHECKED_DESTROY(vkDestroyPipeline, m_spin_pipeline);
|
||||
CHECKED_DESTROY(vkDestroyPipelineLayout, m_spin_pipeline_layout);
|
||||
CHECKED_DESTROY(vkDestroyDescriptorSetLayout, m_spin_descriptor_set_layout);
|
||||
if (m_spin_descriptor_set != VK_NULL_HANDLE)
|
||||
{
|
||||
vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &m_spin_descriptor_set);
|
||||
m_spin_descriptor_set = VK_NULL_HANDLE;
|
||||
}
|
||||
for (SpinResources& resources : m_spin_resources)
|
||||
{
|
||||
CHECKED_DESTROY(vkDestroySemaphore, resources.semaphore);
|
||||
CHECKED_DESTROY(vkDestroyFence, resources.fence);
|
||||
if (resources.command_buffer != VK_NULL_HANDLE)
|
||||
{
|
||||
vkFreeCommandBuffers(m_device, resources.command_pool, 1, &resources.command_buffer);
|
||||
resources.command_buffer = VK_NULL_HANDLE;
|
||||
}
|
||||
CHECKED_DESTROY(vkDestroyCommandPool, resources.command_pool);
|
||||
}
|
||||
#undef CHECKED_DESTROY
|
||||
}
|
||||
|
||||
// Blocks until the spin submission in slot `index` has finished, then processes its results via
// SpinCommandCompleted(). Does nothing when that slot has no spin in progress.
void Context::WaitForSpinCompletion(u32 index)
{
	SpinResources& slot = m_spin_resources[index];
	if (slot.in_progress)
	{
		// No timeout: wait for as long as the fence takes to signal.
		const VkResult wait_result = vkWaitForFences(m_device, 1, &slot.fence, VK_TRUE, UINT64_MAX);
		if (wait_result != VK_SUCCESS)
			LOG_VULKAN_ERROR(wait_result, "vkWaitForFences failed: ");

		// Completion is processed even when the wait reported an error.
		SpinCommandCompleted(index);
	}
}
|
||||
|
||||
// Marks spin slot `index` as finished, reads back the two timestamps its command buffer wrote and
// reports the measured interval, together with the cycle count, to m_spin_manager.
void Context::SpinCommandCompleted(u32 index)
{
	SpinResources& slot = m_spin_resources[index];
	slot.in_progress = false;

	// Spin queries sit after the NUM_COMMAND_BUFFERS * 2 slots indexed by the draw command buffers.
	const u32 first_query = (index + NUM_COMMAND_BUFFERS) * 2;
	std::array<u64, 2> ticks;
	const VkResult query_result = vkGetQueryPoolResults(m_device, m_timestamp_query_pool, first_query,
		static_cast<u32>(ticks.size()), sizeof(ticks), ticks.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT);
	if (query_result != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(query_result, "vkGetQueryPoolResults failed: ");
		return;
	}

	// Raw ticks are scaled by m_spin_timestamp_scale; with calibrated timestamps the calibration
	// offset is added as well.
	const auto to_time = [this](u64 tick) -> u64 {
		if (m_optional_extensions.vk_ext_calibrated_timestamps)
			return tick * m_spin_timestamp_scale + m_spin_timestamp_offset;
		return tick * m_spin_timestamp_scale;
	};
	const u64 begin = to_time(ticks[0]);
	const u64 end = to_time(ticks[1]);
	m_spin_manager.SpinCompleted(slot.cycles, begin, end);
}
|
||||
|
||||
// Records and submits the spin compute dispatch for slot `index` on m_spin_queue.
//   index  - slot in m_spin_resources (also selects the timestamp query pair).
//   cycles - loop count passed to the spin shader as a push constant.
// On a successful submit the slot is marked in progress and remembers `cycles`. If the submit
// fails, the error is logged and the slot is left idle so that nothing later waits on a fence that
// will never signal.
void Context::SubmitSpinCommand(u32 index, u32 cycles)
{
	SpinResources& resources = m_spin_resources[index];
	VkResult res;

	// Reset fence to unsignaled before starting.
	if ((res = vkResetFences(m_device, 1, &resources.fence)) != VK_SUCCESS)
		LOG_VULKAN_ERROR(res, "vkResetFences failed: ");

	// Reset command pools to beginning since we can re-use the memory now
	if ((res = vkResetCommandPool(m_device, resources.command_pool, 0)) != VK_SUCCESS)
		LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: ");

	// Start recording the spin command buffer again.
	VkCommandBufferBeginInfo begin_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
	begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
	if ((res = vkBeginCommandBuffer(resources.command_buffer, &begin_info)) != VK_SUCCESS)
		LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: ");

	// The first submission also zero-fills the spin buffer (the shader relies on spin[0] == 0).
	const bool records_buffer_init = !m_spin_buffer_initialized;
	if (records_buffer_init)
	{
		m_spin_buffer_initialized = true;
		vkCmdFillBuffer(resources.command_buffer, m_spin_buffer, 0, VK_WHOLE_SIZE, 0);
		VkBufferMemoryBarrier barrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER};
		barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		barrier.srcQueueFamilyIndex = m_spin_queue_family_index;
		barrier.dstQueueFamilyIndex = m_spin_queue_family_index;
		barrier.buffer = m_spin_buffer;
		barrier.offset = 0;
		barrier.size = VK_WHOLE_SIZE;
		vkCmdPipelineBarrier(resources.command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
	}

	// When sharing the graphics queue there is no semaphore to wait on, so order the spin after
	// previously submitted work with an execution barrier instead.
	if (m_spin_queue_is_graphics_queue)
		vkCmdPipelineBarrier(resources.command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 0, nullptr);

	// Spin queries sit after the NUM_COMMAND_BUFFERS * 2 slots indexed by the draw command buffers.
	const u32 timestamp_base = (index + NUM_COMMAND_BUFFERS) * 2;
	vkCmdResetQueryPool(resources.command_buffer, m_timestamp_query_pool, timestamp_base, 2);
	vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, timestamp_base);
	vkCmdPushConstants(resources.command_buffer, m_spin_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(u32), &cycles);
	vkCmdBindPipeline(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_spin_pipeline);
	vkCmdBindDescriptorSets(resources.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_spin_pipeline_layout, 0, 1, &m_spin_descriptor_set, 0, nullptr);
	vkCmdDispatch(resources.command_buffer, 1, 1, 1);
	vkCmdWriteTimestamp(resources.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, m_timestamp_query_pool, timestamp_base + 1);

	if ((res = vkEndCommandBuffer(resources.command_buffer)) != VK_SUCCESS)
		LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");

	VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
	submit_info.commandBufferCount = 1;
	submit_info.pCommandBuffers = &resources.command_buffer;
	VkPipelineStageFlags sema_waits[] = {VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT};
	if (!m_spin_queue_is_graphics_queue)
	{
		// On a separate queue the spin waits on this slot's semaphore before running.
		submit_info.waitSemaphoreCount = 1;
		submit_info.pWaitSemaphores = &resources.semaphore;
		submit_info.pWaitDstStageMask = sema_waits;
	}

	res = vkQueueSubmit(m_spin_queue, 1, &submit_info, resources.fence);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
		// Nothing was queued: the fence reset above will never signal, so the slot must not be
		// reported as in progress (WaitForSpinCompletion waits without a timeout).
		resources.in_progress = false;
		// The buffer clear recorded above never executed either; record it again next time.
		if (records_buffer_init)
			m_spin_buffer_initialized = false;
		// NOTE(review): when a separate spin queue is used, the slot's semaphore may already have
		// been signaled by the draw submission and is now left un-waited - confirm how callers
		// should recover from that.
		return;
	}

	resources.in_progress = true;
	resources.cycles = cycles;
}
|
||||
|
||||
void Context::NotifyOfReadback()
|
||||
{
|
||||
if (!m_spinning_supported)
|
||||
return;
|
||||
m_spin_timer = 30;
|
||||
m_spin_manager.ReadbackRequested();
|
||||
}
|
||||
|
||||
void Context::CalibrateSpinTimestamp()
|
||||
{
|
||||
if (!m_optional_extensions.vk_ext_calibrated_timestamps)
|
||||
return;
|
||||
VkCalibratedTimestampInfoEXT infos[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_calibrated_timestamp_type },
|
||||
};
|
||||
u64 timestamps[2];
|
||||
u64 maxDeviation;
|
||||
constexpr u64 MAX_MAX_DEVIATION = 100000; // 100µs
|
||||
for (int i = 0; i < 4; i++) // 4 tries to get under MAX_MAX_DEVIATION
|
||||
{
|
||||
VkResult res = vkGetCalibratedTimestampsEXT(m_device, std::size(infos), infos, timestamps, &maxDeviation);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vkGetCalibratedTimestampsEXT failed: ");
|
||||
return;
|
||||
}
|
||||
if (maxDeviation < MAX_MAX_DEVIATION)
|
||||
break;
|
||||
}
|
||||
if (maxDeviation >= MAX_MAX_DEVIATION)
|
||||
Console.Warning("vkGetCalibratedTimestampsEXT returned high max deviation of %lluµs", maxDeviation / 1000);
|
||||
const double gpu_time = timestamps[0] * m_spin_timestamp_scale;
|
||||
#ifdef _WIN32
|
||||
const double cpu_time = timestamps[1] * m_queryperfcounter_to_ns;
|
||||
#else
|
||||
const double cpu_time = timestamps[1];
|
||||
#endif
|
||||
m_spin_timestamp_offset = cpu_time - gpu_time;
|
||||
}
|
||||
|
||||
// Returns the current CPU timestamp in nanoseconds.
//  - Windows: QueryPerformanceCounter scaled by m_queryperfcounter_to_ns.
//  - Elsewhere: clock_gettime() on CLOCK_MONOTONIC_RAW when that clock exists and
//    is the domain selected in m_calibrated_timestamp_type, otherwise on
//    CLOCK_MONOTONIC.
u64 Context::GetCPUTimestamp()
{
#ifdef _WIN32
	LARGE_INTEGER counter = {};
	QueryPerformanceCounter(&counter);
	const double nanoseconds = static_cast<double>(counter.QuadPart) * m_queryperfcounter_to_ns;
	return static_cast<u64>(nanoseconds);
#else
#ifdef CLOCK_MONOTONIC_RAW
	// Match the clock that the calibrated timestamps were taken against.
	const clockid_t clock_id =
		(m_calibrated_timestamp_type == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT) ? CLOCK_MONOTONIC_RAW : CLOCK_MONOTONIC;
#else
	const clockid_t clock_id = CLOCK_MONOTONIC;
#endif
	timespec now = {};
	clock_gettime(clock_id, &now);
	const u64 whole_seconds_ns = static_cast<u64>(now.tv_sec) * 1000000000;
	return whole_seconds_ns + now.tv_nsec;
#endif
}
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "common/Pcsx2Defs.h"
|
||||
|
||||
#include "common/ReadbackSpinManager.h"
|
||||
#include "common/Vulkan/Loader.h"
|
||||
#include "common/Vulkan/StreamBuffer.h"
|
||||
|
||||
|
@ -225,6 +226,9 @@ namespace Vulkan
|
|||
float GetAndResetAccumulatedGPUTime();
|
||||
bool SetEnableGPUTiming(bool enabled);
|
||||
|
||||
void CountRenderPass() { m_command_buffer_render_passes++; }
|
||||
void NotifyOfReadback();
|
||||
|
||||
private:
|
||||
Context(VkInstance instance, VkPhysicalDevice physical_device);
|
||||
|
||||
|
@ -272,13 +276,21 @@ namespace Vulkan
|
|||
void ScanForCommandBufferCompletion();
|
||||
void WaitForCommandBufferCompletion(u32 index);
|
||||
|
||||
void DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore);
|
||||
void DoSubmitCommandBuffer(u32 index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, u32 spin_cycles);
|
||||
void DoPresent(VkSemaphore wait_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index);
|
||||
void WaitForPresentComplete(std::unique_lock<std::mutex>& lock);
|
||||
void PresentThread();
|
||||
void StartPresentThread();
|
||||
void StopPresentThread();
|
||||
|
||||
bool InitSpinResources();
|
||||
void DestroySpinResources();
|
||||
void WaitForSpinCompletion(u32 index);
|
||||
void SpinCommandCompleted(u32 index);
|
||||
void SubmitSpinCommand(u32 index, u32 cycles);
|
||||
void CalibrateSpinTimestamp();
|
||||
u64 GetCPUTimestamp();
|
||||
|
||||
struct FrameResources
|
||||
{
|
||||
// [0] - Init (upload) command buffer, [1] - draw command buffer
|
||||
|
@ -287,6 +299,8 @@ namespace Vulkan
|
|||
VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
|
||||
VkFence fence = VK_NULL_HANDLE;
|
||||
u64 fence_counter = 0;
|
||||
s32 spin_id = -1;
|
||||
u32 submit_timestamp = 0;
|
||||
bool init_buffer_used = false;
|
||||
bool needs_fence_wait = false;
|
||||
bool timestamp_written = false;
|
||||
|
@ -294,6 +308,16 @@ namespace Vulkan
|
|||
std::vector<std::function<void()>> cleanup_resources;
|
||||
};
|
||||
|
||||
struct SpinResources
|
||||
{
|
||||
VkCommandPool command_pool = VK_NULL_HANDLE;
|
||||
VkCommandBuffer command_buffer = VK_NULL_HANDLE;
|
||||
VkSemaphore semaphore = VK_NULL_HANDLE;
|
||||
VkFence fence = VK_NULL_HANDLE;
|
||||
u32 cycles = 0;
|
||||
bool in_progress = false;
|
||||
};
|
||||
|
||||
VkInstance m_instance = VK_NULL_HANDLE;
|
||||
VkPhysicalDevice m_physical_device = VK_NULL_HANDLE;
|
||||
VkDevice m_device = VK_NULL_HANDLE;
|
||||
|
@ -308,10 +332,32 @@ namespace Vulkan
|
|||
u32 m_graphics_queue_family_index = 0;
|
||||
u32 m_present_queue_family_index = 0;
|
||||
|
||||
ReadbackSpinManager m_spin_manager;
|
||||
VkQueue m_spin_queue = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_spin_descriptor_set_layout = VK_NULL_HANDLE;
|
||||
VkPipelineLayout m_spin_pipeline_layout = VK_NULL_HANDLE;
|
||||
VkPipeline m_spin_pipeline = VK_NULL_HANDLE;
|
||||
VkBuffer m_spin_buffer = VK_NULL_HANDLE;
|
||||
VmaAllocation m_spin_buffer_allocation = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_spin_descriptor_set = VK_NULL_HANDLE;
|
||||
std::array<SpinResources, NUM_COMMAND_BUFFERS> m_spin_resources;
|
||||
#ifdef _WIN32
|
||||
double m_queryperfcounter_to_ns = 0;
|
||||
#endif
|
||||
double m_spin_timestamp_scale = 0;
|
||||
double m_spin_timestamp_offset = 0;
|
||||
u32 m_spin_queue_family_index = 0;
|
||||
u32 m_command_buffer_render_passes = 0;
|
||||
u32 m_spin_timer = 0;
|
||||
bool m_spinning_supported = false;
|
||||
bool m_spin_queue_is_graphics_queue = false;
|
||||
bool m_spin_buffer_initialized = false;
|
||||
|
||||
VkQueryPool m_timestamp_query_pool = VK_NULL_HANDLE;
|
||||
float m_accumulated_gpu_time = 0.0f;
|
||||
bool m_gpu_timing_enabled = false;
|
||||
bool m_gpu_timing_supported = false;
|
||||
bool m_wants_new_timestamp_calibration = false;
|
||||
VkTimeDomainEXT m_calibrated_timestamp_type = VK_TIME_DOMAIN_DEVICE_EXT;
|
||||
|
||||
std::array<FrameResources, NUM_COMMAND_BUFFERS> m_frame_resources;
|
||||
|
@ -336,6 +382,7 @@ namespace Vulkan
|
|||
VkSwapchainKHR present_swap_chain;
|
||||
u32 command_buffer_index;
|
||||
u32 present_image_index;
|
||||
u32 spin_cycles;
|
||||
};
|
||||
|
||||
QueuedPresent m_queued_present = {};
|
||||
|
|
|
@ -462,6 +462,16 @@ bool GSDeviceVK::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTextu
|
|||
}
|
||||
|
||||
ExecuteCommandBuffer(true);
|
||||
if (GSConfig.HWSpinGPUForReadbacks)
|
||||
{
|
||||
g_vulkan_context->NotifyOfReadback();
|
||||
if (!g_vulkan_context->GetOptionalExtensions().vk_ext_calibrated_timestamps && !m_warned_slow_spin)
|
||||
{
|
||||
m_warned_slow_spin = true;
|
||||
Host::AddKeyedOSDMessage("GSDeviceVK_NoCalibratedTimestamps",
|
||||
"Spin GPU During Readbacks is enabled, but calibrated timestamps are unavailable. This might be really slow.", 10.0f);
|
||||
}
|
||||
}
|
||||
|
||||
// invalidate cpu cache before reading
|
||||
VkResult res = vmaInvalidateAllocation(g_vulkan_context->GetAllocator(), m_readback_staging_allocation, 0, size);
|
||||
|
@ -2389,6 +2399,7 @@ void GSDeviceVK::BeginRenderPass(VkRenderPass rp, const GSVector4i& rect)
|
|||
m_current_framebuffer, {{rect.x, rect.y}, {static_cast<u32>(rect.width()), static_cast<u32>(rect.height())}}, 0,
|
||||
nullptr};
|
||||
|
||||
g_vulkan_context->CountRenderPass();
|
||||
vkCmdBeginRenderPass(g_vulkan_context->GetCurrentCommandBuffer(), &begin_info, VK_SUBPASS_CONTENTS_INLINE);
|
||||
}
|
||||
|
||||
|
|
|
@ -109,6 +109,7 @@ private:
|
|||
VkBuffer m_readback_staging_buffer = VK_NULL_HANDLE;
|
||||
void* m_readback_staging_buffer_map = nullptr;
|
||||
u32 m_readback_staging_buffer_size = 0;
|
||||
bool m_warned_slow_spin = false;
|
||||
|
||||
VkSampler m_point_sampler = VK_NULL_HANDLE;
|
||||
VkSampler m_linear_sampler = VK_NULL_HANDLE;
|
||||
|
|
Loading…
Reference in New Issue