Merge pull request #8379 from stenzek/mali
Vulkan: Optimizations for Mali with EFB2RAM on
This commit is contained in:
commit
d3ee0a4535
|
@ -75,13 +75,15 @@ const ConfigInfo<bool> GFX_ENABLE_VALIDATION_LAYER{
|
||||||
#if defined(ANDROID)
|
#if defined(ANDROID)
|
||||||
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
||||||
{System::GFX, "Settings", "BackendMultithreading"}, false};
|
{System::GFX, "Settings", "BackendMultithreading"}, false};
|
||||||
|
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
|
||||||
|
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 0};
|
||||||
#else
|
#else
|
||||||
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
||||||
{System::GFX, "Settings", "BackendMultithreading"}, true};
|
{System::GFX, "Settings", "BackendMultithreading"}, true};
|
||||||
#endif
|
|
||||||
|
|
||||||
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
|
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
|
||||||
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100};
|
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100};
|
||||||
|
#endif
|
||||||
|
|
||||||
const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true};
|
const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true};
|
||||||
const ConfigInfo<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{
|
const ConfigInfo<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{
|
||||||
{System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false};
|
{System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false};
|
||||||
|
|
|
@ -172,8 +172,10 @@ bool BoundingBox::CreateGPUBuffer()
|
||||||
VkMemoryRequirements memory_requirements;
|
VkMemoryRequirements memory_requirements;
|
||||||
vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
|
vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
|
||||||
|
|
||||||
uint32_t memory_type_index = g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits,
|
uint32_t memory_type_index = g_vulkan_context
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
->GetMemoryType(memory_requirements.memoryTypeBits,
|
||||||
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false)
|
||||||
|
.value_or(0);
|
||||||
VkMemoryAllocateInfo memory_allocate_info = {
|
VkMemoryAllocateInfo memory_allocate_info = {
|
||||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType
|
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType
|
||||||
nullptr, // const void* pNext
|
nullptr, // const void* pNext
|
||||||
|
|
|
@ -87,8 +87,10 @@ std::unique_ptr<VKTexture> VKTexture::Create(const TextureConfig& tex_config)
|
||||||
|
|
||||||
VkMemoryAllocateInfo memory_info = {
|
VkMemoryAllocateInfo memory_info = {
|
||||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size,
|
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size,
|
||||||
g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits,
|
g_vulkan_context
|
||||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)};
|
->GetMemoryType(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||||
|
false)
|
||||||
|
.value_or(0)};
|
||||||
|
|
||||||
VkDeviceMemory device_memory;
|
VkDeviceMemory device_memory;
|
||||||
res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory);
|
res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory);
|
||||||
|
|
|
@ -710,96 +710,107 @@ void VulkanContext::DisableDebugReports()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index)
|
std::optional<u32> VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties,
|
||||||
|
bool strict, bool* is_coherent)
|
||||||
{
|
{
|
||||||
|
static constexpr u32 ALL_MEMORY_PROPERTY_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||||
|
|
||||||
|
const u32 mask = strict ? ALL_MEMORY_PROPERTY_FLAGS : properties;
|
||||||
|
|
||||||
for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++)
|
for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++)
|
||||||
{
|
{
|
||||||
if ((bits & (1 << i)) != 0)
|
if ((bits & (1 << i)) != 0)
|
||||||
{
|
{
|
||||||
u32 supported = m_device_memory_properties.memoryTypes[i].propertyFlags & properties;
|
const VkMemoryPropertyFlags type_flags =
|
||||||
|
m_device_memory_properties.memoryTypes[i].propertyFlags;
|
||||||
|
const VkMemoryPropertyFlags supported = type_flags & mask;
|
||||||
if (supported == properties)
|
if (supported == properties)
|
||||||
{
|
{
|
||||||
*out_type_index = i;
|
if (is_coherent)
|
||||||
return true;
|
*is_coherent = (type_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
|
||||||
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return std::nullopt;
|
||||||
}
|
|
||||||
|
|
||||||
u32 VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties)
|
|
||||||
{
|
|
||||||
u32 type_index = VK_MAX_MEMORY_TYPES;
|
|
||||||
if (!GetMemoryType(bits, properties, &type_index))
|
|
||||||
PanicAlert("Unable to find memory type for %x:%x", bits, properties);
|
|
||||||
|
|
||||||
return type_index;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 VulkanContext::GetUploadMemoryType(u32 bits, bool* is_coherent)
|
u32 VulkanContext::GetUploadMemoryType(u32 bits, bool* is_coherent)
|
||||||
{
|
{
|
||||||
// Try for coherent memory first.
|
static constexpr VkMemoryPropertyFlags COHERENT_FLAGS =
|
||||||
VkMemoryPropertyFlags flags =
|
|
||||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||||
|
|
||||||
u32 type_index;
|
// Try for coherent memory. Some drivers (looking at you, Adreno) have the cached type before the
|
||||||
if (!GetMemoryType(bits, flags, &type_index))
|
// uncached type, so use a strict check first.
|
||||||
{
|
std::optional<u32> type_index = GetMemoryType(bits, COHERENT_FLAGS, true, is_coherent);
|
||||||
WARN_LOG(
|
if (type_index)
|
||||||
VIDEO,
|
return type_index.value();
|
||||||
"Vulkan: Failed to find a coherent memory type for uploads, this will affect performance.");
|
|
||||||
|
|
||||||
// Try non-coherent memory.
|
// Try for coherent memory, with any other bits set.
|
||||||
flags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
type_index = GetMemoryType(bits, COHERENT_FLAGS, false, is_coherent);
|
||||||
if (!GetMemoryType(bits, flags, &type_index))
|
if (type_index)
|
||||||
{
|
{
|
||||||
// We shouldn't have any memory types that aren't host-visible.
|
WARN_LOG(VIDEO,
|
||||||
PanicAlert("Unable to get memory type for upload.");
|
"Strict check for upload memory properties failed, this may affect performance");
|
||||||
type_index = 0;
|
return type_index.value();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_coherent)
|
// Fall back to non-coherent memory.
|
||||||
*is_coherent = ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0);
|
WARN_LOG(
|
||||||
|
VIDEO,
|
||||||
|
"Vulkan: Failed to find a coherent memory type for uploads, this will affect performance.");
|
||||||
|
type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent);
|
||||||
|
if (type_index)
|
||||||
|
return type_index.value();
|
||||||
|
|
||||||
return type_index;
|
// Shouldn't happen, there should be at least one host-visible heap.
|
||||||
|
PanicAlert("Unable to get memory type for upload.");
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent, bool* is_cached)
|
u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent)
|
||||||
{
|
{
|
||||||
// Try for cached and coherent memory first.
|
std::optional<u32> type_index;
|
||||||
VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
||||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
|
||||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
|
||||||
|
|
||||||
u32 type_index;
|
// Mali driver appears to be significantly slower for readbacks when using cached memory.
|
||||||
if (!GetMemoryType(bits, flags, &type_index))
|
if (DriverDetails::HasBug(DriverDetails::BUG_SLOW_CACHED_READBACK_MEMORY))
|
||||||
{
|
{
|
||||||
// For readbacks, caching is more important than coherency.
|
type_index = GetMemoryType(
|
||||||
flags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, true,
|
||||||
if (!GetMemoryType(bits, flags, &type_index))
|
is_coherent);
|
||||||
{
|
if (type_index)
|
||||||
WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect "
|
return type_index.value();
|
||||||
"performance.");
|
|
||||||
|
|
||||||
// Remove the cached bit as well.
|
|
||||||
flags &= ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
|
||||||
if (!GetMemoryType(bits, flags, &type_index))
|
|
||||||
{
|
|
||||||
// We shouldn't have any memory types that aren't host-visible.
|
|
||||||
PanicAlert("Unable to get memory type for upload.");
|
|
||||||
type_index = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_coherent)
|
// Optimal config uses cached+coherent.
|
||||||
*is_coherent = ((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0);
|
type_index =
|
||||||
if (is_cached)
|
GetMemoryType(bits,
|
||||||
*is_cached = ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0);
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||||
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||||
|
true, is_coherent);
|
||||||
|
if (type_index)
|
||||||
|
return type_index.value();
|
||||||
|
|
||||||
return type_index;
|
// Otherwise, prefer cached over coherent if we must choose one.
|
||||||
|
type_index =
|
||||||
|
GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||||
|
false, is_coherent);
|
||||||
|
if (type_index)
|
||||||
|
return type_index.value();
|
||||||
|
|
||||||
|
WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect "
|
||||||
|
"performance.");
|
||||||
|
type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent);
|
||||||
|
*is_coherent = false;
|
||||||
|
if (type_index)
|
||||||
|
return type_index.value();
|
||||||
|
|
||||||
|
// We should have at least one host visible memory type...
|
||||||
|
PanicAlert("Unable to get memory type for upload.");
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VulkanContext::InitDriverDetails()
|
void VulkanContext::InitDriverDetails()
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
@ -99,12 +100,12 @@ public:
|
||||||
float GetMaxSamplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; }
|
float GetMaxSamplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; }
|
||||||
// Finds a memory type index for the specified memory properties and the bits returned by
|
// Finds a memory type index for the specified memory properties and the bits returned by
|
||||||
// vkGetImageMemoryRequirements
|
// vkGetImageMemoryRequirements
|
||||||
bool GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index);
|
std::optional<u32> GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, bool strict,
|
||||||
u32 GetMemoryType(u32 bits, VkMemoryPropertyFlags properties);
|
bool* is_coherent = nullptr);
|
||||||
|
|
||||||
// Finds a memory type for upload or readback buffers.
|
// Finds a memory type for upload or readback buffers.
|
||||||
u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr);
|
u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr);
|
||||||
u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr, bool* is_cached = nullptr);
|
u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using ExtensionList = std::vector<const char*>;
|
using ExtensionList = std::vector<const char*>;
|
||||||
|
|
|
@ -113,7 +113,11 @@ constexpr BugInfo m_known_bugs[] = {
|
||||||
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
|
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
|
||||||
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
|
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
|
||||||
{API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN,
|
{API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN,
|
||||||
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}};
|
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
|
||||||
|
{API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY,
|
||||||
|
-1.0, -1.0, true},
|
||||||
|
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
|
||||||
|
BUG_SLOW_CACHED_READBACK_MEMORY, -1.0, -1.0, true}};
|
||||||
|
|
||||||
static std::map<Bug, BugInfo> m_bugs;
|
static std::map<Bug, BugInfo> m_bugs;
|
||||||
|
|
||||||
|
|
|
@ -281,6 +281,11 @@ enum Bug
|
||||||
// The Vulkan spec allows the minDepth/maxDepth fields in the viewport to be reversed,
|
// The Vulkan spec allows the minDepth/maxDepth fields in the viewport to be reversed,
|
||||||
// however the implementation is broken on some drivers.
|
// however the implementation is broken on some drivers.
|
||||||
BUG_BROKEN_REVERSED_DEPTH_RANGE,
|
BUG_BROKEN_REVERSED_DEPTH_RANGE,
|
||||||
|
|
||||||
|
// BUG: Cached memory is significantly slower for readbacks than coherent memory in the
|
||||||
|
// Mali Vulkan driver, causing high CPU usage in the __pi___inval_cache_range kernel
|
||||||
|
// function. This flag causes readback buffers to select the coherent type.
|
||||||
|
BUG_SLOW_CACHED_READBACK_MEMORY,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Initializes our internal vendor, device family, and driver version
|
// Initializes our internal vendor, device family, and driver version
|
||||||
|
|
Loading…
Reference in New Issue