Merge pull request #8379 from stenzek/mali
Vulkan: Optimizations for Mali with EFB2RAM on
This commit is contained in:
commit
d3ee0a4535
|
@ -75,13 +75,15 @@ const ConfigInfo<bool> GFX_ENABLE_VALIDATION_LAYER{
|
|||
#if defined(ANDROID)
|
||||
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
||||
{System::GFX, "Settings", "BackendMultithreading"}, false};
|
||||
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
|
||||
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 0};
|
||||
#else
|
||||
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
||||
{System::GFX, "Settings", "BackendMultithreading"}, true};
|
||||
#endif
|
||||
|
||||
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
|
||||
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100};
|
||||
#endif
|
||||
|
||||
const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true};
|
||||
const ConfigInfo<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{
|
||||
{System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false};
|
||||
|
|
|
@ -172,8 +172,10 @@ bool BoundingBox::CreateGPUBuffer()
|
|||
VkMemoryRequirements memory_requirements;
|
||||
vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
|
||||
|
||||
uint32_t memory_type_index = g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
uint32_t memory_type_index = g_vulkan_context
|
||||
->GetMemoryType(memory_requirements.memoryTypeBits,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false)
|
||||
.value_or(0);
|
||||
VkMemoryAllocateInfo memory_allocate_info = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType
|
||||
nullptr, // const void* pNext
|
||||
|
|
|
@ -87,8 +87,10 @@ std::unique_ptr<VKTexture> VKTexture::Create(const TextureConfig& tex_config)
|
|||
|
||||
VkMemoryAllocateInfo memory_info = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size,
|
||||
g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)};
|
||||
g_vulkan_context
|
||||
->GetMemoryType(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
false)
|
||||
.value_or(0)};
|
||||
|
||||
VkDeviceMemory device_memory;
|
||||
res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory);
|
||||
|
|
|
@ -710,96 +710,107 @@ void VulkanContext::DisableDebugReports()
|
|||
}
|
||||
}
|
||||
|
||||
bool VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index)
|
||||
std::optional<u32> VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties,
|
||||
bool strict, bool* is_coherent)
|
||||
{
|
||||
static constexpr u32 ALL_MEMORY_PROPERTY_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||
|
||||
const u32 mask = strict ? ALL_MEMORY_PROPERTY_FLAGS : properties;
|
||||
|
||||
for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++)
|
||||
{
|
||||
if ((bits & (1 << i)) != 0)
|
||||
{
|
||||
u32 supported = m_device_memory_properties.memoryTypes[i].propertyFlags & properties;
|
||||
const VkMemoryPropertyFlags type_flags =
|
||||
m_device_memory_properties.memoryTypes[i].propertyFlags;
|
||||
const VkMemoryPropertyFlags supported = type_flags & mask;
|
||||
if (supported == properties)
|
||||
{
|
||||
*out_type_index = i;
|
||||
return true;
|
||||
if (is_coherent)
|
||||
*is_coherent = (type_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties)
|
||||
{
|
||||
u32 type_index = VK_MAX_MEMORY_TYPES;
|
||||
if (!GetMemoryType(bits, properties, &type_index))
|
||||
PanicAlert("Unable to find memory type for %x:%x", bits, properties);
|
||||
|
||||
return type_index;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Selects a memory type index for CPU->GPU upload buffers.
//
// bits        - bitmask of candidate memory type indices.
// is_coherent - optional out-parameter; receives whether the selected type is
//               host-coherent. Set to false when no type could be found.
//
// Preference order:
//   1. host-visible + host-coherent, with no other host flags (strict match);
//   2. host-visible + host-coherent, with any other flags;
//   3. any host-visible type.
// Returns 0 (after raising a panic alert) if none of these exist.
u32 VulkanContext::GetUploadMemoryType(u32 bits, bool* is_coherent)
{
  static constexpr VkMemoryPropertyFlags COHERENT_FLAGS =
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

  // Try for coherent memory. Some drivers (looking at you, Adreno) have the cached type before the
  // uncached type, so use a strict check first.
  std::optional<u32> type_index = GetMemoryType(bits, COHERENT_FLAGS, true, is_coherent);
  if (type_index)
    return type_index.value();

  // Try for coherent memory, with any other bits set.
  type_index = GetMemoryType(bits, COHERENT_FLAGS, false, is_coherent);
  if (type_index)
  {
    WARN_LOG(VIDEO,
             "Strict check for upload memory properties failed, this may affect performance");
    return type_index.value();
  }

  // Fall back to non-coherent memory.
  WARN_LOG(
      VIDEO,
      "Vulkan: Failed to find a coherent memory type for uploads, this will affect performance.");
  type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent);
  if (type_index)
    return type_index.value();

  // Shouldn't happen, there should be at least one host-visible heap.
  PanicAlert("Unable to get memory type for upload.");
  // None of the lookups wrote the out-parameter; give the caller a defined,
  // conservative value instead of leaving it unset.
  if (is_coherent)
    *is_coherent = false;
  return 0;
}
|
||||
|
||||
// Selects a memory type index for GPU->CPU readback buffers.
//
// bits        - bitmask of candidate memory type indices.
// is_coherent - optional out-parameter (may be nullptr); receives whether the
//               selected type should be treated as host-coherent.
//
// Preference order:
//   0. if the driver has BUG_SLOW_CACHED_READBACK_MEMORY: host-visible + coherent
//      without the cached flag (strict match);
//   1. host-visible + cached + coherent (strict match);
//   2. host-visible + cached, any other flags;
//   3. any host-visible type (reported as non-coherent).
// Returns 0 (after raising a panic alert) if none of these exist.
u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent)
{
  std::optional<u32> type_index;

  // Mali driver appears to be significantly slower for readbacks when using cached memory.
  if (DriverDetails::HasBug(DriverDetails::BUG_SLOW_CACHED_READBACK_MEMORY))
  {
    type_index = GetMemoryType(
        bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, true,
        is_coherent);
    if (type_index)
      return type_index.value();
  }

  // Optimal config uses cached+coherent.
  type_index =
      GetMemoryType(bits,
                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                    true, is_coherent);
  if (type_index)
    return type_index.value();

  // Otherwise, prefer cached over coherent if we must choose one.
  type_index =
      GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                    false, is_coherent);
  if (type_index)
    return type_index.value();

  WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect "
                  "performance.");
  type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent);
  // The last-resort type is always reported as non-coherent. The out-parameter
  // is optional (defaults to nullptr), so it must be checked before writing.
  if (is_coherent)
    *is_coherent = false;
  if (type_index)
    return type_index.value();

  // We should have at least one host visible memory type...
  PanicAlert("Unable to get memory type for readback.");
  return 0;
}
|
||||
|
||||
void VulkanContext::InitDriverDetails()
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
@ -99,12 +100,12 @@ public:
|
|||
// Returns the maxSamplerAnisotropy limit from the stored device properties.
float GetMaxSamplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; }
|
||||
// Finds a memory type index for the specified memory properties and the bits returned by
|
||||
// vkGetImageMemoryRequirements
|
||||
bool GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index);
|
||||
u32 GetMemoryType(u32 bits, VkMemoryPropertyFlags properties);
|
||||
std::optional<u32> GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, bool strict,
|
||||
bool* is_coherent = nullptr);
|
||||
|
||||
// Finds a memory type for upload or readback buffers.
|
||||
u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr);
|
||||
u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr, bool* is_cached = nullptr);
|
||||
u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr);
|
||||
|
||||
private:
|
||||
using ExtensionList = std::vector<const char*>;
|
||||
|
|
|
@ -113,7 +113,11 @@ constexpr BugInfo m_known_bugs[] = {
|
|||
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
|
||||
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
|
||||
{API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN,
|
||||
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}};
|
||||
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
|
||||
{API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY,
|
||||
-1.0, -1.0, true},
|
||||
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
|
||||
BUG_SLOW_CACHED_READBACK_MEMORY, -1.0, -1.0, true}};
|
||||
|
||||
static std::map<Bug, BugInfo> m_bugs;
|
||||
|
||||
|
|
|
@ -281,6 +281,11 @@ enum Bug
|
|||
// The Vulkan spec allows the minDepth/maxDepth fields in the viewport to be reversed,
|
||||
// however the implementation is broken on some drivers.
|
||||
BUG_BROKEN_REVERSED_DEPTH_RANGE,
|
||||
|
||||
// BUG: Cached memory is significantly slower for readbacks than coherent memory in the
|
||||
// Mali Vulkan driver, causing high CPU usage in the __pi___inval_cache_range kernel
|
||||
// function. This flag causes readback buffers to select the coherent type.
|
||||
BUG_SLOW_CACHED_READBACK_MEMORY,
|
||||
};
|
||||
|
||||
// Initializes our internal vendor, device family, and driver version
|
||||
|
|
Loading…
Reference in New Issue