Merge pull request #8379 from stenzek/mali

Vulkan: Optimizations for Mali with EFB2RAM on
This commit is contained in:
Connor McLaughlin 2019-10-31 22:44:19 +10:00 committed by GitHub
commit d3ee0a4535
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 98 additions and 71 deletions

View File

@ -75,13 +75,15 @@ const ConfigInfo<bool> GFX_ENABLE_VALIDATION_LAYER{
#if defined(ANDROID) #if defined(ANDROID)
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{ const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
{System::GFX, "Settings", "BackendMultithreading"}, false}; {System::GFX, "Settings", "BackendMultithreading"}, false};
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 0};
#else #else
const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{ const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
{System::GFX, "Settings", "BackendMultithreading"}, true}; {System::GFX, "Settings", "BackendMultithreading"}, true};
#endif
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{ const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100}; {System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100};
#endif
const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true}; const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true};
const ConfigInfo<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{ const ConfigInfo<bool> GFX_WAIT_FOR_SHADERS_BEFORE_STARTING{
{System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false}; {System::GFX, "Settings", "WaitForShadersBeforeStarting"}, false};

View File

@ -172,8 +172,10 @@ bool BoundingBox::CreateGPUBuffer()
VkMemoryRequirements memory_requirements; VkMemoryRequirements memory_requirements;
vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements); vkGetBufferMemoryRequirements(g_vulkan_context->GetDevice(), buffer, &memory_requirements);
uint32_t memory_type_index = g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, uint32_t memory_type_index = g_vulkan_context
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); ->GetMemoryType(memory_requirements.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false)
.value_or(0);
VkMemoryAllocateInfo memory_allocate_info = { VkMemoryAllocateInfo memory_allocate_info = {
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType
nullptr, // const void* pNext nullptr, // const void* pNext

View File

@ -87,8 +87,10 @@ std::unique_ptr<VKTexture> VKTexture::Create(const TextureConfig& tex_config)
VkMemoryAllocateInfo memory_info = { VkMemoryAllocateInfo memory_info = {
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size,
g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, g_vulkan_context
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; ->GetMemoryType(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
false)
.value_or(0)};
VkDeviceMemory device_memory; VkDeviceMemory device_memory;
res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory);

View File

@ -710,96 +710,107 @@ void VulkanContext::DisableDebugReports()
} }
} }
bool VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index) std::optional<u32> VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties,
bool strict, bool* is_coherent)
{ {
static constexpr u32 ALL_MEMORY_PROPERTY_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
const u32 mask = strict ? ALL_MEMORY_PROPERTY_FLAGS : properties;
for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++) for (u32 i = 0; i < VK_MAX_MEMORY_TYPES; i++)
{ {
if ((bits & (1 << i)) != 0) if ((bits & (1 << i)) != 0)
{ {
u32 supported = m_device_memory_properties.memoryTypes[i].propertyFlags & properties; const VkMemoryPropertyFlags type_flags =
m_device_memory_properties.memoryTypes[i].propertyFlags;
const VkMemoryPropertyFlags supported = type_flags & mask;
if (supported == properties) if (supported == properties)
{ {
*out_type_index = i; if (is_coherent)
return true; *is_coherent = (type_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
return i;
} }
} }
} }
return false; return std::nullopt;
}
u32 VulkanContext::GetMemoryType(u32 bits, VkMemoryPropertyFlags properties)
{
u32 type_index = VK_MAX_MEMORY_TYPES;
if (!GetMemoryType(bits, properties, &type_index))
PanicAlert("Unable to find memory type for %x:%x", bits, properties);
return type_index;
} }
// Chooses a memory type for upload (CPU-written) buffers from the candidates in `bits`.
//
// Preference order:
//   1. host-visible + host-coherent with no other host flags (strict match),
//   2. host-visible + host-coherent with any additional flags,
//   3. any host-visible type.
// If `is_coherent` is non-null it receives whether the selected type is host-coherent; it is
// not written when nothing matches. Returns 0 after raising a panic alert if no host-visible
// type exists at all.
u32 VulkanContext::GetUploadMemoryType(u32 bits, bool* is_coherent)
{
  static constexpr VkMemoryPropertyFlags COHERENT_FLAGS =
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

  // Some drivers (looking at you, Adreno) list the cached type ahead of the uncached one, so
  // the first pass insists on an exact match to avoid picking up the cached variant.
  if (const std::optional<u32> exact_match = GetMemoryType(bits, COHERENT_FLAGS, true, is_coherent))
    return *exact_match;

  // Second pass: still coherent, but tolerate whatever other bits the type carries.
  if (const std::optional<u32> relaxed_match =
          GetMemoryType(bits, COHERENT_FLAGS, false, is_coherent))
  {
    WARN_LOG(VIDEO,
             "Strict check for upload memory properties failed, this may affect performance");
    return *relaxed_match;
  }

  // Last resort: give up on coherency and take anything the host can map.
  WARN_LOG(
      VIDEO,
      "Vulkan: Failed to find a coherent memory type for uploads, this will affect performance.");
  if (const std::optional<u32> visible_only =
          GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent))
  {
    return *visible_only;
  }

  // Shouldn't happen, there should be at least one host-visible heap.
  PanicAlert("Unable to get memory type for upload.");
  return 0;
}
// Chooses a memory type for readback (CPU-read) buffers from the candidates in `bits`.
//
// Preference order:
//   0. only on drivers flagged with BUG_SLOW_CACHED_READBACK_MEMORY: host-visible +
//      host-coherent with the cached flag absent (strict match),
//   1. host-visible + host-cached + host-coherent (strict match),
//   2. host-visible + host-cached, with any additional flags,
//   3. any host-visible type.
//
// is_coherent is optional (the declaration defaults it to nullptr). When non-null it receives
// whether the selected type is host-coherent; on the last-resort path (3) it is always set to
// false. Returns 0 after raising a panic alert if no host-visible type exists at all.
u32 VulkanContext::GetReadbackMemoryType(u32 bits, bool* is_coherent)
{
  std::optional<u32> type_index;

  // Mali driver appears to be significantly slower for readbacks when using cached memory.
  if (DriverDetails::HasBug(DriverDetails::BUG_SLOW_CACHED_READBACK_MEMORY))
  {
    type_index = GetMemoryType(
        bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, true,
        is_coherent);
    if (type_index)
      return type_index.value();
  }

  // Optimal config uses cached+coherent.
  type_index =
      GetMemoryType(bits,
                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                    true, is_coherent);
  if (type_index)
    return type_index.value();

  // Otherwise, prefer cached over coherent if we must choose one.
  type_index =
      GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                    false, is_coherent);
  if (type_index)
    return type_index.value();

  WARN_LOG(VIDEO, "Vulkan: Failed to find a cached memory type for readbacks, this will affect "
                  "performance.");

  type_index = GetMemoryType(bits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, false, is_coherent);

  // Report non-coherent on this path regardless of what the lookup found. The pointer is
  // optional, so it must be checked before writing through it.
  if (is_coherent)
    *is_coherent = false;

  if (type_index)
    return type_index.value();

  // We should have at least one host visible memory type...
  PanicAlert("Unable to get memory type for readback.");
  return 0;
}
void VulkanContext::InitDriverDetails() void VulkanContext::InitDriverDetails()

View File

@ -5,6 +5,7 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <optional>
#include <vector> #include <vector>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
@ -99,12 +100,12 @@ public:
float GetMaxSamplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; } float GetMaxSamplerAnisotropy() const { return m_device_properties.limits.maxSamplerAnisotropy; }
// Finds a memory type index for the specified memory properties and the bits returned by // Finds a memory type index for the specified memory properties and the bits returned by
// vkGetImageMemoryRequirements // vkGetImageMemoryRequirements
bool GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, u32* out_type_index); std::optional<u32> GetMemoryType(u32 bits, VkMemoryPropertyFlags properties, bool strict,
u32 GetMemoryType(u32 bits, VkMemoryPropertyFlags properties); bool* is_coherent = nullptr);
// Finds a memory type for upload or readback buffers. // Finds a memory type for upload or readback buffers.
u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr); u32 GetUploadMemoryType(u32 bits, bool* is_coherent = nullptr);
u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr, bool* is_cached = nullptr); u32 GetReadbackMemoryType(u32 bits, bool* is_coherent = nullptr);
private: private:
using ExtensionList = std::vector<const char*>; using ExtensionList = std::vector<const char*>;

View File

@ -113,7 +113,11 @@ constexpr BugInfo m_known_bugs[] = {
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN, {API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}, BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
{API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN, {API_VULKAN, OS_OSX, VENDOR_ALL, DRIVER_PORTABILITY, Family::UNKNOWN,
BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true}}; BUG_BROKEN_REVERSED_DEPTH_RANGE, -1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_ARM, DRIVER_ARM, Family::UNKNOWN, BUG_SLOW_CACHED_READBACK_MEMORY,
-1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, Family::UNKNOWN,
BUG_SLOW_CACHED_READBACK_MEMORY, -1.0, -1.0, true}};
static std::map<Bug, BugInfo> m_bugs; static std::map<Bug, BugInfo> m_bugs;

View File

@ -281,6 +281,11 @@ enum Bug
// The Vulkan spec allows the minDepth/maxDepth fields in the viewport to be reversed, // The Vulkan spec allows the minDepth/maxDepth fields in the viewport to be reversed,
// however the implementation is broken on some drivers. // however the implementation is broken on some drivers.
BUG_BROKEN_REVERSED_DEPTH_RANGE, BUG_BROKEN_REVERSED_DEPTH_RANGE,
// BUG: Host-cached memory is significantly slower for readbacks than uncached coherent memory
// in the Mali Vulkan driver, causing high CPU usage in the __pi___inval_cache_range kernel
// function. The bug table also sets this flag for the Qualcomm Vulkan driver. When set,
// readback buffers first try a host-visible, host-coherent memory type that is not cached.
BUG_SLOW_CACHED_READBACK_MEMORY,
}; };
// Initializes our internal vendor, device family, and driver version // Initializes our internal vendor, device family, and driver version