From 010c7dea33dd1de1cb3d61373fa9dd88354855e7 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 10 Aug 2024 12:21:51 +1000 Subject: [PATCH] dep: Bump vk_mem_alloc to 871913d --- dep/vulkan/include/vulkan/vk_mem_alloc.h | 3042 ++++++++-------------- src/util/vulkan_loader.h | 1 + 2 files changed, 1051 insertions(+), 1992 deletions(-) diff --git a/dep/vulkan/include/vulkan/vk_mem_alloc.h b/dep/vulkan/include/vulkan/vk_mem_alloc.h index dc116b0c1..b24942a13 100644 --- a/dep/vulkan/include/vulkan/vk_mem_alloc.h +++ b/dep/vulkan/include/vulkan/vk_mem_alloc.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -25,14 +25,17 @@ /** \mainpage Vulkan Memory Allocator -Version 3.1.0-development +Version 3.1.0 -Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. \n -License: MIT +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. \n +License: MIT \n +See also: [product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/), +[repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) -API documentation divided into groups: [Modules](modules.html) -\section main_table_of_contents Table of contents +API documentation divided into groups: [Topics](topics.html) + +General documentation chapters: - User guide - \subpage quick_start @@ -46,6 +49,7 @@ License: MIT - [Custom memory pools](@ref choosing_memory_type_custom_memory_pools) - [Dedicated allocations](@ref choosing_memory_type_dedicated_allocations) - \subpage memory_mapping + - [Copy functions](@ref memory_mapping_copy_functions) - [Mapping functions](@ref memory_mapping_mapping_functions) - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) - [Cache flush and invalidate](@ref memory_mapping_cache_control) @@ -55,6 +59,7 @@ License: MIT - \subpage resource_aliasing - \subpage custom_memory_pools - [Choosing memory type index](@ref custom_memory_pools_MemTypeIndex) + - [When not to use custom pools](@ref custom_memory_pools_when_not_use) - [Linear allocation algorithm](@ref linear_algorithm) - [Free-at-once](@ref linear_algorithm_free_at_once) - [Stack](@ref linear_algorithm_stack) @@ -72,7 +77,8 @@ License: MIT - [Memory initialization](@ref debugging_memory_usage_initialization) - [Margins](@ref debugging_memory_usage_margins) - [Corruption detection](@ref debugging_memory_usage_corruption_detection) - - \subpage opengl_interop + - [Leak detection features](@ref debugging_memory_usage_leak_detection) + - \subpage other_api_interop - \subpage usage_patterns - [GPU-only resource](@ref usage_patterns_gpu_only) - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) @@ -96,11 +102,6 @@ License: MIT - [Allocation algorithm](@ref general_considerations_allocation_algorithm) - [Features not supported](@ref general_considerations_features_not_supported) -\section main_see_also See also - -- [**Product page on GPUOpen**](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) -- [**Source repository on GitHub**](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) - \defgroup group_init Library initialization \brief API elements related to the initialization and management of the entire library, especially #VmaAllocator object. @@ -126,8 +127,8 @@ See documentation chapter: \ref statistics. extern "C" { #endif -#ifndef VULKAN_H_ - #include +#if !defined(VULKAN_H_) +#include #endif #if !defined(VMA_VULKAN_VERSION) @@ -213,6 +214,25 @@ extern "C" { #endif #endif +// Defined to 1 when VK_KHR_maintenance4 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE4) + #if VK_KHR_maintenance4 + #define VMA_KHR_MAINTENANCE4 1 + #else + #define VMA_KHR_MAINTENANCE4 0 + #endif +#endif + +// Defined to 1 when VK_KHR_maintenance5 device extension is defined in Vulkan headers. +#if !defined(VMA_KHR_MAINTENANCE5) + #if VK_KHR_maintenance5 + #define VMA_KHR_MAINTENANCE5 1 + #else + #define VMA_KHR_MAINTENANCE5 0 + #endif +#endif + + // Defined to 1 when VK_KHR_external_memory device extension is defined in Vulkan headers. #if !defined(VMA_EXTERNAL_MEMORY) #if VK_KHR_external_memory @@ -234,6 +254,12 @@ extern "C" { #define VMA_CALL_POST #endif +// Define this macro to decorate pNext pointers with an attribute specifying the Vulkan +// structure that will be extended via the pNext chain. +#ifndef VMA_EXTENDS_VK_STRUCT + #define VMA_EXTENDS_VK_STRUCT(vkStruct) +#endif + // Define this macro to decorate pointers with an attribute specifying the // length of the array they point to if they are not null. // @@ -420,6 +446,20 @@ typedef enum VmaAllocatorCreateFlagBits For more details, see the documentation of the VK_EXT_memory_priority extension. */ VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT = 0x00000040, + /** + Enables usage of VK_KHR_maintenance4 extension in the library. + + You may set this flag only if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT = 0x00000080, + /** + Enables usage of VK_KHR_maintenance5 extension in the library. + + You should set this flag if you found available and enabled this device extension, + while creating Vulkan device passed as VmaAllocatorCreateInfo::device. + */ + VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT = 0x00000100, VMA_ALLOCATOR_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VmaAllocatorCreateFlagBits; @@ -521,6 +561,9 @@ typedef enum VmaAllocationCreateFlagBits /** \brief Set this flag if the allocation should have its own memory block. Use it for special, big resources, like fullscreen images used as attachments. + + If you use this flag while creating a buffer or an image, `VkMemoryDedicatedAllocateInfo` + structure is applied if possible. */ VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT = 0x00000001, @@ -602,7 +645,7 @@ typedef enum VmaAllocationCreateFlagBits This includes allocations created in \ref custom_memory_pools. Declares that mapped memory can be read, written, and accessed in random order, - so a `HOST_CACHED` memory type is required. + so a `HOST_CACHED` memory type is preferred. */ VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, /** @@ -884,7 +927,7 @@ Use it as a unique identifier to virtual allocation within the single block. Use value `VK_NULL_HANDLE` to represent a null/invalid allocation. */ -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation); +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VmaVirtualAllocation) /** @} */ @@ -983,13 +1026,14 @@ typedef struct VmaVulkanFunctions PFN_vkBindImageMemory2KHR VMA_NULLABLE vkBindImageMemory2KHR; #endif #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. PFN_vkGetPhysicalDeviceMemoryProperties2KHR VMA_NULLABLE vkGetPhysicalDeviceMemoryProperties2KHR; #endif -#if VMA_VULKAN_VERSION >= 1003000 +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. - PFN_vkGetDeviceBufferMemoryRequirements VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; + PFN_vkGetDeviceBufferMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceBufferMemoryRequirements; /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. - PFN_vkGetDeviceImageMemoryRequirements VMA_NULLABLE vkGetDeviceImageMemoryRequirements; + PFN_vkGetDeviceImageMemoryRequirementsKHR VMA_NULLABLE vkGetDeviceImageMemoryRequirements; #endif } VmaVulkanFunctions; @@ -1049,13 +1093,15 @@ typedef struct VmaAllocatorCreateInfo Starting from version 3.0.0 this member is no longer optional, it must be set! */ VkInstance VMA_NOT_NULL instance; - /** \brief Optional. The highest version of Vulkan that the application is designed to use. + /** \brief Optional. Vulkan version that the application uses. It must be a value in the format as created by macro `VK_MAKE_VERSION` or a constant like: `VK_API_VERSION_1_1`, `VK_API_VERSION_1_0`. The patch version number specified is ignored. Only the major and minor versions are considered. - It must be less or equal (preferably equal) to value as passed to `vkCreateInstance` as `VkApplicationInfo::apiVersion`. Only versions 1.0, 1.1, 1.2, 1.3 are supported by the current implementation. Leaving it initialized to zero is equivalent to `VK_API_VERSION_1_0`. + It must match the Vulkan version used by the application and supported on the selected physical device, + so it must be no higher than `VkApplicationInfo::apiVersion` passed to `vkCreateInstance` + and no higher than `VkPhysicalDeviceProperties::apiVersion` found on the physical device used. */ uint32_t vulkanApiVersion; #if VMA_EXTERNAL_MEMORY @@ -1319,7 +1365,7 @@ typedef struct VmaPoolCreateInfo Please note that some structures, e.g. `VkMemoryPriorityAllocateInfoEXT`, `VkMemoryDedicatedAllocateInfoKHR`, can be attached automatically by this library when using other, more convenient of its features. */ - void* VMA_NULLABLE pMemoryAllocateNext; + void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkMemoryAllocateInfo) pMemoryAllocateNext; } VmaPoolCreateInfo; /** @} */ @@ -1329,7 +1375,11 @@ typedef struct VmaPoolCreateInfo @{ */ -/// Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). +/** +Parameters of #VmaAllocation objects, that can be retrieved using function vmaGetAllocationInfo(). + +There is also an extended version of this structure that carries additional parameters: #VmaAllocationInfo2. +*/ typedef struct VmaAllocationInfo { /** \brief Memory type index that this allocation was allocated from. @@ -1389,6 +1439,33 @@ typedef struct VmaAllocationInfo const char* VMA_NULLABLE pName; } VmaAllocationInfo; +/// Extended parameters of a #VmaAllocation object that can be retrieved using function vmaGetAllocationInfo2(). +typedef struct VmaAllocationInfo2 +{ + /** \brief Basic parameters of the allocation. + + If you need only these, you can use function vmaGetAllocationInfo() and structure #VmaAllocationInfo instead. + */ + VmaAllocationInfo allocationInfo; + /** \brief Size of the `VkDeviceMemory` block that the allocation belongs to. + + In case of an allocation with dedicated memory, it will be equal to `allocationInfo.size`. + */ + VkDeviceSize blockSize; + /** \brief `VK_TRUE` if the allocation has dedicated memory, `VK_FALSE` if it was placed as part of a larger memory block. + + When `VK_TRUE`, it also means `VkMemoryDedicatedAllocateInfo` was used when creating the allocation + (if VK_KHR_dedicated_allocation extension or Vulkan version >= 1.1 is enabled). + */ + VkBool32 dedicatedMemory; +} VmaAllocationInfo2; + +/** Callback function called during vmaBeginDefragmentation() to check custom criterion about ending current defragmentation pass. + +Should return true if the defragmentation needs to stop current pass. +*/ +typedef VkBool32 (VKAPI_PTR* PFN_vmaCheckDefragmentationBreakFunction)(void* VMA_NULLABLE pUserData); + /** \brief Parameters for defragmentation. To be used with function vmaBeginDefragmentation(). @@ -1412,6 +1489,13 @@ typedef struct VmaDefragmentationInfo `0` means no limit. */ uint32_t maxAllocationsPerPass; + /** \brief Optional custom callback for stopping vmaBeginDefragmentation(). + + Have to return true for breaking current defragmentation pass. + */ + PFN_vmaCheckDefragmentationBreakFunction VMA_NULLABLE pfnBreakCallback; + /// \brief Optional data to pass to custom callback for stopping pass of defragmentation. + void* VMA_NULLABLE pBreakCallbackUserData; } VmaDefragmentationInfo; /// Single move of an allocation to be done for defragmentation. @@ -1912,12 +1996,27 @@ you should avoid calling it too often. You can retrieve same VmaAllocationInfo structure while creating your resource, from function vmaCreateBuffer(), vmaCreateImage(). You can remember it if you are sure parameters don't change (e.g. due to defragmentation). + +There is also a new function vmaGetAllocationInfo2() that offers extended information +about the allocation, returned using new structure #VmaAllocationInfo2. */ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( VmaAllocator VMA_NOT_NULL allocator, VmaAllocation VMA_NOT_NULL allocation, VmaAllocationInfo* VMA_NOT_NULL pAllocationInfo); +/** \brief Returns extended information about specified allocation. + +Current parameters of given allocation are returned in `pAllocationInfo`. +Extended parameters in structure #VmaAllocationInfo2 include memory block size +and a flag telling whether the allocation has dedicated memory. +It can be useful e.g. for interop with OpenGL. +*/ +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL allocation, + VmaAllocationInfo2* VMA_NOT_NULL pAllocationInfo); + /** \brief Sets pUserData in given allocation to new value. The value of pointer `pUserData` is copied to allocation's `pUserData`. @@ -2069,7 +2168,7 @@ For more information, see documentation of vmaFlushAllocation(). \param allocator \param allocationCount \param allocations -\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all ofsets are zero. +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. \param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. This function returns the `VkResult` from `vkFlushMappedMemoryRanges` if it is @@ -2090,7 +2189,7 @@ For more information, see documentation of vmaInvalidateAllocation(). \param allocator \param allocationCount \param allocations -\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all ofsets are zero. +\param offsets If not null, it must point to an array of offsets of regions to flush, relative to the beginning of respective allocations. Null means all offsets are zero. \param sizes If not null, it must point to an array of sizes of regions to flush in respective allocations. Null means `VK_WHOLE_SIZE` for all allocations. This function returns the `VkResult` from `vkInvalidateMappedMemoryRanges` if it is @@ -2103,6 +2202,61 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) offsets, const VkDeviceSize* VMA_NULLABLE VMA_LEN_IF_NOT_NULL(allocationCount) sizes); +/** \brief Maps the allocation temporarily if needed, copies data from specified host pointer to it, and flushes the memory from the host caches if needed. + +\param allocator +\param pSrcHostPointer Pointer to the host data that become source of the copy. +\param dstAllocation Handle to the allocation that becomes destination of the copy. +\param dstAllocationLocalOffset Offset within `dstAllocation` where to write copied data, in bytes. +\param size Number of bytes to copy. + +This is a convenience function that allows to copy data from a host pointer to an allocation easily. +Same behavior can be achieved by calling vmaMapMemory(), `memcpy()`, vmaUnmapMemory(), vmaFlushAllocation(). + +This function can be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function will fail and generate a Validation Layers error. + +`dstAllocationLocalOffset` is relative to the contents of given `dstAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator VMA_NOT_NULL allocator, + const void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pSrcHostPointer, + VmaAllocation VMA_NOT_NULL dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + +/** \brief Invalidates memory in the host caches if needed, maps the allocation temporarily if needed, and copies data from it to a specified host pointer. + +\param allocator +\param srcAllocation Handle to the allocation that becomes source of the copy. +\param srcAllocationLocalOffset Offset within `srcAllocation` where to read copied data, in bytes. +\param pDstHostPointer Pointer to the host memory that become destination of the copy. +\param size Number of bytes to copy. + +This is a convenience function that allows to copy data from an allocation to a host pointer easily. +Same behavior can be achieved by calling vmaInvalidateAllocation(), vmaMapMemory(), `memcpy()`, vmaUnmapMemory(). + +This function should be called only for allocations created in a memory type that has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` +and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT` flag. +It can be ensured e.g. by using #VMA_MEMORY_USAGE_AUTO and #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +Otherwise, the function may fail and generate a Validation Layers error. +It may also work very slowly when reading from an uncached memory. + +`srcAllocationLocalOffset` is relative to the contents of given `srcAllocation`. +If you mean whole allocation, you should pass 0. +Do not pass allocation's offset within device memory block as this parameter! +*/ +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator VMA_NOT_NULL allocator, + VmaAllocation VMA_NOT_NULL srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* VMA_NOT_NULL VMA_LEN_IF_NOT_NULL(size) pDstHostPointer, + VkDeviceSize size); + /** \brief Checks magic number in margins around all allocations in given memory types (in both default and custom pools) in search for corruptions. \param allocator @@ -2227,7 +2381,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindBufferMemory2( VmaAllocation VMA_NOT_NULL allocation, VkDeviceSize allocationLocalOffset, VkBuffer VMA_NOT_NULL_NON_DISPATCHABLE buffer, - const void* VMA_NULLABLE pNext); + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindBufferMemoryInfoKHR) pNext); /** \brief Binds image to allocation. @@ -2264,7 +2418,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaBindImageMemory2( VmaAllocation VMA_NOT_NULL allocation, VkDeviceSize allocationLocalOffset, VkImage VMA_NOT_NULL_NON_DISPATCHABLE image, - const void* VMA_NULLABLE pNext); + const void* VMA_NULLABLE VMA_EXTENDS_VK_STRUCT(VkBindImageMemoryInfoKHR) pNext); /** \brief Creates a new `VkBuffer`, allocates and binds memory for it. @@ -2604,14 +2758,27 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeStatsString( #include #include #include +#include #include #include +#if !defined(VMA_CPP20) + #if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 + #define VMA_CPP20 1 + #else + #define VMA_CPP20 0 + #endif +#endif + #ifdef _MSC_VER #include // For functions like __popcnt, _BitScanForward etc. #endif -#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 - #include // For std::popcount +#if VMA_CPP20 + #include +#endif + +#if VMA_STATS_STRING_ENABLED + #include // For snprintf #endif /******************************************************************************* @@ -2647,8 +2814,7 @@ VmaAllocatorCreateInfo::pVulkanFunctions. Other members can be null. #endif #ifndef VMA_USE_STL_SHARED_MUTEX - // Compiler conforms to C++17. - #if __cplusplus >= 201703L + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 #define VMA_USE_STL_SHARED_MUTEX 1 // Visual studio defines __cplusplus properly only when passed additional parameter: /Zc:__cplusplus // Otherwise it is always 199711L, despite shared_mutex works since Visual Studio 2015 Update 2. @@ -2682,7 +2848,7 @@ remove them if not needed. */ #if !defined(VMA_CONFIGURATION_USER_INCLUDES_H) #include // for assert - #include // for min, max + #include // for min, max, swap #include #else #include VMA_CONFIGURATION_USER_INCLUDES_H @@ -2693,6 +2859,14 @@ remove them if not needed. #define VMA_NULL nullptr #endif +#ifndef VMA_FALLTHROUGH + #if __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 + #define VMA_FALLTHROUGH [[fallthrough]] + #else + #define VMA_FALLTHROUGH + #endif +#endif + // Normal assert to check for programmer's errors, especially in Debug configuration. #ifndef VMA_ASSERT #ifdef NDEBUG @@ -2712,6 +2886,11 @@ remove them if not needed. #endif #endif +// Assert used for reporting memory leaks - unfreed allocations. +#ifndef VMA_ASSERT_LEAK + #define VMA_ASSERT_LEAK(expr) VMA_ASSERT(expr) +#endif + // If your compiler is not compatible with C++17 and definition of // aligned_alloc() function is missing, uncommenting following line may help: @@ -2769,7 +2948,7 @@ static void* vma_aligned_alloc(size_t alignment, size_t size) { return _aligned_malloc(size, alignment); } -#elif __cplusplus >= 201703L // Compiler conforms to C++17. +#elif __cplusplus >= 201703L || _MSVC_LANG >= 201703L // C++17 static void* vma_aligned_alloc(size_t alignment, size_t size) { return aligned_alloc(alignment, size); @@ -2795,7 +2974,7 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) #endif #ifndef VMA_ALIGN_OF - #define VMA_ALIGN_OF(type) (__alignof(type)) + #define VMA_ALIGN_OF(type) (alignof(type)) #endif #ifndef VMA_SYSTEM_ALIGNED_MALLOC @@ -2834,33 +3013,52 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) #define VMA_MAX(v1, v2) ((std::max)((v1), (v2))) #endif -#ifndef VMA_SWAP - #define VMA_SWAP(v1, v2) std::swap((v1), (v2)) -#endif - #ifndef VMA_SORT #define VMA_SORT(beg, end, cmp) std::sort(beg, end, cmp) #endif -#ifndef VMA_DEBUG_LOG - #define VMA_DEBUG_LOG(format, ...) +#ifndef VMA_DEBUG_LOG_FORMAT + #define VMA_DEBUG_LOG_FORMAT(format, ...) /* - #define VMA_DEBUG_LOG(format, ...) do { \ - printf(format, __VA_ARGS__); \ + #define VMA_DEBUG_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ printf("\n"); \ } while(false) */ #endif +#ifndef VMA_DEBUG_LOG + #define VMA_DEBUG_LOG(str) VMA_DEBUG_LOG_FORMAT("%s", (str)) +#endif + +#ifndef VMA_LEAK_LOG_FORMAT + #define VMA_LEAK_LOG_FORMAT(format, ...) VMA_DEBUG_LOG_FORMAT(format, __VA_ARGS__) +#endif + +#ifndef VMA_CLASS_NO_COPY + #define VMA_CLASS_NO_COPY(className) \ + private: \ + className(const className&) = delete; \ + className& operator=(const className&) = delete; +#endif +#ifndef VMA_CLASS_NO_COPY_NO_MOVE + #define VMA_CLASS_NO_COPY_NO_MOVE(className) \ + private: \ + className(const className&) = delete; \ + className(className&&) = delete; \ + className& operator=(const className&) = delete; \ + className& operator=(className&&) = delete; +#endif + // Define this macro to 1 to enable functions: vmaBuildStatsString, vmaFreeStatsString. #if VMA_STATS_STRING_ENABLED static inline void VmaUint32ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint32_t num) { - snprintf(outStr, strLen, "%u", static_cast(num)); + snprintf(outStr, strLen, "%" PRIu32, num); } static inline void VmaUint64ToStr(char* VMA_NOT_NULL outStr, size_t strLen, uint64_t num) { - snprintf(outStr, strLen, "%llu", static_cast(num)); + snprintf(outStr, strLen, "%" PRIu64, num); } static inline void VmaPtrToStr(char* VMA_NOT_NULL outStr, size_t strLen, const void* ptr) { @@ -2871,7 +3069,9 @@ static void vma_aligned_free(void* VMA_NULLABLE ptr) #ifndef VMA_MUTEX class VmaMutex { + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutex) public: + VmaMutex() { } void Lock() { m_Mutex.lock(); } void Unlock() { m_Mutex.unlock(); } bool TryLock() { return m_Mutex.try_lock(); } @@ -3037,13 +3237,6 @@ tools like RenderDoc. #define VMA_MAPPING_HYSTERESIS_ENABLED 1 #endif -#ifndef VMA_CLASS_NO_COPY - #define VMA_CLASS_NO_COPY(className) \ - private: \ - className(const className&) = delete; \ - className& operator=(const className&) = delete; -#endif - #define VMA_VALIDATE(cond) do { if(!(cond)) { \ VMA_ASSERT(0 && "Validation failed: " #cond); \ return false; \ @@ -3159,17 +3352,6 @@ class VmaList; template class VmaIntrusiveLinkedList; -// Unused in this version -#if 0 -template -struct VmaPair; -template -struct VmaPairFirstLess; - -template -class VmaMap; -#endif - #if VMA_STATS_STRING_ENABLED class VmaStringBuilder; class VmaJsonWriter; @@ -3209,7 +3391,7 @@ class VmaAllocationObjectAllocator; /* Returns number of bits set to 1 in (v). -On specific platforms and compilers you can use instrinsics like: +On specific platforms and compilers you can use intrinsics like: Visual Studio: return __popcnt(v); @@ -3221,7 +3403,7 @@ But you need to check in runtime whether user's CPU supports these, as some old */ static inline uint32_t VmaCountBitsSet(uint32_t v) { -#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L // C++20 +#if VMA_CPP20 return std::popcount(v); #else uint32_t c = v - ((v >> 1) & 0x55555555); @@ -3240,6 +3422,10 @@ static inline uint8_t VmaBitScanLSB(uint64_t mask) if (_BitScanForward64(&pos, mask)) return static_cast(pos); return UINT8_MAX; +#elif VMA_CPP20 + if(mask) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; #elif defined __GNUC__ || defined __clang__ return static_cast(__builtin_ffsll(mask)) - 1U; #else @@ -3262,6 +3448,10 @@ static inline uint8_t VmaBitScanLSB(uint32_t mask) if (_BitScanForward(&pos, mask)) return static_cast(pos); return UINT8_MAX; +#elif VMA_CPP20 + if(mask) + return static_cast(std::countr_zero(mask)); + return UINT8_MAX; #elif defined __GNUC__ || defined __clang__ return static_cast(__builtin_ffs(mask)) - 1U; #else @@ -3283,6 +3473,9 @@ static inline uint8_t VmaBitScanMSB(uint64_t mask) unsigned long pos; if (_BitScanReverse64(&pos, mask)) return static_cast(pos); +#elif VMA_CPP20 + if(mask) + return 63 - static_cast(std::countl_zero(mask)); #elif defined __GNUC__ || defined __clang__ if (mask) return 63 - static_cast(__builtin_clzll(mask)); @@ -3305,6 +3498,9 @@ static inline uint8_t VmaBitScanMSB(uint32_t mask) unsigned long pos; if (_BitScanReverse(&pos, mask)) return static_cast(pos); +#elif VMA_CPP20 + if(mask) + return 31 - static_cast(std::countl_zero(mask)); #elif defined __GNUC__ || defined __clang__ if (mask) return 31 - static_cast(__builtin_clz(mask)); @@ -3452,7 +3648,7 @@ static inline bool VmaIsBufferImageGranularityConflict( { if (suballocType1 > suballocType2) { - VMA_SWAP(suballocType1, suballocType2); + std::swap(suballocType1, suballocType2); } switch (suballocType1) @@ -3536,7 +3732,7 @@ new element with value (key) should be inserted. template static IterT VmaBinaryFindFirstNotLess(IterT beg, IterT end, const KeyT& key, const CmpLess& cmp) { - size_t down = 0, up = (end - beg); + size_t down = 0, up = size_t(end - beg); while (down < up) { const size_t mid = down + (up - down) / 2; // Overflow-safe midpoint calculation @@ -3597,13 +3793,95 @@ static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) newStruct->pNext = mainStruct->pNext; mainStruct->pNext = newStruct; } +// Finds structure with s->sType == sType in mainStruct->pNext chain. +// Returns pointer to it. If not found, returns null. +template +static inline const FindT* VmaPnextChainFind(const MainT* mainStruct, VkStructureType sType) +{ + for(const VkBaseInStructure* s = (const VkBaseInStructure*)mainStruct->pNext; + s != VMA_NULL; s = s->pNext) + { + if(s->sType == sType) + { + return (const FindT*)s; + } + } + return VMA_NULL; +} + +// An abstraction over buffer or image `usage` flags, depending on available extensions. +struct VmaBufferImageUsage +{ +#if VMA_KHR_MAINTENANCE5 + typedef uint64_t BaseType; // VkFlags64 +#else + typedef uint32_t BaseType; // VkFlags32 +#endif + + static const VmaBufferImageUsage UNKNOWN; + + BaseType Value; + + VmaBufferImageUsage() { *this = UNKNOWN; } + explicit VmaBufferImageUsage(BaseType usage) : Value(usage) { } + VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5); + explicit VmaBufferImageUsage(const VkImageCreateInfo &createInfo); + + bool operator==(const VmaBufferImageUsage& rhs) const { return Value == rhs.Value; } + bool operator!=(const VmaBufferImageUsage& rhs) const { return Value != rhs.Value; } + + bool Contains(BaseType flag) const { return (Value & flag) != 0; } + bool ContainsDeviceAccess() const + { + // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_IMAGE_TRANSFER*. + return (Value & ~BaseType(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + } +}; + +const VmaBufferImageUsage VmaBufferImageUsage::UNKNOWN = VmaBufferImageUsage(0); + +static void swap(VmaBufferImageUsage& lhs, VmaBufferImageUsage& rhs) noexcept +{ + using std::swap; + swap(lhs.Value, rhs.Value); +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkBufferCreateInfo &createInfo, + bool useKhrMaintenance5) +{ +#if VMA_KHR_MAINTENANCE5 + if(useKhrMaintenance5) + { + // If VkBufferCreateInfo::pNext chain contains VkBufferUsageFlags2CreateInfoKHR, + // take usage from it and ignore VkBufferCreateInfo::usage, per specification + // of the VK_KHR_maintenance5 extension. + const VkBufferUsageFlags2CreateInfoKHR* const usageFlags2 = + VmaPnextChainFind(&createInfo, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR); + if(usageFlags2) + { + this->Value = usageFlags2->usage; + return; + } + } +#endif + + this->Value = (BaseType)createInfo.usage; +} + +VmaBufferImageUsage::VmaBufferImageUsage(const VkImageCreateInfo &createInfo) +{ + // Maybe in the future there will be VK_KHR_maintenanceN extension with structure + // VkImageUsageFlags2CreateInfoKHR, like the one for buffers... + + this->Value = (BaseType)createInfo.usage; +} // This is the main algorithm that guides the selection of a memory type best for an allocation - // converts usage to required/preferred/not preferred flags. static bool FindMemoryPreferences( bool isIntegratedGPU, const VmaAllocationCreateInfo& allocCreateInfo, - VkFlags bufImgUsage, // VkBufferCreateInfo::usage or VkImageCreateInfo::usage. UINT32_MAX if unknown. + VmaBufferImageUsage bufImgUsage, VkMemoryPropertyFlags& outRequiredFlags, VkMemoryPropertyFlags& outPreferredFlags, VkMemoryPropertyFlags& outNotPreferredFlags) @@ -3646,13 +3924,14 @@ static bool FindMemoryPreferences( case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: { - if(bufImgUsage == UINT32_MAX) + if(bufImgUsage == VmaBufferImageUsage::UNKNOWN) { - VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known."); + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known." + " Maybe you use VkBufferUsageFlags2CreateInfoKHR but forgot to use VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT?" ); return false; } - // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same VK_BUFFER_IMAGE_TRANSFER*. - const bool deviceAccess = (bufImgUsage & ~(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + + const bool deviceAccess = bufImgUsage.ContainsDeviceAccess(); const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; @@ -3662,18 +3941,21 @@ static bool FindMemoryPreferences( // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. if(hostAccessRandom) { - if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + // Prefer cached. Cannot require it, because some platforms don't have it (e.g. Raspberry Pi - see #362)! + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if (!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) { // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. // Omitting HOST_VISIBLE here is intentional. // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if occurs first on the list. - outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } else { - // Always CPU memory, cached. - outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + // Always CPU memory. + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; } } // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. @@ -3712,19 +3994,18 @@ static bool FindMemoryPreferences( // No CPU access else { - // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory - if(deviceAccess) - { - // ...unless there is a clear preference from the user not to do so. - if(preferHost) - outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - else - outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } + // if(deviceAccess) + // + // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory, + // unless there is a clear preference from the user not to do so. + // + // else: + // // No direct GPU access, no CPU access, just transfers. // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or // a "swap file" copy to free some GPU memory (then better CPU memory). // Up to the user to decide. If no preferece, assume the former and choose GPU memory. + if(preferHost) outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; else @@ -3943,7 +4224,7 @@ static void VmaAddDetailedStatistics(VmaDetailedStatistics& inoutStats, const Vm // Helper RAII class to lock a mutex in constructor and unlock it in destructor (at the end of scope). struct VmaMutexLock { - VMA_CLASS_NO_COPY(VmaMutexLock) + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLock) public: VmaMutexLock(VMA_MUTEX& mutex, bool useMutex = true) : m_pMutex(useMutex ? &mutex : VMA_NULL) @@ -3959,7 +4240,7 @@ private: // Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for reading. struct VmaMutexLockRead { - VMA_CLASS_NO_COPY(VmaMutexLockRead) + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockRead) public: VmaMutexLockRead(VMA_RW_MUTEX& mutex, bool useMutex) : m_pMutex(useMutex ? &mutex : VMA_NULL) @@ -3975,7 +4256,7 @@ private: // Helper RAII class to lock a RW mutex in constructor and unlock it in destructor (at the end of scope), for writing. struct VmaMutexLockWrite { - VMA_CLASS_NO_COPY(VmaMutexLockWrite) + VMA_CLASS_NO_COPY_NO_MOVE(VmaMutexLockWrite) public: VmaMutexLockWrite(VMA_RW_MUTEX& mutex, bool useMutex) : m_pMutex(useMutex ? &mutex : VMA_NULL) @@ -3998,11 +4279,11 @@ private: #ifndef _VMA_ATOMIC_TRANSACTIONAL_INCREMENT // An object that increments given atomic but decrements it back in the destructor unless Commit() is called. -template +template struct AtomicTransactionalIncrement { public: - typedef std::atomic AtomicT; + using T = decltype(AtomicT().load()); ~AtomicTransactionalIncrement() { @@ -4010,7 +4291,7 @@ public: --(*m_Atomic); } - void Commit() { m_Atomic = nullptr; } + void Commit() { m_Atomic = VMA_NULL; } T Increment(AtomicT* atomic) { m_Atomic = atomic; @@ -4018,7 +4299,7 @@ public: } private: - AtomicT* m_Atomic = nullptr; + AtomicT* m_Atomic = VMA_NULL; }; #endif // _VMA_ATOMIC_TRANSACTIONAL_INCREMENT @@ -4430,7 +4711,7 @@ allocator can create multiple blocks. template class VmaPoolAllocator { - VMA_CLASS_NO_COPY(VmaPoolAllocator) + VMA_CLASS_NO_COPY_NO_MOVE(VmaPoolAllocator) public: VmaPoolAllocator(const VkAllocationCallbacks* pAllocationCallbacks, uint32_t firstBlockCapacity); ~VmaPoolAllocator(); @@ -4563,7 +4844,7 @@ struct VmaListItem template class VmaRawList { - VMA_CLASS_NO_COPY(VmaRawList) + VMA_CLASS_NO_COPY_NO_MOVE(VmaRawList) public: typedef VmaListItem ItemType; @@ -4826,7 +5107,7 @@ VmaListItem* VmaRawList::InsertAfter(ItemType* pItem, const T& value) template class VmaList { - VMA_CLASS_NO_COPY(VmaList) + VMA_CLASS_NO_COPY_NO_MOVE(VmaList) public: class reverse_iterator; class const_iterator; @@ -5285,99 +5566,6 @@ void VmaIntrusiveLinkedList::RemoveAll() #endif // _VMA_INTRUSIVE_LINKED_LIST_FUNCTIONS #endif // _VMA_INTRUSIVE_LINKED_LIST -// Unused in this version. -#if 0 - -#ifndef _VMA_PAIR -template -struct VmaPair -{ - T1 first; - T2 second; - - VmaPair() : first(), second() {} - VmaPair(const T1& firstSrc, const T2& secondSrc) : first(firstSrc), second(secondSrc) {} -}; - -template -struct VmaPairFirstLess -{ - bool operator()(const VmaPair& lhs, const VmaPair& rhs) const - { - return lhs.first < rhs.first; - } - bool operator()(const VmaPair& lhs, const FirstT& rhsFirst) const - { - return lhs.first < rhsFirst; - } -}; -#endif // _VMA_PAIR - -#ifndef _VMA_MAP -/* Class compatible with subset of interface of std::unordered_map. -KeyT, ValueT must be POD because they will be stored in VmaVector. -*/ -template -class VmaMap -{ -public: - typedef VmaPair PairType; - typedef PairType* iterator; - - VmaMap(const VmaStlAllocator& allocator) : m_Vector(allocator) {} - - iterator begin() { return m_Vector.begin(); } - iterator end() { return m_Vector.end(); } - size_t size() { return m_Vector.size(); } - - void insert(const PairType& pair); - iterator find(const KeyT& key); - void erase(iterator it); - -private: - VmaVector< PairType, VmaStlAllocator> m_Vector; -}; - -#ifndef _VMA_MAP_FUNCTIONS -template -void VmaMap::insert(const PairType& pair) -{ - const size_t indexToInsert = VmaBinaryFindFirstNotLess( - m_Vector.data(), - m_Vector.data() + m_Vector.size(), - pair, - VmaPairFirstLess()) - m_Vector.data(); - VmaVectorInsert(m_Vector, indexToInsert, pair); -} - -template -VmaPair* VmaMap::find(const KeyT& key) -{ - PairType* it = VmaBinaryFindFirstNotLess( - m_Vector.data(), - m_Vector.data() + m_Vector.size(), - key, - VmaPairFirstLess()); - if ((it != m_Vector.end()) && (it->first == key)) - { - return it; - } - else - { - return m_Vector.end(); - } -} - -template -void VmaMap::erase(iterator it) -{ - VmaVectorRemove(m_Vector, it - m_Vector.begin()); -} -#endif // _VMA_MAP_FUNCTIONS -#endif // _VMA_MAP - -#endif // #if 0 - #if !defined(_VMA_STRING_BUILDER) && VMA_STATS_STRING_ENABLED class VmaStringBuilder { @@ -5418,7 +5606,7 @@ void VmaStringBuilder::AddNumber(uint32_t num) char* p = &buf[10]; do { - *--p = '0' + (num % 10); + *--p = '0' + (char)(num % 10); num /= 10; } while (num); Add(p); @@ -5431,7 +5619,7 @@ void VmaStringBuilder::AddNumber(uint64_t num) char* p = &buf[20]; do { - *--p = '0' + (num % 10); + *--p = '0' + (char)(num % 10); num /= 10; } while (num); Add(p); @@ -5453,7 +5641,7 @@ VmaStringBuilder passed to the constructor. */ class VmaJsonWriter { - VMA_CLASS_NO_COPY(VmaJsonWriter) + VMA_CLASS_NO_COPY_NO_MOVE(VmaJsonWriter) public: // sb - string builder to write the document to. Must remain alive for the whole lifetime of this object. VmaJsonWriter(const VkAllocationCallbacks* pAllocationCallbacks, VmaStringBuilder& sb); @@ -5487,7 +5675,6 @@ public: // Posts next part of an open string. The number is converted to decimal characters. void ContinueString(uint32_t n); void ContinueString(uint64_t n); - void ContinueString_Size(size_t n); // Posts next part of an open string. Pointer value is converted to characters // using "%p" formatting - shown as hexadecimal number, e.g.: 000000081276Ad00 void ContinueString_Pointer(const void* ptr); @@ -5497,7 +5684,6 @@ public: // Writes a number value. void WriteNumber(uint32_t n); void WriteNumber(uint64_t n); - void WriteSize(size_t n); // Writes a boolean value - false or true. void WriteBool(bool b); // Writes a null value. @@ -5522,11 +5708,6 @@ private: VmaVector< StackItem, VmaStlAllocator > m_Stack; bool m_InsideString; - // Write size_t for less than 64bits - void WriteSize(size_t n, std::integral_constant) { m_SB.AddNumber(static_cast(n)); } - // Write size_t for 64bits - void WriteSize(size_t n, std::integral_constant) { m_SB.AddNumber(static_cast(n)); } - void BeginValue(bool isString); void WriteIndent(bool oneLess = false); }; @@ -5629,7 +5810,7 @@ void VmaJsonWriter::ContinueString(const char* pStr) { m_SB.Add("\\\""); } - else if (ch >= 32) + else if ((uint8_t)ch >= 32) { m_SB.Add(ch); } @@ -5652,7 +5833,6 @@ void VmaJsonWriter::ContinueString(const char* pStr) break; default: VMA_ASSERT(0 && "Character not currently supported."); - break; } } } @@ -5669,14 +5849,6 @@ void VmaJsonWriter::ContinueString(uint64_t n) m_SB.AddNumber(n); } -void VmaJsonWriter::ContinueString_Size(size_t n) -{ - VMA_ASSERT(m_InsideString); - // Fix for AppleClang incorrect type casting - // TODO: Change to if constexpr when C++17 used as minimal standard - WriteSize(n, std::is_same{}); -} - void VmaJsonWriter::ContinueString_Pointer(const void* ptr) { VMA_ASSERT(m_InsideString); @@ -5708,15 +5880,6 @@ void VmaJsonWriter::WriteNumber(uint64_t n) m_SB.AddNumber(n); } -void VmaJsonWriter::WriteSize(size_t n) -{ - VMA_ASSERT(!m_InsideString); - BeginValue(false); - // Fix for AppleClang incorrect type casting - // TODO: Change to if constexpr when C++17 used as minimal standard - WriteSize(n, std::is_same{}); -} - void VmaJsonWriter::WriteBool(bool b) { VMA_ASSERT(!m_InsideString); @@ -5816,7 +5979,7 @@ static void VmaPrintDetailedStatistics(VmaJsonWriter& json, const VmaDetailedSta class VmaMappingHysteresis { - VMA_CLASS_NO_COPY(VmaMappingHysteresis) + VMA_CLASS_NO_COPY_NO_MOVE(VmaMappingHysteresis) public: VmaMappingHysteresis() = default; @@ -5923,7 +6086,7 @@ Thread-safety: */ class VmaDeviceMemoryBlock { - VMA_CLASS_NO_COPY(VmaDeviceMemoryBlock) + VMA_CLASS_NO_COPY_NO_MOVE(VmaDeviceMemoryBlock) public: VmaBlockMetadata* m_pMetadata; @@ -6065,9 +6228,17 @@ public: void DedicatedAllocUnmap(VmaAllocator hAllocator); #if VMA_STATS_STRING_ENABLED - uint32_t GetBufferImageUsage() const { return m_BufferImageUsage; } - - void InitBufferImageUsage(uint32_t bufferImageUsage); + VmaBufferImageUsage GetBufferImageUsage() const { return m_BufferImageUsage; } + void InitBufferUsage(const VkBufferCreateInfo &createInfo, bool useKhrMaintenance5) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo, useKhrMaintenance5); + } + void InitImageUsage(const VkImageCreateInfo &createInfo) + { + VMA_ASSERT(m_BufferImageUsage == VmaBufferImageUsage::UNKNOWN); + m_BufferImageUsage = VmaBufferImageUsage(createInfo); + } void PrintParameters(class VmaJsonWriter& json) const; #endif @@ -6106,7 +6277,7 @@ private: uint8_t m_MapCount; uint8_t m_Flags; // enum FLAGS #if VMA_STATS_STRING_ENABLED - uint32_t m_BufferImageUsage; // 0 if unknown. + VmaBufferImageUsage m_BufferImageUsage; // 0 if unknown. #endif }; #endif // _VMA_ALLOCATION_T @@ -6146,6 +6317,7 @@ Thread-safe, synchronized internally. */ class VmaDedicatedAllocationList { + VMA_CLASS_NO_COPY_NO_MOVE(VmaDedicatedAllocationList) public: VmaDedicatedAllocationList() {} ~VmaDedicatedAllocationList(); @@ -6180,7 +6352,7 @@ VmaDedicatedAllocationList::~VmaDedicatedAllocationList() if (!m_AllocationList.IsEmpty()) { - VMA_ASSERT(false && "Unfreed dedicated allocations found!"); + VMA_ASSERT_LEAK(false && "Unfreed dedicated allocations found!"); } } @@ -6201,7 +6373,7 @@ bool VmaDedicatedAllocationList::Validate() void VmaDedicatedAllocationList::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) { - for(auto* item = m_AllocationList.Front(); item != nullptr; item = DedicatedAllocationLinkedList::GetNext(item)) + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) { const VkDeviceSize size = item->GetSize(); inoutStats.statistics.blockCount++; @@ -6218,7 +6390,7 @@ void VmaDedicatedAllocationList::AddStatistics(VmaStatistics& inoutStats) inoutStats.blockCount += allocCount; inoutStats.allocationCount += allocCount; - for(auto* item = m_AllocationList.Front(); item != nullptr; item = DedicatedAllocationLinkedList::GetNext(item)) + for(auto* item = m_AllocationList.Front(); item != VMA_NULL; item = DedicatedAllocationLinkedList::GetNext(item)) { const VkDeviceSize size = item->GetSize(); inoutStats.blockBytes += size; @@ -6331,6 +6503,7 @@ in a single VkDeviceMemory block. */ class VmaBlockMetadata { + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata) public: // pAllocationCallbacks, if not null, must be owned externally - alive and unchanged for the whole lifetime of this object. VmaBlockMetadata(const VkAllocationCallbacks* pAllocationCallbacks, @@ -6397,7 +6570,7 @@ public: protected: const VkAllocationCallbacks* GetAllocationCallbacks() const { return m_pAllocationCallbacks; } VkDeviceSize GetBufferImageGranularity() const { return m_BufferImageGranularity; } - VkDeviceSize GetDebugMargin() const { return IsVirtual() ? 0 : VMA_DEBUG_MARGIN; } + VkDeviceSize GetDebugMargin() const { return VkDeviceSize(IsVirtual() ? 0 : VMA_DEBUG_MARGIN); } void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; #if VMA_STATS_STRING_ENABLED @@ -6433,7 +6606,7 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size { if (IsVirtual()) { - VMA_DEBUG_LOG("UNFREED VIRTUAL ALLOCATION; Offset: %llu; Size: %llu; UserData: %p", offset, size, userData); + VMA_LEAK_LOG_FORMAT("UNFREED VIRTUAL ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p", offset, size, userData); } else { @@ -6444,14 +6617,14 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size const char* name = allocation->GetName(); #if VMA_STATS_STRING_ENABLED - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Name: %s; Type: %s; Usage: %u", + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %s; Usage: %" PRIu64, offset, size, userData, name ? name : "vma_empty", VMA_SUBALLOCATION_TYPE_NAMES[allocation->GetSuballocationType()], - allocation->GetBufferImageUsage()); + (uint64_t)allocation->GetBufferImageUsage().Value); #else - VMA_DEBUG_LOG("UNFREED ALLOCATION; Offset: %llu; Size: %llu; UserData: %p; Name: %s; Type: %u", + VMA_LEAK_LOG_FORMAT("UNFREED ALLOCATION; Offset: %" PRIu64 "; Size: %" PRIu64 "; UserData: %p; Name: %s; Type: %u", offset, size, userData, name ? name : "vma_empty", - (uint32_t)allocation->GetSuballocationType()); + (unsigned)allocation->GetSuballocationType()); #endif // VMA_STATS_STRING_ENABLED } @@ -6465,13 +6638,13 @@ void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, json.WriteNumber(GetSize()); json.WriteString("UnusedBytes"); - json.WriteSize(unusedBytes); + json.WriteNumber(unusedBytes); json.WriteString("Allocations"); - json.WriteSize(allocationCount); + json.WriteNumber((uint64_t)allocationCount); json.WriteString("UnusedRanges"); - json.WriteSize(unusedRangeCount); + json.WriteNumber((uint64_t)unusedRangeCount); json.WriteString("Suballocations"); json.BeginArray(); @@ -6765,770 +6938,6 @@ void VmaBlockBufferImageGranularity::AllocPage(RegionInfo& page, uint8_t allocTy #endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY_FUNCTIONS #endif // _VMA_BLOCK_BUFFER_IMAGE_GRANULARITY -#if 0 -#ifndef _VMA_BLOCK_METADATA_GENERIC -class VmaBlockMetadata_Generic : public VmaBlockMetadata -{ - friend class VmaDefragmentationAlgorithm_Generic; - friend class VmaDefragmentationAlgorithm_Fast; - VMA_CLASS_NO_COPY(VmaBlockMetadata_Generic) -public: - VmaBlockMetadata_Generic(const VkAllocationCallbacks* pAllocationCallbacks, - VkDeviceSize bufferImageGranularity, bool isVirtual); - virtual ~VmaBlockMetadata_Generic() = default; - - size_t GetAllocationCount() const override { return m_Suballocations.size() - m_FreeCount; } - VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; } - bool IsEmpty() const override { return (m_Suballocations.size() == 1) && (m_FreeCount == 1); } - void Free(VmaAllocHandle allocHandle) override { FreeSuballocation(FindAtOffset((VkDeviceSize)allocHandle - 1)); } - VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; }; - - void Init(VkDeviceSize size) override; - bool Validate() const override; - - void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; - void AddStatistics(VmaStatistics& inoutStats) const override; - -#if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; -#endif - - bool CreateAllocationRequest( - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) override; - - VkResult CheckCorruption(const void* pBlockData) override; - - void Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - void* userData) override; - - void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; - void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; - VmaAllocHandle GetAllocationListBegin() const override; - VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; - void Clear() override; - void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; - void DebugLogAllAllocations() const override; - -private: - uint32_t m_FreeCount; - VkDeviceSize m_SumFreeSize; - VmaSuballocationList m_Suballocations; - // Suballocations that are free. Sorted by size, ascending. - VmaVector> m_FreeSuballocationsBySize; - - VkDeviceSize AlignAllocationSize(VkDeviceSize size) const { return IsVirtual() ? size : VmaAlignUp(size, (VkDeviceSize)16); } - - VmaSuballocationList::iterator FindAtOffset(VkDeviceSize offset) const; - bool ValidateFreeSuballocationList() const; - - // Checks if requested suballocation with given parameters can be placed in given pFreeSuballocItem. - // If yes, fills pOffset and returns true. If no, returns false. - bool CheckAllocation( - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - VmaSuballocationList::const_iterator suballocItem, - VmaAllocHandle* pAllocHandle) const; - - // Given free suballocation, it merges it with following one, which must also be free. - void MergeFreeWithNext(VmaSuballocationList::iterator item); - // Releases given suballocation, making it free. - // Merges it with adjacent free suballocations if applicable. - // Returns iterator to new free suballocation at this place. - VmaSuballocationList::iterator FreeSuballocation(VmaSuballocationList::iterator suballocItem); - // Given free suballocation, it inserts it into sorted list of - // m_FreeSuballocationsBySize if it is suitable. - void RegisterFreeSuballocation(VmaSuballocationList::iterator item); - // Given free suballocation, it removes it from sorted list of - // m_FreeSuballocationsBySize if it is suitable. - void UnregisterFreeSuballocation(VmaSuballocationList::iterator item); -}; - -#ifndef _VMA_BLOCK_METADATA_GENERIC_FUNCTIONS -VmaBlockMetadata_Generic::VmaBlockMetadata_Generic(const VkAllocationCallbacks* pAllocationCallbacks, - VkDeviceSize bufferImageGranularity, bool isVirtual) - : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), - m_FreeCount(0), - m_SumFreeSize(0), - m_Suballocations(VmaStlAllocator(pAllocationCallbacks)), - m_FreeSuballocationsBySize(VmaStlAllocator(pAllocationCallbacks)) {} - -void VmaBlockMetadata_Generic::Init(VkDeviceSize size) -{ - VmaBlockMetadata::Init(size); - - m_FreeCount = 1; - m_SumFreeSize = size; - - VmaSuballocation suballoc = {}; - suballoc.offset = 0; - suballoc.size = size; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - - m_Suballocations.push_back(suballoc); - m_FreeSuballocationsBySize.push_back(m_Suballocations.begin()); -} - -bool VmaBlockMetadata_Generic::Validate() const -{ - VMA_VALIDATE(!m_Suballocations.empty()); - - // Expected offset of new suballocation as calculated from previous ones. - VkDeviceSize calculatedOffset = 0; - // Expected number of free suballocations as calculated from traversing their list. - uint32_t calculatedFreeCount = 0; - // Expected sum size of free suballocations as calculated from traversing their list. - VkDeviceSize calculatedSumFreeSize = 0; - // Expected number of free suballocations that should be registered in - // m_FreeSuballocationsBySize calculated from traversing their list. - size_t freeSuballocationsToRegister = 0; - // True if previous visited suballocation was free. - bool prevFree = false; - - const VkDeviceSize debugMargin = GetDebugMargin(); - - for (const auto& subAlloc : m_Suballocations) - { - // Actual offset of this suballocation doesn't match expected one. - VMA_VALIDATE(subAlloc.offset == calculatedOffset); - - const bool currFree = (subAlloc.type == VMA_SUBALLOCATION_TYPE_FREE); - // Two adjacent free suballocations are invalid. They should be merged. - VMA_VALIDATE(!prevFree || !currFree); - - VmaAllocation alloc = (VmaAllocation)subAlloc.userData; - if (!IsVirtual()) - { - VMA_VALIDATE(currFree == (alloc == VK_NULL_HANDLE)); - } - - if (currFree) - { - calculatedSumFreeSize += subAlloc.size; - ++calculatedFreeCount; - ++freeSuballocationsToRegister; - - // Margin required between allocations - every free space must be at least that large. - VMA_VALIDATE(subAlloc.size >= debugMargin); - } - else - { - if (!IsVirtual()) - { - VMA_VALIDATE((VkDeviceSize)alloc->GetAllocHandle() == subAlloc.offset + 1); - VMA_VALIDATE(alloc->GetSize() == subAlloc.size); - } - - // Margin required between allocations - previous allocation must be free. - VMA_VALIDATE(debugMargin == 0 || prevFree); - } - - calculatedOffset += subAlloc.size; - prevFree = currFree; - } - - // Number of free suballocations registered in m_FreeSuballocationsBySize doesn't - // match expected one. - VMA_VALIDATE(m_FreeSuballocationsBySize.size() == freeSuballocationsToRegister); - - VkDeviceSize lastSize = 0; - for (size_t i = 0; i < m_FreeSuballocationsBySize.size(); ++i) - { - VmaSuballocationList::iterator suballocItem = m_FreeSuballocationsBySize[i]; - - // Only free suballocations can be registered in m_FreeSuballocationsBySize. - VMA_VALIDATE(suballocItem->type == VMA_SUBALLOCATION_TYPE_FREE); - // They must be sorted by size ascending. - VMA_VALIDATE(suballocItem->size >= lastSize); - - lastSize = suballocItem->size; - } - - // Check if totals match calculated values. - VMA_VALIDATE(ValidateFreeSuballocationList()); - VMA_VALIDATE(calculatedOffset == GetSize()); - VMA_VALIDATE(calculatedSumFreeSize == m_SumFreeSize); - VMA_VALIDATE(calculatedFreeCount == m_FreeCount); - - return true; -} - -void VmaBlockMetadata_Generic::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const -{ - const uint32_t rangeCount = (uint32_t)m_Suballocations.size(); - inoutStats.statistics.blockCount++; - inoutStats.statistics.blockBytes += GetSize(); - - for (const auto& suballoc : m_Suballocations) - { - if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - VmaAddDetailedStatisticsAllocation(inoutStats, suballoc.size); - else - VmaAddDetailedStatisticsUnusedRange(inoutStats, suballoc.size); - } -} - -void VmaBlockMetadata_Generic::AddStatistics(VmaStatistics& inoutStats) const -{ - inoutStats.blockCount++; - inoutStats.allocationCount += (uint32_t)m_Suballocations.size() - m_FreeCount; - inoutStats.blockBytes += GetSize(); - inoutStats.allocationBytes += GetSize() - m_SumFreeSize; -} - -#if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Generic::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const -{ - PrintDetailedMap_Begin(json, - m_SumFreeSize, // unusedBytes - m_Suballocations.size() - (size_t)m_FreeCount, // allocationCount - m_FreeCount, // unusedRangeCount - mapRefCount); - - for (const auto& suballoc : m_Suballocations) - { - if (suballoc.type == VMA_SUBALLOCATION_TYPE_FREE) - { - PrintDetailedMap_UnusedRange(json, suballoc.offset, suballoc.size); - } - else - { - PrintDetailedMap_Allocation(json, suballoc.offset, suballoc.size, suballoc.userData); - } - } - - PrintDetailedMap_End(json); -} -#endif // VMA_STATS_STRING_ENABLED - -bool VmaBlockMetadata_Generic::CreateAllocationRequest( - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(!upperAddress); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(pAllocationRequest != VMA_NULL); - VMA_HEAVY_ASSERT(Validate()); - - allocSize = AlignAllocationSize(allocSize); - - pAllocationRequest->type = VmaAllocationRequestType::Normal; - pAllocationRequest->size = allocSize; - - const VkDeviceSize debugMargin = GetDebugMargin(); - - // There is not enough total free space in this block to fulfill the request: Early return. - if (m_SumFreeSize < allocSize + debugMargin) - { - return false; - } - - // New algorithm, efficiently searching freeSuballocationsBySize. - const size_t freeSuballocCount = m_FreeSuballocationsBySize.size(); - if (freeSuballocCount > 0) - { - if (strategy == 0 || - strategy == VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT) - { - // Find first free suballocation with size not less than allocSize + debugMargin. - VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + freeSuballocCount, - allocSize + debugMargin, - VmaSuballocationItemSizeLess()); - size_t index = it - m_FreeSuballocationsBySize.data(); - for (; index < freeSuballocCount; ++index) - { - if (CheckAllocation( - allocSize, - allocAlignment, - allocType, - m_FreeSuballocationsBySize[index], - &pAllocationRequest->allocHandle)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - else if (strategy == VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET) - { - for (VmaSuballocationList::iterator it = m_Suballocations.begin(); - it != m_Suballocations.end(); - ++it) - { - if (it->type == VMA_SUBALLOCATION_TYPE_FREE && CheckAllocation( - allocSize, - allocAlignment, - allocType, - it, - &pAllocationRequest->allocHandle)) - { - pAllocationRequest->item = it; - return true; - } - } - } - else - { - VMA_ASSERT(strategy & (VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT | VMA_ALLOCATION_CREATE_STRATEGY_MIN_OFFSET_BIT )); - // Search staring from biggest suballocations. - for (size_t index = freeSuballocCount; index--; ) - { - if (CheckAllocation( - allocSize, - allocAlignment, - allocType, - m_FreeSuballocationsBySize[index], - &pAllocationRequest->allocHandle)) - { - pAllocationRequest->item = m_FreeSuballocationsBySize[index]; - return true; - } - } - } - } - - return false; -} - -VkResult VmaBlockMetadata_Generic::CheckCorruption(const void* pBlockData) -{ - for (auto& suballoc : m_Suballocations) - { - if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - { - if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) - { - VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN_COPY; - } - } - } - - return VK_SUCCESS; -} - -void VmaBlockMetadata_Generic::Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - void* userData) -{ - VMA_ASSERT(request.type == VmaAllocationRequestType::Normal); - VMA_ASSERT(request.item != m_Suballocations.end()); - VmaSuballocation& suballoc = *request.item; - // Given suballocation is a free block. - VMA_ASSERT(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - // Given offset is inside this suballocation. - VMA_ASSERT((VkDeviceSize)request.allocHandle - 1 >= suballoc.offset); - const VkDeviceSize paddingBegin = (VkDeviceSize)request.allocHandle - suballoc.offset - 1; - VMA_ASSERT(suballoc.size >= paddingBegin + request.size); - const VkDeviceSize paddingEnd = suballoc.size - paddingBegin - request.size; - - // Unregister this free suballocation from m_FreeSuballocationsBySize and update - // it to become used. - UnregisterFreeSuballocation(request.item); - - suballoc.offset = (VkDeviceSize)request.allocHandle - 1; - suballoc.size = request.size; - suballoc.type = type; - suballoc.userData = userData; - - // If there are any free bytes remaining at the end, insert new free suballocation after current one. - if (paddingEnd) - { - VmaSuballocation paddingSuballoc = {}; - paddingSuballoc.offset = suballoc.offset + suballoc.size; - paddingSuballoc.size = paddingEnd; - paddingSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - VmaSuballocationList::iterator next = request.item; - ++next; - const VmaSuballocationList::iterator paddingEndItem = - m_Suballocations.insert(next, paddingSuballoc); - RegisterFreeSuballocation(paddingEndItem); - } - - // If there are any free bytes remaining at the beginning, insert new free suballocation before current one. - if (paddingBegin) - { - VmaSuballocation paddingSuballoc = {}; - paddingSuballoc.offset = suballoc.offset - paddingBegin; - paddingSuballoc.size = paddingBegin; - paddingSuballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - const VmaSuballocationList::iterator paddingBeginItem = - m_Suballocations.insert(request.item, paddingSuballoc); - RegisterFreeSuballocation(paddingBeginItem); - } - - // Update totals. - m_FreeCount = m_FreeCount - 1; - if (paddingBegin > 0) - { - ++m_FreeCount; - } - if (paddingEnd > 0) - { - ++m_FreeCount; - } - m_SumFreeSize -= request.size; -} - -void VmaBlockMetadata_Generic::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) -{ - outInfo.offset = (VkDeviceSize)allocHandle - 1; - const VmaSuballocation& suballoc = *FindAtOffset(outInfo.offset); - outInfo.size = suballoc.size; - outInfo.pUserData = suballoc.userData; -} - -void* VmaBlockMetadata_Generic::GetAllocationUserData(VmaAllocHandle allocHandle) const -{ - return FindAtOffset((VkDeviceSize)allocHandle - 1)->userData; -} - -VmaAllocHandle VmaBlockMetadata_Generic::GetAllocationListBegin() const -{ - if (IsEmpty()) - return VK_NULL_HANDLE; - - for (const auto& suballoc : m_Suballocations) - { - if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - return (VmaAllocHandle)(suballoc.offset + 1); - } - VMA_ASSERT(false && "Should contain at least 1 allocation!"); - return VK_NULL_HANDLE; -} - -VmaAllocHandle VmaBlockMetadata_Generic::GetNextAllocation(VmaAllocHandle prevAlloc) const -{ - VmaSuballocationList::const_iterator prev = FindAtOffset((VkDeviceSize)prevAlloc - 1); - - for (VmaSuballocationList::const_iterator it = ++prev; it != m_Suballocations.end(); ++it) - { - if (it->type != VMA_SUBALLOCATION_TYPE_FREE) - return (VmaAllocHandle)(it->offset + 1); - } - return VK_NULL_HANDLE; -} - -void VmaBlockMetadata_Generic::Clear() -{ - const VkDeviceSize size = GetSize(); - - VMA_ASSERT(IsVirtual()); - m_FreeCount = 1; - m_SumFreeSize = size; - m_Suballocations.clear(); - m_FreeSuballocationsBySize.clear(); - - VmaSuballocation suballoc = {}; - suballoc.offset = 0; - suballoc.size = size; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - m_Suballocations.push_back(suballoc); - - m_FreeSuballocationsBySize.push_back(m_Suballocations.begin()); -} - -void VmaBlockMetadata_Generic::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) -{ - VmaSuballocation& suballoc = *FindAtOffset((VkDeviceSize)allocHandle - 1); - suballoc.userData = userData; -} - -void VmaBlockMetadata_Generic::DebugLogAllAllocations() const -{ - for (const auto& suballoc : m_Suballocations) - { - if (suballoc.type != VMA_SUBALLOCATION_TYPE_FREE) - DebugLogAllocation(suballoc.offset, suballoc.size, suballoc.userData); - } -} - -VmaSuballocationList::iterator VmaBlockMetadata_Generic::FindAtOffset(VkDeviceSize offset) const -{ - VMA_HEAVY_ASSERT(!m_Suballocations.empty()); - const VkDeviceSize last = m_Suballocations.rbegin()->offset; - if (last == offset) - return m_Suballocations.rbegin().drop_const(); - const VkDeviceSize first = m_Suballocations.begin()->offset; - if (first == offset) - return m_Suballocations.begin().drop_const(); - - const size_t suballocCount = m_Suballocations.size(); - const VkDeviceSize step = (last - first + m_Suballocations.begin()->size) / suballocCount; - auto findSuballocation = [&](auto begin, auto end) -> VmaSuballocationList::iterator - { - for (auto suballocItem = begin; - suballocItem != end; - ++suballocItem) - { - if (suballocItem->offset == offset) - return suballocItem.drop_const(); - } - VMA_ASSERT(false && "Not found!"); - return m_Suballocations.end().drop_const(); - }; - // If requested offset is closer to the end of range, search from the end - if (offset - first > suballocCount * step / 2) - { - return findSuballocation(m_Suballocations.rbegin(), m_Suballocations.rend()); - } - return findSuballocation(m_Suballocations.begin(), m_Suballocations.end()); -} - -bool VmaBlockMetadata_Generic::ValidateFreeSuballocationList() const -{ - VkDeviceSize lastSize = 0; - for (size_t i = 0, count = m_FreeSuballocationsBySize.size(); i < count; ++i) - { - const VmaSuballocationList::iterator it = m_FreeSuballocationsBySize[i]; - - VMA_VALIDATE(it->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_VALIDATE(it->size >= lastSize); - lastSize = it->size; - } - return true; -} - -bool VmaBlockMetadata_Generic::CheckAllocation( - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - VmaSuballocationType allocType, - VmaSuballocationList::const_iterator suballocItem, - VmaAllocHandle* pAllocHandle) const -{ - VMA_ASSERT(allocSize > 0); - VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(suballocItem != m_Suballocations.cend()); - VMA_ASSERT(pAllocHandle != VMA_NULL); - - const VkDeviceSize debugMargin = GetDebugMargin(); - const VkDeviceSize bufferImageGranularity = GetBufferImageGranularity(); - - const VmaSuballocation& suballoc = *suballocItem; - VMA_ASSERT(suballoc.type == VMA_SUBALLOCATION_TYPE_FREE); - - // Size of this suballocation is too small for this request: Early return. - if (suballoc.size < allocSize) - { - return false; - } - - // Start from offset equal to beginning of this suballocation. - VkDeviceSize offset = suballoc.offset + (suballocItem == m_Suballocations.cbegin() ? 0 : GetDebugMargin()); - - // Apply debugMargin from the end of previous alloc. - if (debugMargin > 0) - { - offset += debugMargin; - } - - // Apply alignment. - offset = VmaAlignUp(offset, allocAlignment); - - // Check previous suballocations for BufferImageGranularity conflicts. - // Make bigger alignment if necessary. - if (bufferImageGranularity > 1 && bufferImageGranularity != allocAlignment) - { - bool bufferImageGranularityConflict = false; - VmaSuballocationList::const_iterator prevSuballocItem = suballocItem; - while (prevSuballocItem != m_Suballocations.cbegin()) - { - --prevSuballocItem; - const VmaSuballocation& prevSuballoc = *prevSuballocItem; - if (VmaBlocksOnSamePage(prevSuballoc.offset, prevSuballoc.size, offset, bufferImageGranularity)) - { - if (VmaIsBufferImageGranularityConflict(prevSuballoc.type, allocType)) - { - bufferImageGranularityConflict = true; - break; - } - } - else - // Already on previous page. - break; - } - if (bufferImageGranularityConflict) - { - offset = VmaAlignUp(offset, bufferImageGranularity); - } - } - - // Calculate padding at the beginning based on current offset. - const VkDeviceSize paddingBegin = offset - suballoc.offset; - - // Fail if requested size plus margin after is bigger than size of this suballocation. - if (paddingBegin + allocSize + debugMargin > suballoc.size) - { - return false; - } - - // Check next suballocations for BufferImageGranularity conflicts. - // If conflict exists, allocation cannot be made here. - if (allocSize % bufferImageGranularity || offset % bufferImageGranularity) - { - VmaSuballocationList::const_iterator nextSuballocItem = suballocItem; - ++nextSuballocItem; - while (nextSuballocItem != m_Suballocations.cend()) - { - const VmaSuballocation& nextSuballoc = *nextSuballocItem; - if (VmaBlocksOnSamePage(offset, allocSize, nextSuballoc.offset, bufferImageGranularity)) - { - if (VmaIsBufferImageGranularityConflict(allocType, nextSuballoc.type)) - { - return false; - } - } - else - { - // Already on next page. - break; - } - ++nextSuballocItem; - } - } - - *pAllocHandle = (VmaAllocHandle)(offset + 1); - // All tests passed: Success. pAllocHandle is already filled. - return true; -} - -void VmaBlockMetadata_Generic::MergeFreeWithNext(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item != m_Suballocations.end()); - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - - VmaSuballocationList::iterator nextItem = item; - ++nextItem; - VMA_ASSERT(nextItem != m_Suballocations.end()); - VMA_ASSERT(nextItem->type == VMA_SUBALLOCATION_TYPE_FREE); - - item->size += nextItem->size; - --m_FreeCount; - m_Suballocations.erase(nextItem); -} - -VmaSuballocationList::iterator VmaBlockMetadata_Generic::FreeSuballocation(VmaSuballocationList::iterator suballocItem) -{ - // Change this suballocation to be marked as free. - VmaSuballocation& suballoc = *suballocItem; - suballoc.type = VMA_SUBALLOCATION_TYPE_FREE; - suballoc.userData = VMA_NULL; - - // Update totals. - ++m_FreeCount; - m_SumFreeSize += suballoc.size; - - // Merge with previous and/or next suballocation if it's also free. - bool mergeWithNext = false; - bool mergeWithPrev = false; - - VmaSuballocationList::iterator nextItem = suballocItem; - ++nextItem; - if ((nextItem != m_Suballocations.end()) && (nextItem->type == VMA_SUBALLOCATION_TYPE_FREE)) - { - mergeWithNext = true; - } - - VmaSuballocationList::iterator prevItem = suballocItem; - if (suballocItem != m_Suballocations.begin()) - { - --prevItem; - if (prevItem->type == VMA_SUBALLOCATION_TYPE_FREE) - { - mergeWithPrev = true; - } - } - - if (mergeWithNext) - { - UnregisterFreeSuballocation(nextItem); - MergeFreeWithNext(suballocItem); - } - - if (mergeWithPrev) - { - UnregisterFreeSuballocation(prevItem); - MergeFreeWithNext(prevItem); - RegisterFreeSuballocation(prevItem); - return prevItem; - } - else - { - RegisterFreeSuballocation(suballocItem); - return suballocItem; - } -} - -void VmaBlockMetadata_Generic::RegisterFreeSuballocation(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. - VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - if (m_FreeSuballocationsBySize.empty()) - { - m_FreeSuballocationsBySize.push_back(item); - } - else - { - VmaVectorInsertSorted(m_FreeSuballocationsBySize, item); - } - - //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} - -void VmaBlockMetadata_Generic::UnregisterFreeSuballocation(VmaSuballocationList::iterator item) -{ - VMA_ASSERT(item->type == VMA_SUBALLOCATION_TYPE_FREE); - VMA_ASSERT(item->size > 0); - - // You may want to enable this validation at the beginning or at the end of - // this function, depending on what do you want to check. - VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); - - VmaSuballocationList::iterator* const it = VmaBinaryFindFirstNotLess( - m_FreeSuballocationsBySize.data(), - m_FreeSuballocationsBySize.data() + m_FreeSuballocationsBySize.size(), - item, - VmaSuballocationItemSizeLess()); - for (size_t index = it - m_FreeSuballocationsBySize.data(); - index < m_FreeSuballocationsBySize.size(); - ++index) - { - if (m_FreeSuballocationsBySize[index] == item) - { - VmaVectorRemove(m_FreeSuballocationsBySize, index); - return; - } - VMA_ASSERT((m_FreeSuballocationsBySize[index]->size == item->size) && "Not found."); - } - VMA_ASSERT(0 && "Not found."); - - //VMA_HEAVY_ASSERT(ValidateFreeSuballocationList()); -} -#endif // _VMA_BLOCK_METADATA_GENERIC_FUNCTIONS -#endif // _VMA_BLOCK_METADATA_GENERIC -#endif // #if 0 - #ifndef _VMA_BLOCK_METADATA_LINEAR /* Allocations and their references in internal data structure look like this: @@ -7610,7 +7019,7 @@ GetSize() +-------+ */ class VmaBlockMetadata_Linear : public VmaBlockMetadata { - VMA_CLASS_NO_COPY(VmaBlockMetadata_Linear) + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_Linear) public: VmaBlockMetadata_Linear(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize bufferImageGranularity, bool isVirtual); @@ -7618,7 +7027,7 @@ public: VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize; } bool IsEmpty() const override { return GetAllocationCount() == 0; } - VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; }; + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; } void Init(VkDeviceSize size) override; bool Validate() const override; @@ -8091,30 +7500,17 @@ void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - // 1. Process free space before this allocation. - if (lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - } - - // 2. Process this allocation. + // Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++inoutStats.allocationCount; - // 3. Prepare for next iteration. + // Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc2ndIndex; } // We are at the end. else { - if (lastOffset < freeSpace2ndTo1stEnd) - { - // There is free space from lastOffset to freeSpace2ndTo1stEnd. - const VkDeviceSize unusedRangeSize = freeSpace2ndTo1stEnd - lastOffset; - } - // End of loop. lastOffset = freeSpace2ndTo1stEnd; } @@ -8138,30 +7534,17 @@ void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const { const VmaSuballocation& suballoc = suballocations1st[nextAlloc1stIndex]; - // 1. Process free space before this allocation. - if (lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - } - - // 2. Process this allocation. + // Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++inoutStats.allocationCount; - // 3. Prepare for next iteration. + // Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; ++nextAlloc1stIndex; } // We are at the end. else { - if (lastOffset < freeSpace1stTo2ndEnd) - { - // There is free space from lastOffset to freeSpace1stTo2ndEnd. - const VkDeviceSize unusedRangeSize = freeSpace1stTo2ndEnd - lastOffset; - } - // End of loop. lastOffset = freeSpace1stTo2ndEnd; } @@ -8184,30 +7567,17 @@ void VmaBlockMetadata_Linear::AddStatistics(VmaStatistics& inoutStats) const { const VmaSuballocation& suballoc = suballocations2nd[nextAlloc2ndIndex]; - // 1. Process free space before this allocation. - if (lastOffset < suballoc.offset) - { - // There is free space from lastOffset to suballoc.offset. - const VkDeviceSize unusedRangeSize = suballoc.offset - lastOffset; - } - - // 2. Process this allocation. + // Process this allocation. // There is allocation with suballoc.offset, suballoc.size. ++inoutStats.allocationCount; - // 3. Prepare for next iteration. + // Prepare for next iteration. lastOffset = suballoc.offset + suballoc.size; --nextAlloc2ndIndex; } // We are at the end. else { - if (lastOffset < size) - { - // There is free space from lastOffset to size. - const VkDeviceSize unusedRangeSize = size - lastOffset; - } - // End of loop. lastOffset = size; } @@ -8318,7 +7688,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const // We are at the end. else { - if (lastOffset < size) + if (lastOffset < freeSpace1stTo2ndEnd) { // There is free space from lastOffset to freeSpace1stTo2ndEnd. ++unusedRangeCount; @@ -8544,6 +7914,10 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest( VMA_ASSERT(allocType != VMA_SUBALLOCATION_TYPE_FREE); VMA_ASSERT(pAllocationRequest != VMA_NULL); VMA_HEAVY_ASSERT(Validate()); + + if(allocSize > GetSize()) + return false; + pAllocationRequest->size = allocSize; return upperAddress ? CreateAllocationRequest_UpperAddress( @@ -9253,713 +8627,6 @@ bool VmaBlockMetadata_Linear::CreateAllocationRequest_UpperAddress( #endif // _VMA_BLOCK_METADATA_LINEAR_FUNCTIONS #endif // _VMA_BLOCK_METADATA_LINEAR -#if 0 -#ifndef _VMA_BLOCK_METADATA_BUDDY -/* -- GetSize() is the original size of allocated memory block. -- m_UsableSize is this size aligned down to a power of two. - All allocations and calculations happen relative to m_UsableSize. -- GetUnusableSize() is the difference between them. - It is reported as separate, unused range, not available for allocations. - -Node at level 0 has size = m_UsableSize. -Each next level contains nodes with size 2 times smaller than current level. -m_LevelCount is the maximum number of levels to use in the current object. -*/ -class VmaBlockMetadata_Buddy : public VmaBlockMetadata -{ - VMA_CLASS_NO_COPY(VmaBlockMetadata_Buddy) -public: - VmaBlockMetadata_Buddy(const VkAllocationCallbacks* pAllocationCallbacks, - VkDeviceSize bufferImageGranularity, bool isVirtual); - virtual ~VmaBlockMetadata_Buddy(); - - size_t GetAllocationCount() const override { return m_AllocationCount; } - VkDeviceSize GetSumFreeSize() const override { return m_SumFreeSize + GetUnusableSize(); } - bool IsEmpty() const override { return m_Root->type == Node::TYPE_FREE; } - VkResult CheckCorruption(const void* pBlockData) override { return VK_ERROR_FEATURE_NOT_PRESENT; } - VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return (VkDeviceSize)allocHandle - 1; }; - void DebugLogAllAllocations() const override { DebugLogAllAllocationNode(m_Root, 0); } - - void Init(VkDeviceSize size) override; - bool Validate() const override; - - void AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const override; - void AddStatistics(VmaStatistics& inoutStats) const override; - -#if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; -#endif - - bool CreateAllocationRequest( - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) override; - - void Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - void* userData) override; - - void Free(VmaAllocHandle allocHandle) override; - void GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) override; - void* GetAllocationUserData(VmaAllocHandle allocHandle) const override; - VmaAllocHandle GetAllocationListBegin() const override; - VmaAllocHandle GetNextAllocation(VmaAllocHandle prevAlloc) const override; - void Clear() override; - void SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) override; - -private: - static const size_t MAX_LEVELS = 48; - - struct ValidationContext - { - size_t calculatedAllocationCount = 0; - size_t calculatedFreeCount = 0; - VkDeviceSize calculatedSumFreeSize = 0; - }; - struct Node - { - VkDeviceSize offset; - enum TYPE - { - TYPE_FREE, - TYPE_ALLOCATION, - TYPE_SPLIT, - TYPE_COUNT - } type; - Node* parent; - Node* buddy; - - union - { - struct - { - Node* prev; - Node* next; - } free; - struct - { - void* userData; - } allocation; - struct - { - Node* leftChild; - } split; - }; - }; - - // Size of the memory block aligned down to a power of two. - VkDeviceSize m_UsableSize; - uint32_t m_LevelCount; - VmaPoolAllocator m_NodeAllocator; - Node* m_Root; - struct - { - Node* front; - Node* back; - } m_FreeList[MAX_LEVELS]; - - // Number of nodes in the tree with type == TYPE_ALLOCATION. - size_t m_AllocationCount; - // Number of nodes in the tree with type == TYPE_FREE. - size_t m_FreeCount; - // Doesn't include space wasted due to internal fragmentation - allocation sizes are just aligned up to node sizes. - // Doesn't include unusable size. - VkDeviceSize m_SumFreeSize; - - VkDeviceSize GetUnusableSize() const { return GetSize() - m_UsableSize; } - VkDeviceSize LevelToNodeSize(uint32_t level) const { return m_UsableSize >> level; } - - VkDeviceSize AlignAllocationSize(VkDeviceSize size) const - { - if (!IsVirtual()) - { - size = VmaAlignUp(size, (VkDeviceSize)16); - } - return VmaNextPow2(size); - } - Node* FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) const; - void DeleteNodeChildren(Node* node); - bool ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const; - uint32_t AllocSizeToLevel(VkDeviceSize allocSize) const; - void AddNodeToDetailedStatistics(VmaDetailedStatistics& inoutStats, const Node* node, VkDeviceSize levelNodeSize) const; - // Adds node to the front of FreeList at given level. - // node->type must be FREE. - // node->free.prev, next can be undefined. - void AddToFreeListFront(uint32_t level, Node* node); - // Removes node from FreeList at given level. - // node->type must be FREE. - // node->free.prev, next stay untouched. - void RemoveFromFreeList(uint32_t level, Node* node); - void DebugLogAllAllocationNode(Node* node, uint32_t level) const; - -#if VMA_STATS_STRING_ENABLED - void PrintDetailedMapNode(class VmaJsonWriter& json, const Node* node, VkDeviceSize levelNodeSize) const; -#endif -}; - -#ifndef _VMA_BLOCK_METADATA_BUDDY_FUNCTIONS -VmaBlockMetadata_Buddy::VmaBlockMetadata_Buddy(const VkAllocationCallbacks* pAllocationCallbacks, - VkDeviceSize bufferImageGranularity, bool isVirtual) - : VmaBlockMetadata(pAllocationCallbacks, bufferImageGranularity, isVirtual), - m_NodeAllocator(pAllocationCallbacks, 32), // firstBlockCapacity - m_Root(VMA_NULL), - m_AllocationCount(0), - m_FreeCount(1), - m_SumFreeSize(0) -{ - memset(m_FreeList, 0, sizeof(m_FreeList)); -} - -VmaBlockMetadata_Buddy::~VmaBlockMetadata_Buddy() -{ - DeleteNodeChildren(m_Root); - m_NodeAllocator.Free(m_Root); -} - -void VmaBlockMetadata_Buddy::Init(VkDeviceSize size) -{ - VmaBlockMetadata::Init(size); - - m_UsableSize = VmaPrevPow2(size); - m_SumFreeSize = m_UsableSize; - - // Calculate m_LevelCount. - const VkDeviceSize minNodeSize = IsVirtual() ? 1 : 16; - m_LevelCount = 1; - while (m_LevelCount < MAX_LEVELS && - LevelToNodeSize(m_LevelCount) >= minNodeSize) - { - ++m_LevelCount; - } - - Node* rootNode = m_NodeAllocator.Alloc(); - rootNode->offset = 0; - rootNode->type = Node::TYPE_FREE; - rootNode->parent = VMA_NULL; - rootNode->buddy = VMA_NULL; - - m_Root = rootNode; - AddToFreeListFront(0, rootNode); -} - -bool VmaBlockMetadata_Buddy::Validate() const -{ - // Validate tree. - ValidationContext ctx; - if (!ValidateNode(ctx, VMA_NULL, m_Root, 0, LevelToNodeSize(0))) - { - VMA_VALIDATE(false && "ValidateNode failed."); - } - VMA_VALIDATE(m_AllocationCount == ctx.calculatedAllocationCount); - VMA_VALIDATE(m_SumFreeSize == ctx.calculatedSumFreeSize); - - // Validate free node lists. - for (uint32_t level = 0; level < m_LevelCount; ++level) - { - VMA_VALIDATE(m_FreeList[level].front == VMA_NULL || - m_FreeList[level].front->free.prev == VMA_NULL); - - for (Node* node = m_FreeList[level].front; - node != VMA_NULL; - node = node->free.next) - { - VMA_VALIDATE(node->type == Node::TYPE_FREE); - - if (node->free.next == VMA_NULL) - { - VMA_VALIDATE(m_FreeList[level].back == node); - } - else - { - VMA_VALIDATE(node->free.next->free.prev == node); - } - } - } - - // Validate that free lists ar higher levels are empty. - for (uint32_t level = m_LevelCount; level < MAX_LEVELS; ++level) - { - VMA_VALIDATE(m_FreeList[level].front == VMA_NULL && m_FreeList[level].back == VMA_NULL); - } - - return true; -} - -void VmaBlockMetadata_Buddy::AddDetailedStatistics(VmaDetailedStatistics& inoutStats) const -{ - inoutStats.statistics.blockCount++; - inoutStats.statistics.blockBytes += GetSize(); - - AddNodeToDetailedStatistics(inoutStats, m_Root, LevelToNodeSize(0)); - - const VkDeviceSize unusableSize = GetUnusableSize(); - if (unusableSize > 0) - VmaAddDetailedStatisticsUnusedRange(inoutStats, unusableSize); -} - -void VmaBlockMetadata_Buddy::AddStatistics(VmaStatistics& inoutStats) const -{ - inoutStats.blockCount++; - inoutStats.allocationCount += (uint32_t)m_AllocationCount; - inoutStats.blockBytes += GetSize(); - inoutStats.allocationBytes += GetSize() - m_SumFreeSize; -} - -#if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const -{ - VmaDetailedStatistics stats; - VmaClearDetailedStatistics(stats); - AddDetailedStatistics(stats); - - PrintDetailedMap_Begin( - json, - stats.statistics.blockBytes - stats.statistics.allocationBytes, - stats.statistics.allocationCount, - stats.unusedRangeCount, - mapRefCount); - - PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0)); - - const VkDeviceSize unusableSize = GetUnusableSize(); - if (unusableSize > 0) - { - PrintDetailedMap_UnusedRange(json, - m_UsableSize, // offset - unusableSize); // size - } - - PrintDetailedMap_End(json); -} -#endif // VMA_STATS_STRING_ENABLED - -bool VmaBlockMetadata_Buddy::CreateAllocationRequest( - VkDeviceSize allocSize, - VkDeviceSize allocAlignment, - bool upperAddress, - VmaSuballocationType allocType, - uint32_t strategy, - VmaAllocationRequest* pAllocationRequest) -{ - VMA_ASSERT(!upperAddress && "VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT can be used only with linear algorithm."); - - allocSize = AlignAllocationSize(allocSize); - - // Simple way to respect bufferImageGranularity. May be optimized some day. - // Whenever it might be an OPTIMAL image... - if (allocType == VMA_SUBALLOCATION_TYPE_UNKNOWN || - allocType == VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN || - allocType == VMA_SUBALLOCATION_TYPE_IMAGE_OPTIMAL) - { - allocAlignment = VMA_MAX(allocAlignment, GetBufferImageGranularity()); - allocSize = VmaAlignUp(allocSize, GetBufferImageGranularity()); - } - - if (allocSize > m_UsableSize) - { - return false; - } - - const uint32_t targetLevel = AllocSizeToLevel(allocSize); - for (uint32_t level = targetLevel; level--; ) - { - for (Node* freeNode = m_FreeList[level].front; - freeNode != VMA_NULL; - freeNode = freeNode->free.next) - { - if (freeNode->offset % allocAlignment == 0) - { - pAllocationRequest->type = VmaAllocationRequestType::Normal; - pAllocationRequest->allocHandle = (VmaAllocHandle)(freeNode->offset + 1); - pAllocationRequest->size = allocSize; - pAllocationRequest->customData = (void*)(uintptr_t)level; - return true; - } - } - } - - return false; -} - -void VmaBlockMetadata_Buddy::Alloc( - const VmaAllocationRequest& request, - VmaSuballocationType type, - void* userData) -{ - VMA_ASSERT(request.type == VmaAllocationRequestType::Normal); - - const uint32_t targetLevel = AllocSizeToLevel(request.size); - uint32_t currLevel = (uint32_t)(uintptr_t)request.customData; - - Node* currNode = m_FreeList[currLevel].front; - VMA_ASSERT(currNode != VMA_NULL && currNode->type == Node::TYPE_FREE); - const VkDeviceSize offset = (VkDeviceSize)request.allocHandle - 1; - while (currNode->offset != offset) - { - currNode = currNode->free.next; - VMA_ASSERT(currNode != VMA_NULL && currNode->type == Node::TYPE_FREE); - } - - // Go down, splitting free nodes. - while (currLevel < targetLevel) - { - // currNode is already first free node at currLevel. - // Remove it from list of free nodes at this currLevel. - RemoveFromFreeList(currLevel, currNode); - - const uint32_t childrenLevel = currLevel + 1; - - // Create two free sub-nodes. - Node* leftChild = m_NodeAllocator.Alloc(); - Node* rightChild = m_NodeAllocator.Alloc(); - - leftChild->offset = currNode->offset; - leftChild->type = Node::TYPE_FREE; - leftChild->parent = currNode; - leftChild->buddy = rightChild; - - rightChild->offset = currNode->offset + LevelToNodeSize(childrenLevel); - rightChild->type = Node::TYPE_FREE; - rightChild->parent = currNode; - rightChild->buddy = leftChild; - - // Convert current currNode to split type. - currNode->type = Node::TYPE_SPLIT; - currNode->split.leftChild = leftChild; - - // Add child nodes to free list. Order is important! - AddToFreeListFront(childrenLevel, rightChild); - AddToFreeListFront(childrenLevel, leftChild); - - ++m_FreeCount; - ++currLevel; - currNode = m_FreeList[currLevel].front; - - /* - We can be sure that currNode, as left child of node previously split, - also fulfills the alignment requirement. - */ - } - - // Remove from free list. - VMA_ASSERT(currLevel == targetLevel && - currNode != VMA_NULL && - currNode->type == Node::TYPE_FREE); - RemoveFromFreeList(currLevel, currNode); - - // Convert to allocation node. - currNode->type = Node::TYPE_ALLOCATION; - currNode->allocation.userData = userData; - - ++m_AllocationCount; - --m_FreeCount; - m_SumFreeSize -= request.size; -} - -void VmaBlockMetadata_Buddy::GetAllocationInfo(VmaAllocHandle allocHandle, VmaVirtualAllocationInfo& outInfo) -{ - uint32_t level = 0; - outInfo.offset = (VkDeviceSize)allocHandle - 1; - const Node* const node = FindAllocationNode(outInfo.offset, level); - outInfo.size = LevelToNodeSize(level); - outInfo.pUserData = node->allocation.userData; -} - -void* VmaBlockMetadata_Buddy::GetAllocationUserData(VmaAllocHandle allocHandle) const -{ - uint32_t level = 0; - const Node* const node = FindAllocationNode((VkDeviceSize)allocHandle - 1, level); - return node->allocation.userData; -} - -VmaAllocHandle VmaBlockMetadata_Buddy::GetAllocationListBegin() const -{ - // Function only used for defragmentation, which is disabled for this algorithm - return VK_NULL_HANDLE; -} - -VmaAllocHandle VmaBlockMetadata_Buddy::GetNextAllocation(VmaAllocHandle prevAlloc) const -{ - // Function only used for defragmentation, which is disabled for this algorithm - return VK_NULL_HANDLE; -} - -void VmaBlockMetadata_Buddy::DeleteNodeChildren(Node* node) -{ - if (node->type == Node::TYPE_SPLIT) - { - DeleteNodeChildren(node->split.leftChild->buddy); - DeleteNodeChildren(node->split.leftChild); - const VkAllocationCallbacks* allocationCallbacks = GetAllocationCallbacks(); - m_NodeAllocator.Free(node->split.leftChild->buddy); - m_NodeAllocator.Free(node->split.leftChild); - } -} - -void VmaBlockMetadata_Buddy::Clear() -{ - DeleteNodeChildren(m_Root); - m_Root->type = Node::TYPE_FREE; - m_AllocationCount = 0; - m_FreeCount = 1; - m_SumFreeSize = m_UsableSize; -} - -void VmaBlockMetadata_Buddy::SetAllocationUserData(VmaAllocHandle allocHandle, void* userData) -{ - uint32_t level = 0; - Node* const node = FindAllocationNode((VkDeviceSize)allocHandle - 1, level); - node->allocation.userData = userData; -} - -VmaBlockMetadata_Buddy::Node* VmaBlockMetadata_Buddy::FindAllocationNode(VkDeviceSize offset, uint32_t& outLevel) const -{ - Node* node = m_Root; - VkDeviceSize nodeOffset = 0; - outLevel = 0; - VkDeviceSize levelNodeSize = LevelToNodeSize(0); - while (node->type == Node::TYPE_SPLIT) - { - const VkDeviceSize nextLevelNodeSize = levelNodeSize >> 1; - if (offset < nodeOffset + nextLevelNodeSize) - { - node = node->split.leftChild; - } - else - { - node = node->split.leftChild->buddy; - nodeOffset += nextLevelNodeSize; - } - ++outLevel; - levelNodeSize = nextLevelNodeSize; - } - - VMA_ASSERT(node != VMA_NULL && node->type == Node::TYPE_ALLOCATION); - return node; -} - -bool VmaBlockMetadata_Buddy::ValidateNode(ValidationContext& ctx, const Node* parent, const Node* curr, uint32_t level, VkDeviceSize levelNodeSize) const -{ - VMA_VALIDATE(level < m_LevelCount); - VMA_VALIDATE(curr->parent == parent); - VMA_VALIDATE((curr->buddy == VMA_NULL) == (parent == VMA_NULL)); - VMA_VALIDATE(curr->buddy == VMA_NULL || curr->buddy->buddy == curr); - switch (curr->type) - { - case Node::TYPE_FREE: - // curr->free.prev, next are validated separately. - ctx.calculatedSumFreeSize += levelNodeSize; - ++ctx.calculatedFreeCount; - break; - case Node::TYPE_ALLOCATION: - ++ctx.calculatedAllocationCount; - if (!IsVirtual()) - { - VMA_VALIDATE(curr->allocation.userData != VMA_NULL); - } - break; - case Node::TYPE_SPLIT: - { - const uint32_t childrenLevel = level + 1; - const VkDeviceSize childrenLevelNodeSize = levelNodeSize >> 1; - const Node* const leftChild = curr->split.leftChild; - VMA_VALIDATE(leftChild != VMA_NULL); - VMA_VALIDATE(leftChild->offset == curr->offset); - if (!ValidateNode(ctx, curr, leftChild, childrenLevel, childrenLevelNodeSize)) - { - VMA_VALIDATE(false && "ValidateNode for left child failed."); - } - const Node* const rightChild = leftChild->buddy; - VMA_VALIDATE(rightChild->offset == curr->offset + childrenLevelNodeSize); - if (!ValidateNode(ctx, curr, rightChild, childrenLevel, childrenLevelNodeSize)) - { - VMA_VALIDATE(false && "ValidateNode for right child failed."); - } - } - break; - default: - return false; - } - - return true; -} - -uint32_t VmaBlockMetadata_Buddy::AllocSizeToLevel(VkDeviceSize allocSize) const -{ - // I know this could be optimized somehow e.g. by using std::log2p1 from C++20. - uint32_t level = 0; - VkDeviceSize currLevelNodeSize = m_UsableSize; - VkDeviceSize nextLevelNodeSize = currLevelNodeSize >> 1; - while (allocSize <= nextLevelNodeSize && level + 1 < m_LevelCount) - { - ++level; - currLevelNodeSize >>= 1; - nextLevelNodeSize >>= 1; - } - return level; -} - -void VmaBlockMetadata_Buddy::Free(VmaAllocHandle allocHandle) -{ - uint32_t level = 0; - Node* node = FindAllocationNode((VkDeviceSize)allocHandle - 1, level); - - ++m_FreeCount; - --m_AllocationCount; - m_SumFreeSize += LevelToNodeSize(level); - - node->type = Node::TYPE_FREE; - - // Join free nodes if possible. - while (level > 0 && node->buddy->type == Node::TYPE_FREE) - { - RemoveFromFreeList(level, node->buddy); - Node* const parent = node->parent; - - m_NodeAllocator.Free(node->buddy); - m_NodeAllocator.Free(node); - parent->type = Node::TYPE_FREE; - - node = parent; - --level; - --m_FreeCount; - } - - AddToFreeListFront(level, node); -} - -void VmaBlockMetadata_Buddy::AddNodeToDetailedStatistics(VmaDetailedStatistics& inoutStats, const Node* node, VkDeviceSize levelNodeSize) const -{ - switch (node->type) - { - case Node::TYPE_FREE: - VmaAddDetailedStatisticsUnusedRange(inoutStats, levelNodeSize); - break; - case Node::TYPE_ALLOCATION: - VmaAddDetailedStatisticsAllocation(inoutStats, levelNodeSize); - break; - case Node::TYPE_SPLIT: - { - const VkDeviceSize childrenNodeSize = levelNodeSize / 2; - const Node* const leftChild = node->split.leftChild; - AddNodeToDetailedStatistics(inoutStats, leftChild, childrenNodeSize); - const Node* const rightChild = leftChild->buddy; - AddNodeToDetailedStatistics(inoutStats, rightChild, childrenNodeSize); - } - break; - default: - VMA_ASSERT(0); - } -} - -void VmaBlockMetadata_Buddy::AddToFreeListFront(uint32_t level, Node* node) -{ - VMA_ASSERT(node->type == Node::TYPE_FREE); - - // List is empty. - Node* const frontNode = m_FreeList[level].front; - if (frontNode == VMA_NULL) - { - VMA_ASSERT(m_FreeList[level].back == VMA_NULL); - node->free.prev = node->free.next = VMA_NULL; - m_FreeList[level].front = m_FreeList[level].back = node; - } - else - { - VMA_ASSERT(frontNode->free.prev == VMA_NULL); - node->free.prev = VMA_NULL; - node->free.next = frontNode; - frontNode->free.prev = node; - m_FreeList[level].front = node; - } -} - -void VmaBlockMetadata_Buddy::RemoveFromFreeList(uint32_t level, Node* node) -{ - VMA_ASSERT(m_FreeList[level].front != VMA_NULL); - - // It is at the front. - if (node->free.prev == VMA_NULL) - { - VMA_ASSERT(m_FreeList[level].front == node); - m_FreeList[level].front = node->free.next; - } - else - { - Node* const prevFreeNode = node->free.prev; - VMA_ASSERT(prevFreeNode->free.next == node); - prevFreeNode->free.next = node->free.next; - } - - // It is at the back. - if (node->free.next == VMA_NULL) - { - VMA_ASSERT(m_FreeList[level].back == node); - m_FreeList[level].back = node->free.prev; - } - else - { - Node* const nextFreeNode = node->free.next; - VMA_ASSERT(nextFreeNode->free.prev == node); - nextFreeNode->free.prev = node->free.prev; - } -} - -void VmaBlockMetadata_Buddy::DebugLogAllAllocationNode(Node* node, uint32_t level) const -{ - switch (node->type) - { - case Node::TYPE_FREE: - break; - case Node::TYPE_ALLOCATION: - DebugLogAllocation(node->offset, LevelToNodeSize(level), node->allocation.userData); - break; - case Node::TYPE_SPLIT: - { - ++level; - DebugLogAllAllocationNode(node->split.leftChild, level); - DebugLogAllAllocationNode(node->split.leftChild->buddy, level); - } - break; - default: - VMA_ASSERT(0); - } -} - -#if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Buddy::PrintDetailedMapNode(class VmaJsonWriter& json, const Node* node, VkDeviceSize levelNodeSize) const -{ - switch (node->type) - { - case Node::TYPE_FREE: - PrintDetailedMap_UnusedRange(json, node->offset, levelNodeSize); - break; - case Node::TYPE_ALLOCATION: - PrintDetailedMap_Allocation(json, node->offset, levelNodeSize, node->allocation.userData); - break; - case Node::TYPE_SPLIT: - { - const VkDeviceSize childrenNodeSize = levelNodeSize / 2; - const Node* const leftChild = node->split.leftChild; - PrintDetailedMapNode(json, leftChild, childrenNodeSize); - const Node* const rightChild = leftChild->buddy; - PrintDetailedMapNode(json, rightChild, childrenNodeSize); - } - break; - default: - VMA_ASSERT(0); - } -} -#endif // VMA_STATS_STRING_ENABLED -#endif // _VMA_BLOCK_METADATA_BUDDY_FUNCTIONS -#endif // _VMA_BLOCK_METADATA_BUDDY -#endif // #if 0 - #ifndef _VMA_BLOCK_METADATA_TLSF // To not search current larger region if first allocation won't succeed and skip to smaller range // use with VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT as strategy in CreateAllocationRequest(). @@ -9967,7 +8634,7 @@ void VmaBlockMetadata_Buddy::PrintDetailedMapNode(class VmaJsonWriter& json, con // VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT for fastest alloc time possible. class VmaBlockMetadata_TLSF : public VmaBlockMetadata { - VMA_CLASS_NO_COPY(VmaBlockMetadata_TLSF) + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockMetadata_TLSF) public: VmaBlockMetadata_TLSF(const VkAllocationCallbacks* pAllocationCallbacks, VkDeviceSize bufferImageGranularity, bool isVirtual); @@ -9977,7 +8644,7 @@ public: size_t GetFreeRegionsCount() const override { return m_BlocksFreeCount + 1; } VkDeviceSize GetSumFreeSize() const override { return m_BlocksFreeSize + m_NullBlock->size; } bool IsEmpty() const override { return m_NullBlock->offset == 0; } - VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; }; + VkDeviceSize GetAllocationOffset(VmaAllocHandle allocHandle) const override { return ((Block*)allocHandle)->offset; } void Init(VkDeviceSize size) override; bool Validate() const override; @@ -10129,7 +8796,7 @@ void VmaBlockMetadata_TLSF::Init(VkDeviceSize size) else m_ListsCount += 4; - m_MemoryClasses = memoryClass + 2; + m_MemoryClasses = memoryClass + uint8_t(2); memset(m_InnerIsFreeBitmap, 0, MAX_MEMORY_CLASSES * sizeof(uint32_t)); m_FreeList = vma_new_array(GetAllocationCallbacks(), Block*, m_ListsCount); @@ -10322,7 +8989,7 @@ bool VmaBlockMetadata_TLSF::CreateAllocationRequest( // Round up to the next block VkDeviceSize sizeForNextList = allocSize; - VkDeviceSize smallSizeStep = SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4); + VkDeviceSize smallSizeStep = VkDeviceSize(SMALL_BUFFER_SIZE / (IsVirtual() ? 1 << SECOND_LEVEL_INDEX : 4)); if (allocSize > SMALL_BUFFER_SIZE) { sizeForNextList += (1ULL << (VMA_BITSCAN_MSB(allocSize) - SECOND_LEVEL_INDEX)); @@ -10332,8 +8999,8 @@ bool VmaBlockMetadata_TLSF::CreateAllocationRequest( else sizeForNextList += smallSizeStep; - uint32_t nextListIndex = 0; - uint32_t prevListIndex = 0; + uint32_t nextListIndex = m_ListsCount; + uint32_t prevListIndex = m_ListsCount; Block* nextListBlock = VMA_NULL; Block* prevListBlock = VMA_NULL; @@ -10731,7 +9398,7 @@ void VmaBlockMetadata_TLSF::DebugLogAllAllocations() const uint8_t VmaBlockMetadata_TLSF::SizeToMemoryClass(VkDeviceSize size) const { if (size > SMALL_BUFFER_SIZE) - return VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT; + return uint8_t(VMA_BITSCAN_MSB(size) - MEMORY_CLASS_SHIFT); return 0; } @@ -10906,7 +9573,7 @@ Synchronized internally with a mutex. class VmaBlockVector { friend struct VmaDefragmentationContext_T; - VMA_CLASS_NO_COPY(VmaBlockVector) + VMA_CLASS_NO_COPY_NO_MOVE(VmaBlockVector) public: VmaBlockVector( VmaAllocator hAllocator, @@ -11025,7 +9692,7 @@ private: #ifndef _VMA_DEFRAGMENTATION_CONTEXT struct VmaDefragmentationContext_T { - VMA_CLASS_NO_COPY(VmaDefragmentationContext_T) + VMA_CLASS_NO_COPY_NO_MOVE(VmaDefragmentationContext_T) public: VmaDefragmentationContext_T( VmaAllocator hAllocator, @@ -11075,6 +9742,8 @@ private: const VkDeviceSize m_MaxPassBytes; const uint32_t m_MaxPassAllocations; + const PFN_vmaCheckDefragmentationBreakFunction m_BreakCallback; + void* m_BreakCallbackUserData; VmaStlAllocator m_MoveAllocator; VmaVector> m_Moves; @@ -11112,7 +9781,7 @@ private: struct VmaPool_T { friend struct VmaPoolListItemTraits; - VMA_CLASS_NO_COPY(VmaPool_T) + VMA_CLASS_NO_COPY_NO_MOVE(VmaPool_T) public: VmaBlockVector m_BlockVector; VmaDedicatedAllocationList m_DedicatedAllocations; @@ -11154,6 +9823,9 @@ struct VmaPoolListItemTraits #ifndef _VMA_CURRENT_BUDGET_DATA struct VmaCurrentBudgetData { + VMA_CLASS_NO_COPY_NO_MOVE(VmaCurrentBudgetData) +public: + VMA_ATOMIC_UINT32 m_BlockCount[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT32 m_AllocationCount[VK_MAX_MEMORY_HEAPS]; VMA_ATOMIC_UINT64 m_BlockBytes[VK_MAX_MEMORY_HEAPS]; @@ -11222,7 +9894,7 @@ Thread-safe wrapper over VmaPoolAllocator free list, for allocation of VmaAlloca */ class VmaAllocationObjectAllocator { - VMA_CLASS_NO_COPY(VmaAllocationObjectAllocator) + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocationObjectAllocator) public: VmaAllocationObjectAllocator(const VkAllocationCallbacks* pAllocationCallbacks) : m_Allocator(pAllocationCallbacks, 1024) {} @@ -11252,7 +9924,7 @@ void VmaAllocationObjectAllocator::Free(VmaAllocation hAlloc) #ifndef _VMA_VIRTUAL_BLOCK_T struct VmaVirtualBlock_T { - VMA_CLASS_NO_COPY(VmaVirtualBlock_T) + VMA_CLASS_NO_COPY_NO_MOVE(VmaVirtualBlock_T) public: const bool m_AllocationCallbacksSpecified; const VkAllocationCallbacks m_AllocationCallbacks; @@ -11288,14 +9960,15 @@ VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo const uint32_t algorithm = createInfo.flags & VMA_VIRTUAL_BLOCK_CREATE_ALGORITHM_MASK; switch (algorithm) { - default: - VMA_ASSERT(0); case 0: m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); break; case VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT: m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_Linear)(VK_NULL_HANDLE, 1, true); break; + default: + VMA_ASSERT(0); + m_Metadata = vma_new(GetAllocationCallbacks(), VmaBlockMetadata_TLSF)(VK_NULL_HANDLE, 1, true); } m_Metadata->Init(createInfo.size); @@ -11303,12 +9976,13 @@ VmaVirtualBlock_T::VmaVirtualBlock_T(const VmaVirtualBlockCreateInfo& createInfo VmaVirtualBlock_T::~VmaVirtualBlock_T() { - // Define macro VMA_DEBUG_LOG to receive the list of the unfreed allocations + // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT + // to receive the list of the unfreed allocations. if (!m_Metadata->IsEmpty()) m_Metadata->DebugLogAllAllocations(); // This is the most important assert in the entire library. // Hitting it means you have some memory leak - unreleased virtual allocations. - VMA_ASSERT(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); + VMA_ASSERT_LEAK(m_Metadata->IsEmpty() && "Some virtual allocations were not freed before destruction of this virtual block!"); vma_delete(GetAllocationCallbacks(), m_Metadata); } @@ -11391,20 +10065,22 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) // Main allocator object. struct VmaAllocator_T { - VMA_CLASS_NO_COPY(VmaAllocator_T) + VMA_CLASS_NO_COPY_NO_MOVE(VmaAllocator_T) public: - bool m_UseMutex; - uint32_t m_VulkanApiVersion; + const bool m_UseMutex; + const uint32_t m_VulkanApiVersion; bool m_UseKhrDedicatedAllocation; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). bool m_UseKhrBindMemory2; // Can be set only if m_VulkanApiVersion < VK_MAKE_VERSION(1, 1, 0). bool m_UseExtMemoryBudget; bool m_UseAmdDeviceCoherentMemory; bool m_UseKhrBufferDeviceAddress; bool m_UseExtMemoryPriority; - VkDevice m_hDevice; - VkInstance m_hInstance; - bool m_AllocationCallbacksSpecified; - VkAllocationCallbacks m_AllocationCallbacks; + bool m_UseKhrMaintenance4; + bool m_UseKhrMaintenance5; + const VkDevice m_hDevice; + const VkInstance m_hInstance; + const bool m_AllocationCallbacksSpecified; + const VkAllocationCallbacks m_AllocationCallbacks; VmaDeviceMemoryCallbacks m_DeviceMemoryCallbacks; VmaAllocationObjectAllocator m_AllocationObjectAllocator; @@ -11485,7 +10161,7 @@ public: VkResult FindMemoryTypeIndex( uint32_t memoryTypeBits, const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkFlags bufImgUsage, // VkBufferCreateInfo::usage or VkImageCreateInfo::usage. UINT32_MAX if unknown. + VmaBufferImageUsage bufImgUsage, uint32_t* pMemoryTypeIndex) const; // Main allocation function. @@ -11495,7 +10171,7 @@ public: bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, VkImage dedicatedImage, - VkFlags dedicatedBufferImageUsage, // UINT32_MAX if unknown. + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -11516,6 +10192,7 @@ public: #endif void GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationInfo* pAllocationInfo); + void GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo); VkResult CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool); void DestroyPool(VmaPool pool); @@ -11569,6 +10246,17 @@ public: const VkDeviceSize* offsets, const VkDeviceSize* sizes, VMA_CACHE_OPERATION op); + VkResult CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size); + VkResult CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size); + void FillAllocation(const VmaAllocation hAllocation, uint8_t pattern); /* @@ -11628,7 +10316,7 @@ private: bool dedicatedPreferred, VkBuffer dedicatedBuffer, VkImage dedicatedImage, - VkFlags dedicatedBufferImageUsage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, @@ -11665,10 +10353,10 @@ private: float priority, VkBuffer dedicatedBuffer, VkImage dedicatedImage, - VkFlags dedicatedBufferImageUsage, + VmaBufferImageUsage dedicatedBufferImageUsage, size_t allocationCount, VmaAllocation* pAllocations, - const void* pNextChain = nullptr); + const void* pNextChain = VMA_NULL); void FreeDedicatedMemory(const VmaAllocation allocation); @@ -11756,8 +10444,8 @@ VmaDeviceMemoryBlock::VmaDeviceMemoryBlock(VmaAllocator hAllocator) VmaDeviceMemoryBlock::~VmaDeviceMemoryBlock() { - VMA_ASSERT(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); - VMA_ASSERT(m_hMemory == VK_NULL_HANDLE); + VMA_ASSERT_LEAK(m_MapCount == 0 && "VkDeviceMemory block is being destroyed while it is still mapped."); + VMA_ASSERT_LEAK(m_hMemory == VK_NULL_HANDLE); } void VmaDeviceMemoryBlock::Init( @@ -11779,14 +10467,16 @@ void VmaDeviceMemoryBlock::Init( switch (algorithm) { + case 0: + m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), + bufferImageGranularity, false); // isVirtual + break; case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT: m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_Linear)(hAllocator->GetAllocationCallbacks(), bufferImageGranularity, false); // isVirtual break; default: VMA_ASSERT(0); - // Fall-through. - case 0: m_pMetadata = vma_new(hAllocator, VmaBlockMetadata_TLSF)(hAllocator->GetAllocationCallbacks(), bufferImageGranularity, false); // isVirtual } @@ -11795,14 +10485,15 @@ void VmaDeviceMemoryBlock::Init( void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) { - // Define macro VMA_DEBUG_LOG to receive the list of the unfreed allocations + // Define macro VMA_DEBUG_LOG_FORMAT or more specialized VMA_LEAK_LOG_FORMAT + // to receive the list of the unfreed allocations. if (!m_pMetadata->IsEmpty()) m_pMetadata->DebugLogAllAllocations(); // This is the most important assert in the entire library. // Hitting it means you have some memory leak - unreleased VmaAllocation objects. - VMA_ASSERT(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); + VMA_ASSERT_LEAK(m_pMetadata->IsEmpty() && "Some allocations were not freed before destruction of this memory block!"); - VMA_ASSERT(m_hMemory != VK_NULL_HANDLE); + VMA_ASSERT_LEAK(m_hMemory != VK_NULL_HANDLE); allocator->FreeVulkanMemory(m_MemoryTypeIndex, m_pMetadata->GetSize(), m_hMemory); m_hMemory = VK_NULL_HANDLE; @@ -11840,7 +10531,7 @@ bool VmaDeviceMemoryBlock::Validate() const VkResult VmaDeviceMemoryBlock::CheckCorruption(VmaAllocator hAllocator) { - void* pData = nullptr; + void* pData = VMA_NULL; VkResult res = Map(hAllocator, 1, &pData); if (res != VK_SUCCESS) { @@ -11863,11 +10554,11 @@ VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); - m_MappingHysteresis.PostMap(); if (oldTotalMapCount != 0) { - m_MapCount += count; VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount += count; if (ppData != VMA_NULL) { *ppData = m_pMappedData; @@ -11885,11 +10576,13 @@ VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void &m_pMappedData); if (result == VK_SUCCESS) { + VMA_ASSERT(m_pMappedData != VMA_NULL); + m_MappingHysteresis.PostMap(); + m_MapCount = count; if (ppData != VMA_NULL) { *ppData = m_pMappedData; } - m_MapCount = count; } return result; } @@ -12006,15 +10699,11 @@ VmaAllocation_T::VmaAllocation_T(bool mappingAllowed) { if(mappingAllowed) m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; - -#if VMA_STATS_STRING_ENABLED - m_BufferImageUsage = 0; -#endif } VmaAllocation_T::~VmaAllocation_T() { - VMA_ASSERT(m_MapCount == 0 && "Allocation was not unmapped before destruction."); + VMA_ASSERT_LEAK(m_MapCount == 0 && "Allocation was not unmapped before destruction."); // Check if owned string was freed. VMA_ASSERT(m_pName == VMA_NULL); @@ -12092,11 +10781,11 @@ uint8_t VmaAllocation_T::SwapBlockAllocation(VmaAllocator hAllocator, VmaAllocat m_BlockAllocation.m_Block->Unmap(hAllocator, m_MapCount); m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, allocation); - VMA_SWAP(m_BlockAllocation, allocation->m_BlockAllocation); + std::swap(m_BlockAllocation, allocation->m_BlockAllocation); m_BlockAllocation.m_Block->m_pMetadata->SetAllocationUserData(m_BlockAllocation.m_AllocHandle, this); #if VMA_STATS_STRING_ENABLED - VMA_SWAP(m_BufferImageUsage, allocation->m_BufferImageUsage); + std::swap(m_BufferImageUsage, allocation->m_BufferImageUsage); #endif return m_MapCount; } @@ -12271,12 +10960,6 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) } #if VMA_STATS_STRING_ENABLED -void VmaAllocation_T::InitBufferImageUsage(uint32_t bufferImageUsage) -{ - VMA_ASSERT(m_BufferImageUsage == 0); - m_BufferImageUsage = bufferImageUsage; -} - void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const { json.WriteString("Type"); @@ -12285,7 +10968,7 @@ void VmaAllocation_T::PrintParameters(class VmaJsonWriter& json) const json.WriteString("Size"); json.WriteNumber(m_Size); json.WriteString("Usage"); - json.WriteNumber(m_BufferImageUsage); + json.WriteNumber(m_BufferImageUsage.Value); // It may be uint32_t or uint64_t. if (m_pUserData != VMA_NULL) { @@ -12503,7 +11186,7 @@ VkResult VmaBlockVector::AllocatePage( pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from last block #%u", pCurrBlock->GetId()); + VMA_DEBUG_LOG_FORMAT(" Returned from last block #%" PRIu32, pCurrBlock->GetId()); IncrementallySortBlocks(); return VK_SUCCESS; } @@ -12539,7 +11222,7 @@ VkResult VmaBlockVector::AllocatePage( pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); IncrementallySortBlocks(); return VK_SUCCESS; } @@ -12558,7 +11241,7 @@ VkResult VmaBlockVector::AllocatePage( pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); IncrementallySortBlocks(); return VK_SUCCESS; } @@ -12575,7 +11258,7 @@ VkResult VmaBlockVector::AllocatePage( VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + VMA_DEBUG_LOG_FORMAT(" Returned from existing block #%" PRIu32, pCurrBlock->GetId()); IncrementallySortBlocks(); return VK_SUCCESS; } @@ -12642,7 +11325,7 @@ VkResult VmaBlockVector::AllocatePage( pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { - VMA_DEBUG_LOG(" Created new block #%u Size=%llu", pBlock->GetId(), newBlockSize); + VMA_DEBUG_LOG_FORMAT(" Created new block #%" PRIu32 " Size=%" PRIu64, pBlock->GetId(), newBlockSize); IncrementallySortBlocks(); return VK_SUCCESS; } @@ -12691,7 +11374,7 @@ void VmaBlockVector::Free(const VmaAllocation hAllocation) pBlock->PostFree(m_hAllocator); VMA_HEAVY_ASSERT(pBlock->Validate()); - VMA_DEBUG_LOG(" Freed from MemoryTypeIndex=%u", m_MemoryTypeIndex); + VMA_DEBUG_LOG_FORMAT(" Freed from MemoryTypeIndex=%" PRIu32, m_MemoryTypeIndex); const bool canDeleteBlock = m_Blocks.size() > m_MinBlockCount; // pBlock became empty after this deallocation. @@ -12724,7 +11407,7 @@ void VmaBlockVector::Free(const VmaAllocation hAllocation) // lock, for performance reason. if (pBlockToDelete != VMA_NULL) { - VMA_DEBUG_LOG(" Deleted empty block #%u", pBlockToDelete->GetId()); + VMA_DEBUG_LOG_FORMAT(" Deleted empty block #%" PRIu32, pBlockToDelete->GetId()); pBlockToDelete->Destroy(m_hAllocator); vma_delete(m_hAllocator, pBlockToDelete); } @@ -12771,7 +11454,7 @@ void VmaBlockVector::IncrementallySortBlocks() { if (m_Blocks[i - 1]->m_pMetadata->GetSumFreeSize() > m_Blocks[i]->m_pMetadata->GetSumFreeSize()) { - VMA_SWAP(m_Blocks[i - 1], m_Blocks[i]); + std::swap(m_Blocks[i - 1], m_Blocks[i]); return; } } @@ -12999,6 +11682,8 @@ VmaDefragmentationContext_T::VmaDefragmentationContext_T( const VmaDefragmentationInfo& info) : m_MaxPassBytes(info.maxBytesPerPass == 0 ? VK_WHOLE_SIZE : info.maxBytesPerPass), m_MaxPassAllocations(info.maxAllocationsPerPass == 0 ? UINT32_MAX : info.maxAllocationsPerPass), + m_BreakCallback(info.pfnBreakCallback), + m_BreakCallbackUserData(info.pBreakCallbackUserData), m_MoveAllocator(hAllocator->GetAllocationCallbacks()), m_Moves(m_MoveAllocator) { @@ -13032,20 +11717,18 @@ VmaDefragmentationContext_T::VmaDefragmentationContext_T( { case 0: // Default algorithm m_Algorithm = VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT; - case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: - { m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); break; - } + case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: + m_AlgorithmState = vma_new_array(hAllocator, StateBalanced, m_BlockVectorCount); + break; case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: - { if (hAllocator->GetBufferImageGranularity() > 1) { m_AlgorithmState = vma_new_array(hAllocator, StateExtensive, m_BlockVectorCount); } break; } - } } VmaDefragmentationContext_T::~VmaDefragmentationContext_T() @@ -13255,29 +11938,24 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo m_PassStats.bytesFreed += freedBlockSize; } - switch (m_Algorithm) + if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT && + m_AlgorithmState != VMA_NULL) { - case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: - { - if (m_AlgorithmState != VMA_NULL) + // Avoid unnecessary tries to allocate when new free block is available + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[vectorIndex]; + if (state.firstFreeBlock != SIZE_MAX) { - // Avoid unnecessary tries to allocate when new free block is available - StateExtensive& state = reinterpret_cast(m_AlgorithmState)[vectorIndex]; - if (state.firstFreeBlock != SIZE_MAX) + const size_t diff = prevCount - currentCount; + if (state.firstFreeBlock >= diff) { - const size_t diff = prevCount - currentCount; - if (state.firstFreeBlock >= diff) - { - state.firstFreeBlock -= diff; - if (state.firstFreeBlock != 0) - state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); - } - else - state.firstFreeBlock = 0; + state.firstFreeBlock -= diff; + if (state.firstFreeBlock != 0) + state.firstFreeBlock -= vector->GetBlock(state.firstFreeBlock - 1)->m_pMetadata->IsEmpty(); } + else + state.firstFreeBlock = 0; } } - } } moveInfo.moveCount = 0; moveInfo.pMoves = VMA_NULL; @@ -13293,50 +11971,49 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo // Move blocks with immovable allocations according to algorithm if (immovableBlocks.size() > 0) { - switch (m_Algorithm) + do { - case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: - { - if (m_AlgorithmState != VMA_NULL) + if(m_Algorithm == VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT) { - bool swapped = false; - // Move to the start of free blocks range - for (const FragmentedBlock& block : immovableBlocks) + if (m_AlgorithmState != VMA_NULL) { - StateExtensive& state = reinterpret_cast(m_AlgorithmState)[block.data]; - if (state.operation != StateExtensive::Operation::Cleanup) + bool swapped = false; + // Move to the start of free blocks range + for (const FragmentedBlock& block : immovableBlocks) { - VmaBlockVector* vector = m_pBlockVectors[block.data]; - VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); - - for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) + StateExtensive& state = reinterpret_cast(m_AlgorithmState)[block.data]; + if (state.operation != StateExtensive::Operation::Cleanup) { - if (vector->GetBlock(i) == block.block) + VmaBlockVector* vector = m_pBlockVectors[block.data]; + VmaMutexLockWrite lock(vector->GetMutex(), vector->GetAllocator()->m_UseMutex); + + for (size_t i = 0, count = vector->GetBlockCount() - m_ImmovableBlockCount; i < count; ++i) { - VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); - if (state.firstFreeBlock != SIZE_MAX) + if (vector->GetBlock(i) == block.block) { - if (i + 1 < state.firstFreeBlock) + std::swap(vector->m_Blocks[i], vector->m_Blocks[vector->GetBlockCount() - ++m_ImmovableBlockCount]); + if (state.firstFreeBlock != SIZE_MAX) { - if (state.firstFreeBlock > 1) - VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); - else - --state.firstFreeBlock; + if (i + 1 < state.firstFreeBlock) + { + if (state.firstFreeBlock > 1) + std::swap(vector->m_Blocks[i], vector->m_Blocks[--state.firstFreeBlock]); + else + --state.firstFreeBlock; + } } + swapped = true; + break; } - swapped = true; - break; } } } + if (swapped) + result = VK_INCOMPLETE; + break; } - if (swapped) - result = VK_INCOMPLETE; - break; } - } - default: - { + // Move to the beginning for (const FragmentedBlock& block : immovableBlocks) { @@ -13347,14 +12024,12 @@ VkResult VmaDefragmentationContext_T::DefragmentPassEnd(VmaDefragmentationPassMo { if (vector->GetBlock(i) == block.block) { - VMA_SWAP(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); + std::swap(vector->m_Blocks[i], vector->m_Blocks[m_ImmovableBlockCount++]); break; } } } - break; - } - } + } while (false); } // Bulk-map destination blocks @@ -13372,14 +12047,15 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation(VmaBlockVector& vector, { case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FAST_BIT: return ComputeDefragmentation_Fast(vector); - default: - VMA_ASSERT(0); case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_BALANCED_BIT: return ComputeDefragmentation_Balanced(vector, index, true); case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_FULL_BIT: return ComputeDefragmentation_Full(vector); case VMA_DEFRAGMENTATION_FLAG_ALGORITHM_EXTENSIVE_BIT: return ComputeDefragmentation_Extensive(vector, index); + default: + VMA_ASSERT(0); + return ComputeDefragmentation_Balanced(vector, index, true); } } @@ -13403,6 +12079,10 @@ VmaDefragmentationContext_T::MoveAllocationData VmaDefragmentationContext_T::Get VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCounters(VkDeviceSize bytes) { + // Check custom criteria if exists + if (m_BreakCallback && m_BreakCallback(m_BreakCallbackUserData)) + return CounterStatus::End; + // Ignore allocation if will exceed max size for copy if (m_PassStats.bytesMoved + bytes > m_MaxPassBytes) { @@ -13411,6 +12091,8 @@ VmaDefragmentationContext_T::CounterStatus VmaDefragmentationContext_T::CheckCou else return CounterStatus::End; } + else + m_IgnoredAllocs = 0; return CounterStatus::Pass; } @@ -13420,8 +12102,8 @@ bool VmaDefragmentationContext_T::IncrementCounters(VkDeviceSize bytes) // Early return when max found if (++m_PassStats.allocationsMoved >= m_MaxPassAllocations || m_PassStats.bytesMoved >= m_MaxPassBytes) { - VMA_ASSERT(m_PassStats.allocationsMoved == m_MaxPassAllocations || - m_PassStats.bytesMoved == m_MaxPassBytes && "Exceeded maximal pass threshold!"); + VMA_ASSERT((m_PassStats.allocationsMoved == m_MaxPassAllocations || + m_PassStats.bytesMoved == m_MaxPassBytes) && "Exceeded maximal pass threshold!"); return true; } return false; @@ -13445,10 +12127,10 @@ bool VmaDefragmentationContext_T::ReallocWithinBlock(VmaBlockVector& vector, Vma continue; case CounterStatus::End: return true; - default: - VMA_ASSERT(0); case CounterStatus::Pass: break; + default: + VMA_ASSERT(0); } VkDeviceSize offset = moveData.move.srcAllocation->GetOffset(); @@ -13534,10 +12216,10 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Fast(VmaBlockVector& ve continue; case CounterStatus::End: return true; - default: - VMA_ASSERT(0); case CounterStatus::Pass: break; + default: + VMA_ASSERT(0); } // Check all previous blocks for free space @@ -13581,10 +12263,10 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Balanced(VmaBlockVector continue; case CounterStatus::End: return true; - default: - VMA_ASSERT(0); case CounterStatus::Pass: break; + default: + VMA_ASSERT(0); } // Check all previous blocks for free space @@ -13668,10 +12350,10 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Full(VmaBlockVector& ve continue; case CounterStatus::End: return true; - default: - VMA_ASSERT(0); case CounterStatus::Pass: break; + default: + VMA_ASSERT(0); } // Check all previous blocks for free space @@ -13759,10 +12441,10 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto continue; case CounterStatus::End: return true; - default: - VMA_ASSERT(0); case CounterStatus::Pass: break; + default: + VMA_ASSERT(0); } // Check all previous blocks for free space @@ -13770,7 +12452,7 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto { // Full clear performed already if (prevMoveCount != m_Moves.size() && freeMetadata->GetNextAllocation(handle) == VK_NULL_HANDLE) - reinterpret_cast(m_AlgorithmState)[index] = last; + vectorState.firstFreeBlock = last; return true; } } @@ -13800,14 +12482,15 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto case StateExtensive::Operation::FindFreeBlockBuffer: vectorState.operation = StateExtensive::Operation::MoveBuffers; break; - default: - VMA_ASSERT(0); case StateExtensive::Operation::FindFreeBlockTexture: vectorState.operation = StateExtensive::Operation::MoveTextures; break; case StateExtensive::Operation::FindFreeBlockAll: vectorState.operation = StateExtensive::Operation::MoveAll; break; + default: + VMA_ASSERT(0); + vectorState.operation = StateExtensive::Operation::MoveTextures; } vectorState.firstFreeBlock = last; // Nothing done, block found without reallocations, can perform another reallocs in same pass @@ -13839,6 +12522,7 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto } else break; + VMA_FALLTHROUGH; // Fallthrough } case StateExtensive::Operation::MoveBuffers: { @@ -13863,6 +12547,7 @@ bool VmaDefragmentationContext_T::ComputeDefragmentation_Extensive(VmaBlockVecto } else break; + VMA_FALLTHROUGH; // Fallthrough } case StateExtensive::Operation::MoveAll: { @@ -13945,10 +12630,10 @@ bool VmaDefragmentationContext_T::MoveDataToFreeBlocks(VmaSuballocationType curr continue; case CounterStatus::End: return true; - default: - VMA_ASSERT(0); case CounterStatus::Pass: break; + default: + VMA_ASSERT(0); } // Move only single type of resources at once @@ -13995,6 +12680,9 @@ VmaPool_T::VmaPool_T( VmaPool_T::~VmaPool_T() { VMA_ASSERT(m_PrevPool == VMA_NULL && m_NextPool == VMA_NULL); + + const VkAllocationCallbacks* allocs = m_BlockVector.GetAllocator()->GetAllocationCallbacks(); + VmaFreeString(allocs, m_Name); } void VmaPool_T::SetName(const char* pName) @@ -14023,6 +12711,8 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : m_UseAmdDeviceCoherentMemory((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT) != 0), m_UseKhrBufferDeviceAddress((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT) != 0), m_UseExtMemoryPriority((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT) != 0), + m_UseKhrMaintenance4((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT) != 0), + m_UseKhrMaintenance5((pCreateInfo->flags & VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT) != 0), m_hDevice(pCreateInfo->device), m_hInstance(pCreateInfo->instance), m_AllocationCallbacksSpecified(pCreateInfo->pAllocationCallbacks != VMA_NULL), @@ -14102,6 +12792,18 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); } #endif +#if !(VMA_KHR_MAINTENANCE4) + if(m_UseKhrMaintenance4) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif +#if !(VMA_KHR_MAINTENANCE5) + if(m_UseKhrMaintenance5) + { + VMA_ASSERT(0 && "VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT is set but required extension is not available in your Vulkan header or its support in VMA has been disabled by a preprocessor macro."); + } +#endif memset(&m_DeviceMemoryCallbacks, 0 ,sizeof(m_DeviceMemoryCallbacks)); memset(&m_PhysicalDeviceProperties, 0, sizeof(m_PhysicalDeviceProperties)); @@ -14180,7 +12882,7 @@ VmaAllocator_T::VmaAllocator_T(const VmaAllocatorCreateInfo* pCreateInfo) : GetMemoryTypeMinAlignment(memTypeIndex), // minAllocationAlignment VMA_NULL); // // pMemoryAllocateNext // No need to call m_pBlockVectors[memTypeIndex][blockVectorTypeIndex]->CreateMinBlocks here, - // becase minBlockCount is 0. + // because minBlockCount is 0. } } } @@ -14260,6 +12962,12 @@ void VmaAllocator_T::ImportVulkanFunctions_Static() m_VulkanFunctions.vkGetImageMemoryRequirements2KHR = (PFN_vkGetImageMemoryRequirements2)vkGetImageMemoryRequirements2; m_VulkanFunctions.vkBindBufferMemory2KHR = (PFN_vkBindBufferMemory2)vkBindBufferMemory2; m_VulkanFunctions.vkBindImageMemory2KHR = (PFN_vkBindImageMemory2)vkBindImageMemory2; + } +#endif + +#if VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { m_VulkanFunctions.vkGetPhysicalDeviceMemoryProperties2KHR = (PFN_vkGetPhysicalDeviceMemoryProperties2)vkGetPhysicalDeviceMemoryProperties2; } #endif @@ -14312,11 +13020,11 @@ void VmaAllocator_T::ImportVulkanFunctions_Custom(const VmaVulkanFunctions* pVul VMA_COPY_IF_NOT_NULL(vkBindImageMemory2KHR); #endif -#if VMA_MEMORY_BUDGET +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 VMA_COPY_IF_NOT_NULL(vkGetPhysicalDeviceMemoryProperties2KHR); #endif -#if VMA_VULKAN_VERSION >= 1003000 +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 VMA_COPY_IF_NOT_NULL(vkGetDeviceBufferMemoryRequirements); VMA_COPY_IF_NOT_NULL(vkGetDeviceImageMemoryRequirements); #endif @@ -14367,7 +13075,17 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() VMA_FETCH_DEVICE_FUNC(vkGetImageMemoryRequirements2KHR, PFN_vkGetImageMemoryRequirements2, "vkGetImageMemoryRequirements2"); VMA_FETCH_DEVICE_FUNC(vkBindBufferMemory2KHR, PFN_vkBindBufferMemory2, "vkBindBufferMemory2"); VMA_FETCH_DEVICE_FUNC(vkBindImageMemory2KHR, PFN_vkBindImageMemory2, "vkBindImageMemory2"); - VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2, "vkGetPhysicalDeviceMemoryProperties2"); + } +#endif + +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); } #endif @@ -14387,8 +13105,12 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() } #endif // #if VMA_BIND_MEMORY2 -#if VMA_MEMORY_BUDGET - if(m_UseExtMemoryBudget) +#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 1, 0)) + { + VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2"); + } + else if(m_UseExtMemoryBudget) { VMA_FETCH_INSTANCE_FUNC(vkGetPhysicalDeviceMemoryProperties2KHR, PFN_vkGetPhysicalDeviceMemoryProperties2KHR, "vkGetPhysicalDeviceMemoryProperties2KHR"); } @@ -14401,6 +13123,13 @@ void VmaAllocator_T::ImportVulkanFunctions_Dynamic() VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirements, "vkGetDeviceImageMemoryRequirements"); } #endif +#if VMA_KHR_MAINTENANCE4 + if(m_UseKhrMaintenance4) + { + VMA_FETCH_DEVICE_FUNC(vkGetDeviceBufferMemoryRequirements, PFN_vkGetDeviceBufferMemoryRequirementsKHR, "vkGetDeviceBufferMemoryRequirementsKHR"); + VMA_FETCH_DEVICE_FUNC(vkGetDeviceImageMemoryRequirements, PFN_vkGetDeviceImageMemoryRequirementsKHR, "vkGetDeviceImageMemoryRequirementsKHR"); + } +#endif #undef VMA_FETCH_DEVICE_FUNC #undef VMA_FETCH_INSTANCE_FUNC @@ -14451,13 +13180,12 @@ void VmaAllocator_T::ValidateVulkanFunctions() } #endif -#if VMA_VULKAN_VERSION >= 1003000 - if(m_VulkanApiVersion >= VK_MAKE_VERSION(1, 3, 0)) - { - VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); - VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); - } -#endif + // Not validating these due to suspected driver bugs with these function + // pointers being null despite correct extension or Vulkan version is enabled. + // See issue #397. Their usage in VMA is optional anyway. + // + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceBufferMemoryRequirements != VMA_NULL); + // VMA_ASSERT(m_VulkanFunctions.vkGetDeviceImageMemoryRequirements != VMA_NULL); } VkDeviceSize VmaAllocator_T::CalcPreferredBlockSize(uint32_t memTypeIndex) @@ -14475,7 +13203,7 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( bool dedicatedPreferred, VkBuffer dedicatedBuffer, VkImage dedicatedImage, - VkFlags dedicatedBufferImageUsage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, @@ -14485,7 +13213,7 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( VmaAllocation* pAllocations) { VMA_ASSERT(pAllocations != VMA_NULL); - VMA_DEBUG_LOG(" AllocateMemory: MemoryTypeIndex=%u, AllocationCount=%zu, Size=%llu", memTypeIndex, allocationCount, size); + VMA_DEBUG_LOG_FORMAT(" AllocateMemory: MemoryTypeIndex=%" PRIu32 ", AllocationCount=%zu, Size=%" PRIu64, memTypeIndex, allocationCount, size); VmaAllocationCreateInfo finalCreateInfo = createInfo; VkResult res = CalcMemTypeParams( @@ -14534,7 +13262,8 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( // Protection against creating each allocation as dedicated when we reach or exceed heap size/budget, // which can quickly deplete maxMemoryAllocationCount: Don't prefer dedicated allocations when above // 3/4 of the maximum allocation count. - if(m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) + if(m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount < UINT32_MAX / 4 && + m_DeviceMemoryCount.load() > m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount * 3 / 4) { dedicatedPreferred = false; } @@ -14628,7 +13357,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( float priority, VkBuffer dedicatedBuffer, VkImage dedicatedImage, - VkFlags dedicatedBufferImageUsage, + VmaBufferImageUsage dedicatedBufferImageUsage, size_t allocationCount, VmaAllocation* pAllocations, const void* pNextChain) @@ -14668,8 +13397,8 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( bool canContainBufferWithDeviceAddress = true; if(dedicatedBuffer != VK_NULL_HANDLE) { - canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == UINT32_MAX || // Usage flags unknown - (dedicatedBufferImageUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == VmaBufferImageUsage::UNKNOWN || + dedicatedBufferImageUsage.Contains(VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT); } else if(dedicatedImage != VK_NULL_HANDLE) { @@ -14730,7 +13459,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( { dedicatedAllocations.Register(pAllocations[allocIndex]); } - VMA_DEBUG_LOG(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%u", allocationCount, memTypeIndex); + VMA_DEBUG_LOG_FORMAT(" Allocated DedicatedMemory Count=%zu, MemoryTypeIndex=#%" PRIu32, allocationCount, memTypeIndex); } else { @@ -14881,7 +13610,7 @@ void VmaAllocator_T::GetImageMemoryRequirements( VkResult VmaAllocator_T::FindMemoryTypeIndex( uint32_t memoryTypeBits, const VmaAllocationCreateInfo* pAllocationCreateInfo, - VkFlags bufImgUsage, + VmaBufferImageUsage bufImgUsage, uint32_t* pMemoryTypeIndex) const { memoryTypeBits &= GetGlobalMemoryTypeBits(); @@ -15036,7 +13765,7 @@ VkResult VmaAllocator_T::AllocateMemory( bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, VkImage dedicatedImage, - VkFlags dedicatedBufferImageUsage, + VmaBufferImageUsage dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -15286,9 +14015,28 @@ void VmaAllocator_T::GetAllocationInfo(VmaAllocation hAllocation, VmaAllocationI pAllocationInfo->pName = hAllocation->GetName(); } +void VmaAllocator_T::GetAllocationInfo2(VmaAllocation hAllocation, VmaAllocationInfo2* pAllocationInfo) +{ + GetAllocationInfo(hAllocation, &pAllocationInfo->allocationInfo); + + switch (hAllocation->GetType()) + { + case VmaAllocation_T::ALLOCATION_TYPE_BLOCK: + pAllocationInfo->blockSize = hAllocation->GetBlock()->m_pMetadata->GetSize(); + pAllocationInfo->dedicatedMemory = VK_FALSE; + break; + case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: + pAllocationInfo->blockSize = pAllocationInfo->allocationInfo.size; + pAllocationInfo->dedicatedMemory = VK_TRUE; + break; + default: + VMA_ASSERT(0); + } +} + VkResult VmaAllocator_T::CreatePool(const VmaPoolCreateInfo* pCreateInfo, VmaPool* pPool) { - VMA_DEBUG_LOG(" CreatePool: MemoryTypeIndex=%u, flags=%u", pCreateInfo->memoryTypeIndex, pCreateInfo->flags); + VMA_DEBUG_LOG_FORMAT(" CreatePool: MemoryTypeIndex=%" PRIu32 ", flags=%" PRIu32, pCreateInfo->memoryTypeIndex, pCreateInfo->flags); VmaPoolCreateInfo newCreateInfo = *pCreateInfo; @@ -15432,7 +14180,7 @@ VkResult VmaAllocator_T::CheckCorruption(uint32_t memoryTypeBits) VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAllocateInfo, VkDeviceMemory* pMemory) { - AtomicTransactionalIncrement deviceMemoryCountIncrement; + AtomicTransactionalIncrement deviceMemoryCountIncrement; const uint64_t prevDeviceMemoryCount = deviceMemoryCountIncrement.Increment(&m_DeviceMemoryCount); #if VMA_DEBUG_DONT_EXCEED_MAX_MEMORY_ALLOCATION_COUNT if(prevDeviceMemoryCount >= m_PhysicalDeviceProperties.limits.maxMemoryAllocationCount) @@ -15622,7 +14370,7 @@ VkResult VmaAllocator_T::BindBufferMemory( VkBuffer hBuffer, const void* pNext) { - VkResult res = VK_SUCCESS; + VkResult res = VK_ERROR_UNKNOWN_COPY; switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: @@ -15647,7 +14395,7 @@ VkResult VmaAllocator_T::BindImageMemory( VkImage hImage, const void* pNext) { - VkResult res = VK_SUCCESS; + VkResult res = VK_ERROR_UNKNOWN_COPY; switch(hAllocation->GetType()) { case VmaAllocation_T::ALLOCATION_TYPE_DEDICATED: @@ -15733,6 +14481,43 @@ VkResult VmaAllocator_T::FlushOrInvalidateAllocations( return res; } +VkResult VmaAllocator_T::CopyMemoryToAllocation( + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + void* dstMappedData = VMA_NULL; + VkResult res = Map(dstAllocation, &dstMappedData); + if(res == VK_SUCCESS) + { + memcpy((char*)dstMappedData + dstAllocationLocalOffset, pSrcHostPointer, (size_t)size); + Unmap(dstAllocation); + res = FlushOrInvalidateAllocation(dstAllocation, dstAllocationLocalOffset, size, VMA_CACHE_FLUSH); + } + return res; +} + +VkResult VmaAllocator_T::CopyAllocationToMemory( + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + void* srcMappedData = VMA_NULL; + VkResult res = Map(srcAllocation, &srcMappedData); + if(res == VK_SUCCESS) + { + res = FlushOrInvalidateAllocation(srcAllocation, srcAllocationLocalOffset, size, VMA_CACHE_INVALIDATE); + if(res == VK_SUCCESS) + { + memcpy(pDstHostPointer, (const char*)srcMappedData + srcAllocationLocalOffset, (size_t)size); + Unmap(srcAllocation); + } + } + return res; +} + void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) { VMA_ASSERT(allocation && allocation->GetType() == VmaAllocation_T::ALLOCATION_TYPE_DEDICATED); @@ -15767,7 +14552,7 @@ void VmaAllocator_T::FreeDedicatedMemory(const VmaAllocation allocation) m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); m_AllocationObjectAllocator.Free(allocation); - VMA_DEBUG_LOG(" Freed DedicatedMemory MemoryTypeIndex=%u", memTypeIndex); + VMA_DEBUG_LOG_FORMAT(" Freed DedicatedMemory MemoryTypeIndex=%" PRIu32, memTypeIndex); } uint32_t VmaAllocator_T::CalculateGpuDefragmentationMemoryTypeBits() const @@ -16012,7 +14797,7 @@ void VmaAllocator_T::PrintDetailedMap(VmaJsonWriter& json) { json.WriteString("Name"); json.BeginString(); - json.ContinueString_Size(index++); + json.ContinueString((uint64_t)index++); if (pool->GetName()) { json.ContinueString(" - "); @@ -16352,7 +15137,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); - return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, UINT32_MAX, pMemoryTypeIndex); + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, VmaBufferImageUsage::UNKNOWN, pMemoryTypeIndex); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( @@ -16370,21 +15155,22 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); VkResult res; -#if VMA_VULKAN_VERSION >= 1003000 +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 if(funcs->vkGetDeviceBufferMemoryRequirements) { // Can query straight from VkBufferCreateInfo :) - VkDeviceBufferMemoryRequirements devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS}; + VkDeviceBufferMemoryRequirementsKHR devBufMemReq = {VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR}; devBufMemReq.pCreateInfo = pBufferCreateInfo; VkMemoryRequirements2 memReq = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; (*funcs->vkGetDeviceBufferMemoryRequirements)(hDev, &devBufMemReq, &memReq); res = allocator->FindMemoryTypeIndex( - memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, pBufferCreateInfo->usage, pMemoryTypeIndex); + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); } else -#endif // #if VMA_VULKAN_VERSION >= 1003000 +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 { // Must create a dummy buffer to query :( VkBuffer hBuffer = VK_NULL_HANDLE; @@ -16396,7 +15182,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( funcs->vkGetBufferMemoryRequirements(hDev, hBuffer, &memReq); res = allocator->FindMemoryTypeIndex( - memReq.memoryTypeBits, pAllocationCreateInfo, pBufferCreateInfo->usage, pMemoryTypeIndex); + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), pMemoryTypeIndex); funcs->vkDestroyBuffer( hDev, hBuffer, allocator->GetAllocationCallbacks()); @@ -16420,11 +15207,11 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( const VmaVulkanFunctions* funcs = &allocator->GetVulkanFunctions(); VkResult res; -#if VMA_VULKAN_VERSION >= 1003000 +#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 if(funcs->vkGetDeviceImageMemoryRequirements) { // Can query straight from VkImageCreateInfo :) - VkDeviceImageMemoryRequirements devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS}; + VkDeviceImageMemoryRequirementsKHR devImgMemReq = {VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR}; devImgMemReq.pCreateInfo = pImageCreateInfo; VMA_ASSERT(pImageCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT_COPY && (pImageCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT_COPY) == 0 && "Cannot use this VkImageCreateInfo with vmaFindMemoryTypeIndexForImageInfo as I don't know what to pass as VkDeviceImageMemoryRequirements::planeAspect."); @@ -16433,10 +15220,11 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( (*funcs->vkGetDeviceImageMemoryRequirements)(hDev, &devImgMemReq, &memReq); res = allocator->FindMemoryTypeIndex( - memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, pImageCreateInfo->usage, pMemoryTypeIndex); + memReq.memoryRequirements.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); } else -#endif // #if VMA_VULKAN_VERSION >= 1003000 +#endif // VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 { // Must create a dummy image to query :( VkImage hImage = VK_NULL_HANDLE; @@ -16448,7 +15236,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( funcs->vkGetImageMemoryRequirements(hDev, hImage, &memReq); res = allocator->FindMemoryTypeIndex( - memReq.memoryTypeBits, pAllocationCreateInfo, pImageCreateInfo->usage, pMemoryTypeIndex); + memReq.memoryTypeBits, pAllocationCreateInfo, + VmaBufferImageUsage(*pImageCreateInfo), pMemoryTypeIndex); funcs->vkDestroyImage( hDev, hImage, allocator->GetAllocationCallbacks()); @@ -16571,7 +15360,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer VK_NULL_HANDLE, // dedicatedImage - UINT32_MAX, // dedicatedBufferImageUsage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, 1, // allocationCount @@ -16610,7 +15399,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer VK_NULL_HANDLE, // dedicatedImage - UINT32_MAX, // dedicatedBufferImageUsage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, allocationCount, @@ -16653,7 +15442,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( prefersDedicatedAllocation, buffer, // dedicatedBuffer VK_NULL_HANDLE, // dedicatedImage - UINT32_MAX, // dedicatedBufferImageUsage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -16692,7 +15481,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer image, // dedicatedImage - UINT32_MAX, // dedicatedBufferImageUsage + VmaBufferImageUsage::UNKNOWN, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, 1, // allocationCount @@ -16757,6 +15546,18 @@ VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo( allocator->GetAllocationInfo(allocation, pAllocationInfo); } +VMA_CALL_PRE void VMA_CALL_POST vmaGetAllocationInfo2( + VmaAllocator allocator, + VmaAllocation allocation, + VmaAllocationInfo2* pAllocationInfo) +{ + VMA_ASSERT(allocator && allocation && pAllocationInfo); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + allocator->GetAllocationInfo2(allocation, pAllocationInfo); +} + VMA_CALL_PRE void VMA_CALL_POST vmaSetAllocationUserData( VmaAllocator allocator, VmaAllocation allocation, @@ -16822,9 +15623,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocation( VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); - - return res; + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_FLUSH); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( @@ -16839,9 +15638,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocation( VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); - - return res; + return allocator->FlushOrInvalidateAllocation(allocation, offset, size, VMA_CACHE_INVALIDATE); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( @@ -16864,9 +15661,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFlushAllocations( VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); - - return res; + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_FLUSH); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( @@ -16889,9 +15684,49 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaInvalidateAllocations( VMA_DEBUG_GLOBAL_MUTEX_LOCK - const VkResult res = allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); + return allocator->FlushOrInvalidateAllocations(allocationCount, allocations, offsets, sizes, VMA_CACHE_INVALIDATE); +} - return res; +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyMemoryToAllocation( + VmaAllocator allocator, + const void* pSrcHostPointer, + VmaAllocation dstAllocation, + VkDeviceSize dstAllocationLocalOffset, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && pSrcHostPointer && dstAllocation); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyMemoryToAllocation"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyMemoryToAllocation(pSrcHostPointer, dstAllocation, dstAllocationLocalOffset, size); +} + +VMA_CALL_PRE VkResult VMA_CALL_POST vmaCopyAllocationToMemory( + VmaAllocator allocator, + VmaAllocation srcAllocation, + VkDeviceSize srcAllocationLocalOffset, + void* pDstHostPointer, + VkDeviceSize size) +{ + VMA_ASSERT(allocator && srcAllocation && pDstHostPointer); + + if(size == 0) + { + return VK_SUCCESS; + } + + VMA_DEBUG_LOG("vmaCopyAllocationToMemory"); + + VMA_DEBUG_GLOBAL_MUTEX_LOCK + + return allocator->CopyAllocationToMemory(srcAllocation, srcAllocationLocalOffset, pDstHostPointer, size); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaCheckCorruption( @@ -17083,7 +15918,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer VK_NULL_HANDLE, // dedicatedImage - pBufferCreateInfo->usage, // dedicatedBufferImageUsage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17100,7 +15935,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( { // All steps succeeded. #if VMA_STATS_STRING_ENABLED - (*pAllocation)->InitBufferImageUsage(pBufferCreateInfo->usage); + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); #endif if(pAllocationInfo != VMA_NULL) { @@ -17178,7 +16013,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer VK_NULL_HANDLE, // dedicatedImage - pBufferCreateInfo->usage, // dedicatedBufferImageUsage + VmaBufferImageUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5), // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17195,7 +16030,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( { // All steps succeeded. #if VMA_STATS_STRING_ENABLED - (*pAllocation)->InitBufferImageUsage(pBufferCreateInfo->usage); + (*pAllocation)->InitBufferUsage(*pBufferCreateInfo, allocator->m_UseKhrMaintenance5); #endif if(pAllocationInfo != VMA_NULL) { @@ -17354,7 +16189,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer *pImage, // dedicatedImage - pImageCreateInfo->usage, // dedicatedBufferImageUsage + VmaBufferImageUsage(*pImageCreateInfo), // dedicatedBufferImageUsage *pAllocationCreateInfo, suballocType, 1, // allocationCount @@ -17371,7 +16206,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( { // All steps succeeded. #if VMA_STATS_STRING_ENABLED - (*pAllocation)->InitBufferImageUsage(pImageCreateInfo->usage); + (*pAllocation)->InitImageUsage(*pImageCreateInfo); #endif if(pAllocationInfo != VMA_NULL) { @@ -17610,13 +16445,14 @@ VMA_CALL_PRE void VMA_CALL_POST vmaFreeVirtualBlockStatsString(VmaVirtualBlock V \section quick_start_project_setup Project setup Vulkan Memory Allocator comes in form of a "stb-style" single header file. -You don't need to build it as a separate library project. -You can add this file directly to your project and submit it to code repository next to your other source files. +While you can pull the entire repository e.g. as Git module, there is also Cmake script provided, +you don't need to build it as a separate library project. +You can add file "vk_mem_alloc.h" directly to your project and submit it to code repository next to your other source files. "Single header" doesn't mean that everything is contained in C/C++ declarations, like it tends to be in case of inline functions or C++ templates. It means that implementation is bundled with interface in a single file and needs to be extracted using preprocessor macro. -If you don't do it properly, you will get linker errors. +If you don't do it properly, it will result in linker errors. To do it properly: @@ -17630,33 +16466,46 @@ To do it properly: #include "vk_mem_alloc.h" \endcode -It may be a good idea to create dedicated CPP file just for this purpose. +It may be a good idea to create dedicated CPP file just for this purpose, e.g. "VmaUsage.cpp". This library includes header ``, which in turn includes `` on Windows. If you need some specific macros defined before including these headers (like `WIN32_LEAN_AND_MEAN` or `WINVER` for Windows, `VK_USE_PLATFORM_WIN32_KHR` for Vulkan), you must define them before every `#include` of this library. +It may be a good idea to create a dedicate header file for this purpose, e.g. "VmaUsage.h", +that will be included in other source files instead of VMA header directly. This library is written in C++, but has C-compatible interface. -Thus you can include and use vk_mem_alloc.h in C or C++ code, but full +Thus, you can include and use "vk_mem_alloc.h" in C or C++ code, but full implementation with `VMA_IMPLEMENTATION` macro must be compiled as C++, NOT as C. -Some features of C++14 are used. STL containers, RTTI, or C++ exceptions are not used. +Some features of C++14 are used and required. Features of C++20 are used optionally when available. +Some headers of standard C and C++ library are used, but STL containers, RTTI, or C++ exceptions are not used. \section quick_start_initialization Initialization +VMA offers library interface in a style similar to Vulkan, with object handles like #VmaAllocation, +structures describing parameters of objects to be created like #VmaAllocationCreateInfo, +and errors codes returned from functions using `VkResult` type. + +The first and the main object that needs to be created is #VmaAllocator. +It represents the initialization of the entire library. +Only one such object should be created per `VkDevice`. +You should create it at program startup, after `VkDevice` was created, and before any device memory allocator needs to be made. +It must be destroyed before `VkDevice` is destroyed. + At program startup: --# Initialize Vulkan to have `VkPhysicalDevice`, `VkDevice` and `VkInstance` object. --# Fill VmaAllocatorCreateInfo structure and create #VmaAllocator object by - calling vmaCreateAllocator(). +-# Initialize Vulkan to have `VkInstance`, `VkPhysicalDevice`, `VkDevice` object. +-# Fill VmaAllocatorCreateInfo structure and call vmaCreateAllocator() to create #VmaAllocator object. Only members `physicalDevice`, `device`, `instance` are required. However, you should inform the library which Vulkan version do you use by setting VmaAllocatorCreateInfo::vulkanApiVersion and which extensions did you enable -by setting VmaAllocatorCreateInfo::flags (like #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT for VK_KHR_buffer_device_address). +by setting VmaAllocatorCreateInfo::flags. Otherwise, VMA would use only features of Vulkan 1.0 core with no extensions. +See below for details. \subsection quick_start_initialization_selecting_vulkan_version Selecting Vulkan version @@ -17702,7 +16551,25 @@ You may need to configure importing Vulkan functions. There are 3 ways to do thi - Define `VMA_STATIC_VULKAN_FUNCTIONS` and `VMA_DYNAMIC_VULKAN_FUNCTIONS` to 0. - Pass these pointers via structure #VmaVulkanFunctions. -Example for case 2: +\subsection quick_start_initialization_enabling_extensions Enabling extensions + +VMA can automatically use following Vulkan extensions. +If you found them available on the selected physical device and you enabled them +while creating `VkInstance` / `VkDevice` object, inform VMA about their availability +by setting appropriate flags in VmaAllocatorCreateInfo::flags. + +Vulkan extension | VMA flag +------------------------------|----------------------------------------------------- +VK_KHR_dedicated_allocation | #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT +VK_KHR_bind_memory2 | #VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT +VK_KHR_maintenance4 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT +VK_KHR_maintenance5 | #VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT +VK_EXT_memory_budget | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT +VK_KHR_buffer_device_address | #VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT +VK_EXT_memory_priority | #VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT +VK_AMD_device_coherent_memory | #VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT + +Example with fetching pointers to Vulkan functions dynamically: \code #define VMA_STATIC_VULKAN_FUNCTIONS 0 @@ -17716,6 +16583,7 @@ vulkanFunctions.vkGetInstanceProcAddr = &vkGetInstanceProcAddr; vulkanFunctions.vkGetDeviceProcAddr = &vkGetDeviceProcAddr; VmaAllocatorCreateInfo allocatorCreateInfo = {}; +allocatorCreateInfo.flags = VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; allocatorCreateInfo.vulkanApiVersion = VK_API_VERSION_1_2; allocatorCreateInfo.physicalDevice = physicalDevice; allocatorCreateInfo.device = device; @@ -17724,9 +16592,24 @@ allocatorCreateInfo.pVulkanFunctions = &vulkanFunctions; VmaAllocator allocator; vmaCreateAllocator(&allocatorCreateInfo, &allocator); + +// Entire program... + +// At the end, don't forget to: +vmaDestroyAllocator(allocator); \endcode +\subsection quick_start_initialization_other_config Other configuration options + +There are additional configuration options available through preprocessor macros that you can define +before including VMA header and through parameters passed in #VmaAllocatorCreateInfo. +They include a possibility to use your own callbacks for host memory allocations (`VkAllocationCallbacks`), +callbacks for device memory allocations (instead of `vkAllocateMemory`, `vkFreeMemory`), +or your custom `VMA_ASSERT` macro, among others. +For more information, see: @ref configuration. + + \section quick_start_resource_allocation Resource allocation When you want to create a buffer or image: @@ -17749,13 +16632,19 @@ VmaAllocation allocation; vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); \endcode -Don't forget to destroy your objects when no longer needed: +Don't forget to destroy your buffer and allocation objects when no longer needed: \code vmaDestroyBuffer(allocator, buffer, allocation); -vmaDestroyAllocator(allocator); \endcode +If you need to map the buffer, you must set flag +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. +There are many additional parameters that can control the choice of memory type to be used for the allocation +and other features. +For more information, see documentation chapters: @ref choosing_memory_type, @ref memory_mapping. + \page choosing_memory_type Choosing memory type @@ -17778,10 +16667,10 @@ You can also combine multiple methods. vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). --# **This is the easiest and recommended way to use this library:** - If you want to create a buffer or an image, allocate memory for it and bind +-# If you want to create a buffer or an image, allocate memory for it, and bind them together, all in one call, you can use function vmaCreateBuffer(), vmaCreateImage(). + This is the easiest and recommended way to use this library! When using 3. or 4., the library internally queries Vulkan for memory types supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) @@ -17849,6 +16738,7 @@ vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer \endcode For more examples of creating different kinds of resources, see chapter \ref usage_patterns. +See also: @ref memory_mapping. Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed, @@ -17860,7 +16750,7 @@ memory type, as described below. Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`, `VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`) are still available and work same way as in previous versions of the library -for backward compatibility, but they are not recommended. +for backward compatibility, but they are deprecated. \section choosing_memory_type_required_preferred_flags Required and preferred flags @@ -17890,8 +16780,8 @@ plus some extra "magic" (heuristics). \section choosing_memory_type_explicit_memory_types Explicit memory types -If you inspected memory types available on the physical device and you have -a preference for memory types that you want to use, you can fill member +If you inspected memory types available on the physical device and you have +a preference for memory types that you want to use, you can fill member VmaAllocationCreateInfo::memoryTypeBits. It is a bit mask, where each bit set means that a memory type with that index is allowed to be used for the allocation. Special value 0, just like `UINT32_MAX`, means there are no @@ -17913,6 +16803,21 @@ VmaAllocation allocation; vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); \endcode +You can also use this parameter to exclude some memory types. +If you inspect memory heaps and types available on the current physical device and +you determine that for some reason you don't want to use a specific memory type for the allocation, +you can enable automatic memory type selection but exclude certain memory type or types +by setting all bits of `memoryTypeBits` to 1 except the ones you choose. + +\code +// ... +uint32_t excludedMemoryTypeIndex = 2; +VmaAllocationCreateInfo allocInfo = {}; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocInfo.memoryTypeBits = ~(1u << excludedMemoryTypeIndex); +// ... +\endcode + \section choosing_memory_type_custom_memory_pools Custom memory pools @@ -17951,6 +16856,7 @@ You can use them directly with memory allocated by this library, but it is not recommended because of following issue: Mapping the same `VkDeviceMemory` block multiple times is illegal - only one mapping at a time is allowed. This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan. +It is also not thread-safe. Because of this, Vulkan Memory Allocator provides following facilities: \note If you want to be able to map an allocation, you need to specify one of the flags @@ -17958,11 +16864,44 @@ Because of this, Vulkan Memory Allocator provides following facilities: in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, -but they can still be used for consistency. +but these flags can still be used for consistency. + +\section memory_mapping_copy_functions Copy functions + +The easiest way to copy data from a host pointer to an allocation is to use convenience function vmaCopyMemoryToAllocation(). +It automatically maps the Vulkan memory temporarily (if not already mapped), performs `memcpy`, +and calls `vkFlushMappedMemoryRanges` (if required - if memory type is not `HOST_COHERENT`). + +It is also the safest one, because using `memcpy` avoids a risk of accidentally introducing memory reads +(e.g. by doing `pMappedVectors[i] += v`), which may be very slow on memory types that are not `HOST_CACHED`. + +\code +struct ConstantBuffer +{ + ... +}; +ConstantBuffer constantBufferData = ... + +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = sizeof(ConstantBuffer); +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + +VkBuffer buf; +VmaAllocation alloc; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); + +vmaCopyMemoryToAllocation(allocator, &constantBufferData, alloc, 0, sizeof(ConstantBuffer)); +\endcode + +Copy in the other direction - from an allocation to a host pointer can be performed the same way using function vmaCopyAllocationToMemory(). \section memory_mapping_mapping_functions Mapping functions -The library provides following functions for mapping of a specific #VmaAllocation: vmaMapMemory(), vmaUnmapMemory(). +The library provides following functions for mapping of a specific allocation: vmaMapMemory(), vmaUnmapMemory(). They are safer and more convenient to use than standard Vulkan functions. You can map an allocation multiple times simultaneously - mapping is reference-counted internally. You can also map different allocations simultaneously regardless of whether they use the same `VkDeviceMemory` block. @@ -18223,6 +17162,12 @@ vkDestroyImage(allocator, img2, nullptr); vkDestroyImage(allocator, img1, nullptr); \endcode +VMA also provides convenience functions that create a buffer or image and bind it to memory +represented by an existing #VmaAllocation: +vmaCreateAliasingBuffer(), vmaCreateAliasingBuffer2(), +vmaCreateAliasingImage(), vmaCreateAliasingImage2(). +Versions with "2" offer additional parameter `allocationLocalOffset`. + Remember that using resources that alias in memory requires proper synchronization. You need to issue a memory barrier to make sure commands that use `img1` and `img2` don't overlap on GPU timeline. @@ -18252,6 +17197,7 @@ A memory pool contains a number of `VkDeviceMemory` blocks. The library automatically creates and manages default pool for each memory type available on the device. Default memory pool automatically grows in size. Size of allocated blocks is also variable and managed automatically. +You are using default pools whenever you leave VmaAllocationCreateInfo::pool = null. You can create custom pool and allocate memory out of it. It can be useful if you want to: @@ -18323,13 +17269,6 @@ It is supported only when VmaPoolCreateInfo::blockSize = 0. To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. -\note Excessive use of custom pools is a common mistake when using this library. -Custom pools may be useful for special purposes - when you want to -keep certain type of resources separate e.g. to reserve minimum amount of memory -for them or limit maximum amount of memory they can occupy. For most -resources this is not needed and so it is not recommended to create #VmaPool -objects and allocations out of them. Allocating from the default pool is sufficient. - \section custom_memory_pools_MemTypeIndex Choosing memory type index @@ -18364,6 +17303,51 @@ When creating buffers/images allocated in that pool, provide following parameter - VmaAllocationCreateInfo: You don't need to pass same parameters. Fill only `pool` member. Other members are ignored anyway. + +\section custom_memory_pools_when_not_use When not to use custom pools + +Custom pools are commonly overused by VMA users. +While it may feel natural to keep some logical groups of resources separate in memory, +in most cases it does more harm than good. +Using custom pool shouldn't be your first choice. +Instead, please make all allocations from default pools first and only use custom pools +if you can prove and measure that it is beneficial in some way, +e.g. it results in lower memory usage, better performance, etc. + +Using custom pools has disadvantages: + +- Each pool has its own collection of `VkDeviceMemory` blocks. + Some of them may be partially or even completely empty. + Spreading allocations across multiple pools increases the amount of wasted (allocated but unbound) memory. +- You must manually choose specific memory type to be used by a custom pool (set as VmaPoolCreateInfo::memoryTypeIndex). + When using default pools, best memory type for each of your allocations can be selected automatically + using a carefully design algorithm that works across all kinds of GPUs. +- If an allocation from a custom pool at specific memory type fails, entire allocation operation returns failure. + When using default pools, VMA tries another compatible memory type. +- If you set VmaPoolCreateInfo::blockSize != 0, each memory block has the same size, + while default pools start from small blocks and only allocate next blocks larger and larger + up to the preferred block size. + +Many of the common concerns can be addressed in a different way than using custom pools: + +- If you want to keep your allocations of certain size (small versus large) or certain lifetime (transient versus long lived) + separate, you likely don't need to. + VMA uses a high quality allocation algorithm that manages memory well in various cases. + Please measure and check if using custom pools provides a benefit. +- If you want to keep your images and buffers separate, you don't need to. + VMA respects `bufferImageGranularity` limit automatically. +- If you want to keep your mapped and not mapped allocations separate, you don't need to. + VMA respects `nonCoherentAtomSize` limit automatically. + It also maps only those `VkDeviceMemory` blocks that need to map any allocation. + It even tries to keep mappable and non-mappable allocations in separate blocks to minimize the amount of mapped memory. +- If you want to choose a custom size for the default memory block, you can set it globally instead + using VmaAllocatorCreateInfo::preferredLargeHeapBlockSize. +- If you want to select specific memory type for your allocation, + you can set VmaAllocationCreateInfo::memoryTypeBits to `(1u << myMemoryTypeIndex)` instead. +- If you need to create a buffer with certain minimum alignment, you can still do it + using default pools with dedicated function vmaCreateBufferWithAlignment(). + + \section linear_algorithm Linear allocation algorithm Each Vulkan memory block managed by this library has accompanying metadata that @@ -18487,7 +17471,7 @@ for(;;) { // Inspect pass.pMoves[i].srcAllocation, identify what buffer/image it represents. VmaAllocationInfo allocInfo; - vmaGetAllocationInfo(allocator, pMoves[i].srcAllocation, &allocInfo); + vmaGetAllocationInfo(allocator, pass.pMoves[i].srcAllocation, &allocInfo); MyEngineResourceData* resData = (MyEngineResourceData*)allocInfo.pUserData; // Recreate and bind this buffer/image at: pass.pMoves[i].dstMemory, pass.pMoves[i].dstOffset. @@ -18495,7 +17479,7 @@ for(;;) VkImage newImg; res = vkCreateImage(device, &imgCreateInfo, nullptr, &newImg); // Check res... - res = vmaBindImageMemory(allocator, pMoves[i].dstTmpAllocation, newImg); + res = vmaBindImageMemory(allocator, pass.pMoves[i].dstTmpAllocation, newImg); // Check res... // Issue a vkCmdCopyBuffer/vkCmdCopyImage to copy its content to the new place. @@ -18974,22 +17958,35 @@ Margin validation (corruption detection) works only for memory types that are `HOST_VISIBLE` and `HOST_COHERENT`. -\page opengl_interop OpenGL Interop +\section debugging_memory_usage_leak_detection Leak detection features -VMA provides some features that help with interoperability with OpenGL. +At allocation and allocator destruction time VMA checks for unfreed and unmapped blocks using +`VMA_ASSERT_LEAK()`. This macro defaults to an assertion, triggering a typically fatal error in Debug +builds, and doing nothing in Release builds. You can provide your own definition of `VMA_ASSERT_LEAK()` +to change this behavior. + +At memory block destruction time VMA lists out all unfreed allocations using the `VMA_LEAK_LOG_FORMAT()` +macro, which defaults to `VMA_DEBUG_LOG_FORMAT`, which in turn defaults to a no-op. +If you're having trouble with leaks - for example, the aforementioned assertion triggers, but you don't +quite know \em why -, overriding this macro to print out the the leaking blocks, combined with assigning +individual names to allocations using vmaSetAllocationName(), can greatly aid in fixing them. + +\page other_api_interop Interop with other graphics APIs + +VMA provides some features that help with interoperability with other graphics APIs, e.g. OpenGL. \section opengl_interop_exporting_memory Exporting memory -If you want to attach `VkExportMemoryAllocateInfoKHR` structure to `pNext` chain of memory allocations made by the library: +If you want to attach `VkExportMemoryAllocateInfoKHR` or other structure to `pNext` chain of memory allocations made by the library: -It is recommended to create \ref custom_memory_pools for such allocations. +You can create \ref custom_memory_pools for such allocations. Define and fill in your `VkExportMemoryAllocateInfoKHR` structure and attach it to VmaPoolCreateInfo::pMemoryAllocateNext while creating the custom pool. Please note that the structure must remain alive and unchanged for the whole lifetime of the #VmaPool, not only while creating it, as no copy of the structure is made, but its original pointer is used for each allocation instead. -If you want to export all memory allocated by the library from certain memory types, +If you want to export all memory allocated by VMA from certain memory types, also dedicated allocations or other allocations made from default pools, an alternative solution is to fill in VmaAllocatorCreateInfo::pTypeExternalMemoryHandleTypes. It should point to an array with `VkExternalMemoryHandleTypeFlagsKHR` to be automatically passed by the library @@ -19006,7 +18003,7 @@ Buffers or images exported to a different API like OpenGL may require a differen higher than the one used by the library automatically, queried from functions like `vkGetBufferMemoryRequirements`. To impose such alignment: -It is recommended to create \ref custom_memory_pools for such allocations. +You can create \ref custom_memory_pools for such allocations. Set VmaPoolCreateInfo::minAllocationAlignment member to the minimum alignment required for each allocation to be made out of this pool. The alignment actually used will be the maximum of this member and the alignment returned for the specific buffer or image @@ -19017,8 +18014,17 @@ use special function vmaCreateBufferWithAlignment(), which takes additional para Note the problem of alignment affects only resources placed inside bigger `VkDeviceMemory` blocks and not dedicated allocations, as these, by definition, always have alignment = 0 because the resource is bound to the beginning of its dedicated block. +You can ensure that an allocation is created as dedicated by using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entire memory block passed on its allocation. +\section opengl_interop_extended_allocation_information Extended allocation information + +If you want to rely on VMA to allocate your buffers and images inside larger memory blocks, +but you need to know the size of the entire block and whether the allocation was made +with its own dedicated memory, use function vmaGetAllocationInfo2() to retrieve +extended allocation information in structure #VmaAllocationInfo2. + + \page usage_patterns Recommended usage patterns @@ -19188,7 +18194,8 @@ allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +VkResult result = vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); +// Check result... VkMemoryPropertyFlags memPropFlags; vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); @@ -19199,10 +18206,24 @@ if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) // [Executed in runtime]: memcpy(allocInfo.pMappedData, myData, myDataSize); + result = vmaFlushAllocation(allocator, alloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = buf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); } else { - // Allocation ended up in a non-mappable memory - need to transfer. + // Allocation ended up in a non-mappable memory - a transfer using a staging buffer is required. VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; stagingBufCreateInfo.size = 65536; stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; @@ -19215,17 +18236,46 @@ else VkBuffer stagingBuf; VmaAllocation stagingAlloc; VmaAllocationInfo stagingAllocInfo; - vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, - &stagingBuf, &stagingAlloc, stagingAllocInfo); + result = vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, &stagingAllocInfo); + // Check result... // [Executed in runtime]: memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); - //vkCmdPipelineBarrier: VK_ACCESS_HOST_WRITE_BIT --> VK_ACCESS_TRANSFER_READ_BIT + result = vmaFlushAllocation(allocator, stagingAlloc, 0, VK_WHOLE_SIZE); + // Check result... + + VkBufferMemoryBarrier bufMemBarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + bufMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + bufMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier.buffer = stagingBuf; + bufMemBarrier.offset = 0; + bufMemBarrier.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + VkBufferCopy bufCopy = { 0, // srcOffset 0, // dstOffset, - myDataSize); // size + myDataSize, // size + }; + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); + + VkBufferMemoryBarrier bufMemBarrier2 = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER }; + bufMemBarrier2.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufMemBarrier2.dstAccessMask = VK_ACCESS_UNIFORM_READ_BIT; // We created a uniform buffer + bufMemBarrier2.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + bufMemBarrier2.buffer = buf; + bufMemBarrier2.offset = 0; + bufMemBarrier2.size = VK_WHOLE_SIZE; + + vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + 0, 0, nullptr, 1, &bufMemBarrier2, 0, nullptr); } \endcode @@ -19258,14 +18308,22 @@ Please check "CONFIGURATION SECTION" in the code to find macros that you can def before each include of this file or change directly in this file to provide your own implementation of basic facilities like assert, `min()` and `max()` functions, mutex, atomic etc. -The library uses its own implementation of containers by default, but you can switch to using -STL containers instead. For example, define `VMA_ASSERT(expr)` before including the library to provide custom implementation of the assertion, compatible with your project. By default it is defined to standard C `assert(expr)` in `_DEBUG` configuration and empty otherwise. +Similarly, you can define `VMA_LEAK_LOG_FORMAT` macro to enable printing of leaked (unfreed) allocations, +including their names and other parameters. Example: + +\code +#define VMA_LEAK_LOG_FORMAT(format, ...) do { \ + printf((format), __VA_ARGS__); \ + printf("\n"); \ + } while(false) +\endcode + \section config_Vulkan_functions Pointers to Vulkan functions There are multiple ways to import pointers to Vulkan functions in the library. @@ -19458,7 +18516,7 @@ vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullpt `priority` member is ignored in the following situations: - Allocations created in custom pools: They inherit the priority, along with all other allocation parameters - from the parametrs passed in #VmaPoolCreateInfo when the pool was created. + from the parameters passed in #VmaPoolCreateInfo when the pool was created. - Allocations created in default pools: They inherit the priority from the parameters VMA used when creating default pools, which means `priority == 0.5f`. diff --git a/src/util/vulkan_loader.h b/src/util/vulkan_loader.h index 7fe0ac0a3..d760cdbdf 100644 --- a/src/util/vulkan_loader.h +++ b/src/util/vulkan_loader.h @@ -78,6 +78,7 @@ class Error; #pragma clang diagnostic ignored "-Wnullability-completeness" #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wunused-function" #elif defined(_MSC_VER) #pragma warning(push, 0) #endif