From 06e1456d4fd56c7ea8271bc994fbeb3f2ee2a929 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 12 Oct 2019 13:47:25 +0200 Subject: [PATCH] vulkan: memory allocator --- core/hw/pvr/Renderer_if.cpp | 6 +- core/hw/pvr/Renderer_if.h | 1 - core/rend/TexCache.cpp | 91 ++++-------- core/rend/TexCache.h | 53 ++++++- core/rend/gles/gltex.cpp | 15 -- core/rend/vulkan/allocator.cpp | 3 + core/rend/vulkan/allocator.h | 199 +++++++++++++++++++++++++++ core/rend/vulkan/buffer.cpp | 10 +- core/rend/vulkan/buffer.h | 58 +++++--- core/rend/vulkan/drawer.cpp | 7 +- core/rend/vulkan/drawer.h | 36 +++-- core/rend/vulkan/pipeline.h | 37 ++--- core/rend/vulkan/texture.cpp | 32 +++-- core/rend/vulkan/texture.h | 15 +- core/rend/vulkan/utils.h | 1 + core/rend/vulkan/vulkan_context.cpp | 4 +- core/rend/vulkan/vulkan_renderer.cpp | 19 ++- 17 files changed, 426 insertions(+), 161 deletions(-) create mode 100644 core/rend/vulkan/allocator.cpp create mode 100644 core/rend/vulkan/allocator.h diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index feab6794d..bd001f59e 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -450,8 +450,10 @@ void* rend_thread(void* p) bool pend_rend = false; -void rend_resize(int width, int height) { - renderer->Resize(width, height); +void rend_resize(int width, int height) +{ + if (renderer != nullptr) + renderer->Resize(width, height); } diff --git a/core/hw/pvr/Renderer_if.h b/core/hw/pvr/Renderer_if.h index d6a79d9d7..4981e97b3 100644 --- a/core/hw/pvr/Renderer_if.h +++ b/core/hw/pvr/Renderer_if.h @@ -18,7 +18,6 @@ void *rend_thread(void *); void rend_set_fb_scale(float x,float y); void rend_resize(int width, int height); -void rend_text_invl(vram_block* bl); /////// extern TA_context* _pvrrc; diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index 760796797..5c33e82d3 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -479,7 +479,7 @@ bool BaseTextureCacheData::Delete() if 
(lock_block) libCore_vramlock_Unlock_block(lock_block); - lock_block=0; + lock_block = nullptr; delete[] custom_image_data; @@ -489,19 +489,20 @@ bool BaseTextureCacheData::Delete() void BaseTextureCacheData::Create() { //Reset state info .. - Lookups=0; - Updates=0; - dirty=FrameCount; + Lookups = 0; + Updates = 0; + dirty = FrameCount; lock_block = nullptr; custom_image_data = nullptr; + custom_load_in_progress = 0; //decode info from tsp/tcw into the texture struct - tex=&format[tcw.PixelFmt == PixelReserved ? Pixel1555 : tcw.PixelFmt]; //texture format table entry + tex = &format[tcw.PixelFmt == PixelReserved ? Pixel1555 : tcw.PixelFmt]; //texture format table entry - sa_tex = (tcw.TexAddr<<3) & VRAM_MASK; //texture start address - sa = sa_tex; //data texture start address (modified for MIPs, as needed) - w=8<bpp == 4) @@ -541,37 +542,37 @@ void BaseTextureCacheData::Create() texconv = tex->PL; texconv32 = tex->PL32; //calculate the size, in bytes, for the locking - size=stride*h*tex->bpp/8; + size = stride * h * tex->bpp / 8; } else { // Quake 3 Arena uses one. Not sure if valid but no need to crash - //verify(w==h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN* + //verify(w == h || !tcw.MipMapped); // are non square mipmaps supported ? 
i can't recall right now *WARN* if (tcw.VQ_Comp) { verify(tex->VQ != NULL || tex->VQ32 != NULL); vq_codebook = sa; if (tcw.MipMapped) - sa+=MipPoint[tsp.TexU]; + sa += MipPoint[tsp.TexU]; texconv = tex->VQ; texconv32 = tex->VQ32; - size=w*h/8; + size = w * h / 8; } else { verify(tex->TW != NULL || tex->TW32 != NULL); if (tcw.MipMapped) - sa+=MipPoint[tsp.TexU]*tex->bpp/2; + sa += MipPoint[tsp.TexU] * tex->bpp / 2; texconv = tex->TW; texconv32 = tex->TW32; - size=w*h*tex->bpp/8; + size = w * h * tex->bpp / 8; } } break; default: WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt); - size=w*h*2; + size = w * h * 2; texconv = NULL; texconv32 = NULL; } @@ -731,47 +732,7 @@ void BaseTextureCacheData::CheckCustomTexture() } } -static std::unordered_map> TexCache; -typedef std::unordered_map>::iterator TexCacheIter; - -// Only use TexU and TexV from TSP in the cache key -// TexV : 7, TexU : 7 -static const TSP TSPTextureCacheMask = { { 7, 7 } }; -// TexAddr : 0x1FFFFF, Reserved : 0, StrideSel : 0, ScanOrder : 1, PixelFmt : 7, VQ_Comp : 1, MipMapped : 1 -static const TCW TCWTextureCacheMask = { { 0x1FFFFF, 0, 0, 1, 7, 1, 1 } }; - -BaseTextureCacheData *getTextureCacheData(TSP tsp, TCW tcw, BaseTextureCacheData *(*factory)()) -{ - u64 key = tsp.full & TSPTextureCacheMask.full; - if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) - // Paletted textures have a palette selection that must be part of the key - // We also add the palette type to the key to avoid thrashing the cache - // when the palette type is changed. If the palette type is changed back in the future, - // this texture will stil be available. 
- key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6); - else - key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32; - - TexCacheIter tx = TexCache.find(key); - - BaseTextureCacheData* tf; - if (tx != TexCache.end()) - { - tf = tx->second.get(); - // Needed if the texture is updated - tf->tcw.StrideSel = tcw.StrideSel; - } - else //create if not existing - { - tf = factory(); - TexCache[key] = std::unique_ptr(tf); - - tf->tsp = tsp; - tf->tcw = tcw; - } - - return tf; -} +std::unordered_map> TexCache; void CollectCleanup() { @@ -788,12 +749,10 @@ void CollectCleanup() break; } - for (u64 id : list) { + for (u64 id : list) + { if (TexCache[id]->Delete()) - { - //printf("Deleting %d\n", TexCache[list[i]].texID); TexCache.erase(id); - } } } @@ -974,5 +933,13 @@ void WriteTextureToVRam(u32 width, u32 height, u8 *data, u16 *dst) } dst += (stride - width * 2) / 2; } - +} + +void rend_text_invl(vram_block* bl) +{ + BaseTextureCacheData* tcd = (BaseTextureCacheData*)bl->userdata; + tcd->dirty = FrameCount; + tcd->lock_block = nullptr; + + libCore_vramlock_Unlock_block_wb(bl); } diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index c74830ee3..a8b146917 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -1,5 +1,7 @@ #pragma once #include +#include +#include #include "oslib/oslib.h" #include "hw/pvr/pvr_regs.h" #include "hw/pvr/ta_structs.h" @@ -664,8 +666,8 @@ struct BaseTextureCacheData u32 texture_hash; // xxhash of texture data, used for custom textures u32 old_texture_hash; // legacy hash u8* custom_image_data; // loaded custom image data - volatile u32 custom_width; - volatile u32 custom_height; + u32 custom_width; + u32 custom_height; std::atomic_int custom_load_in_progress; void PrintTextureName(); @@ -701,9 +703,54 @@ struct BaseTextureCacheData virtual bool Delete(); virtual ~BaseTextureCacheData() {} }; -BaseTextureCacheData *getTextureCacheData(TSP tsp, TCW tcw, BaseTextureCacheData *(*factory)()); + +extern std::unordered_map> 
TexCache; +typedef std::unordered_map>::iterator TexCacheIter; + +// Only use TexU and TexV from TSP in the cache key +// TexV : 7, TexU : 7 +const TSP TSPTextureCacheMask = { { 7, 7 } }; +// TexAddr : 0x1FFFFF, Reserved : 0, StrideSel : 0, ScanOrder : 1, PixelFmt : 7, VQ_Comp : 1, MipMapped : 1 +const TCW TCWTextureCacheMask = { { 0x1FFFFF, 0, 0, 1, 7, 1, 1 } }; + +template +BaseTextureCacheData *getTextureCacheData(TSP tsp, TCW tcw, Func factory) +{ + u64 key = tsp.full & TSPTextureCacheMask.full; + if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) + // Paletted textures have a palette selection that must be part of the key + // We also add the palette type to the key to avoid thrashing the cache + // when the palette type is changed. If the palette type is changed back in the future, + // this texture will stil be available. + key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6); + else + key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32; + + TexCacheIter it = TexCache.find(key); + + BaseTextureCacheData* texture; + if (it != TexCache.end()) + { + texture = it->second.get(); + // Needed if the texture is updated + texture->tcw.StrideSel = tcw.StrideSel; + } + else //create if not existing + { + texture = factory(); + TexCache[key] = std::unique_ptr(texture); + + texture->tsp = tsp; + texture->tcw = tcw; + } + texture->Lookups++; + + return texture; +} + void CollectCleanup(); void killtex(); +void rend_text_invl(vram_block* bl); void ReadFramebuffer(PixelBuffer& pb, int& width, int& height); void WriteTextureToVRam(u32 width, u32 height, u8 *data, u16 *dst); diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 182850093..11d64bd13 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -369,9 +369,6 @@ u64 gl_GetTexture(TSP tsp, TCW tcw) // TexCacheHits = 0; // } - //update state for opts/stuff - tf->Lookups++; - //return gl texture return tf->texID; } @@ -391,9 +388,6 @@ text_info raw_GetTexture(TSP tsp, TCW 
tcw) if (tf->NeedsUpdate()) tf->Update(); - //update state for opts/stuff - tf->Lookups++; - //return gl texture rv.height = tf->h; rv.width = tf->w; @@ -408,15 +402,6 @@ void DoCleanup() { } -void rend_text_invl(vram_block* bl) -{ - TextureCacheData* tcd = (TextureCacheData*)bl->userdata; - tcd->dirty=FrameCount; - tcd->lock_block=0; - - libCore_vramlock_Unlock_block_wb(bl); -} - GLuint fbTextureId; void RenderFramebuffer() diff --git a/core/rend/vulkan/allocator.cpp b/core/rend/vulkan/allocator.cpp new file mode 100644 index 000000000..ddb03e30a --- /dev/null +++ b/core/rend/vulkan/allocator.cpp @@ -0,0 +1,3 @@ +#include "allocator.h" + +SimpleAllocator SimpleAllocator::instance; diff --git a/core/rend/vulkan/allocator.h b/core/rend/vulkan/allocator.h new file mode 100644 index 000000000..f06894f4a --- /dev/null +++ b/core/rend/vulkan/allocator.h @@ -0,0 +1,199 @@ +#pragma once +#include +#include +#include "vulkan.h" +#include "utils.h" + +class VulkanAllocator; + +// Manages a large chunk of memory using buddy memory allocation algorithm +class Chunk +{ +public: + Chunk(vk::DeviceSize size) + { + verify(size >= SmallestBlockSize); + freeBlocks.push_back(std::make_pair(0, PowerOf2(size))); + } + +private: + vk::DeviceSize PowerOf2(vk::DeviceSize size) + { + vk::DeviceSize alignedSize = SmallestBlockSize; + while (alignedSize < size) + alignedSize *= 2; + return alignedSize; + } + + vk::DeviceSize Allocate(vk::DeviceSize size, vk::DeviceSize alignment) + { + alignment--; + const vk::DeviceSize alignedSize = PowerOf2(size); + + auto smallestFreeBlock = freeBlocks.end(); + for (auto it = freeBlocks.begin(); it != freeBlocks.end(); it++) + { + if (it->second == alignedSize && (it->first & alignment) == 0) + { + // Free block of the right size and alignment found -> return it + usedBlocks.insert(*it); + vk::DeviceSize offset = it->first; + freeBlocks.erase(it); + + return offset; + } + if (it->second > alignedSize && (it->first & alignment) == 0 + && 
(smallestFreeBlock == freeBlocks.end() || smallestFreeBlock->second > it->second)) + smallestFreeBlock = it; + } + if (smallestFreeBlock == freeBlocks.end()) + return OutOfMemory; + + // We need to split larger blocks until we have one of the required size + vk::DeviceSize offset = smallestFreeBlock->first; + smallestFreeBlock->second /= 2; + smallestFreeBlock->first += smallestFreeBlock->second; + while (smallestFreeBlock->second > alignedSize) + { + freeBlocks.push_front(std::make_pair(offset + smallestFreeBlock->second / 2, smallestFreeBlock->second / 2)); + smallestFreeBlock = freeBlocks.begin(); + } + usedBlocks[offset] = alignedSize; + + return offset; + } + + void Free(vk::DeviceSize offset) + { + auto usedBlock = usedBlocks.find(offset); + verify(usedBlock != usedBlocks.end()); + vk::DeviceSize buddyOffset = offset ^ usedBlock->second; + vk::DeviceSize buddySize = usedBlock->second; + auto buddy = freeBlocks.end(); + while (true) + { + auto it = freeBlocks.begin(); + for (; it != freeBlocks.end(); it++) + { + if (it->first == buddyOffset && it->second == buddySize) + { + it->second *= 2; + it->first &= ~(it->second - 1); + if (buddy != freeBlocks.end()) + freeBlocks.erase(buddy); + buddy = it; + buddyOffset = it->first ^ buddy->second; + buddySize = it->second; + break; + } + } + if (buddy == freeBlocks.end()) + { + // Initial order buddy not found -> add block to free list + freeBlocks.push_front(std::make_pair(offset, usedBlock->second)); + break; + } + if (it == freeBlocks.end()) + break; + } + usedBlocks.erase(usedBlock); + } + + // first object/key is offset, second one/value is size + std::list> freeBlocks; + std::unordered_map usedBlocks; + vk::UniqueDeviceMemory deviceMemory; + + static const vk::DeviceSize OutOfMemory = (vk::DeviceSize)-1; + static const vk::DeviceSize SmallestBlockSize = 256; + + friend class VulkanAllocator; +}; + +class Allocator +{ +public: + virtual ~Allocator() {} + virtual vk::DeviceSize Allocate(vk::DeviceSize size, 
vk::DeviceSize alignment, u32 memoryType, vk::DeviceMemory& deviceMemory) = 0; + virtual void Free(vk::DeviceSize offset, u32 memoryType, vk::DeviceMemory deviceMemory) = 0; +}; + +class VulkanAllocator : public Allocator +{ +public: + vk::DeviceSize Allocate(vk::DeviceSize size, vk::DeviceSize alignment, u32 memoryType, vk::DeviceMemory& deviceMemory) override + { + std::vector& chunks = findChunks(memoryType); + for (auto& chunk : chunks) + { + vk::DeviceSize offset = chunk.Allocate(size, alignment); + if (offset != Chunk::OutOfMemory) + { + deviceMemory = *chunk.deviceMemory; + return offset; + } + } + const vk::DeviceSize newChunkSize = std::max(chunkSize, size); + chunks.emplace_back(newChunkSize); + Chunk& chunk = chunks.back(); + chunk.deviceMemory = VulkanContext::Instance()->GetDevice()->allocateMemoryUnique(vk::MemoryAllocateInfo(newChunkSize, memoryType)); + vk::DeviceSize offset = chunk.Allocate(size, alignment); + verify(offset != Chunk::OutOfMemory); + + deviceMemory = *chunk.deviceMemory; + return offset; + } + + void Free(vk::DeviceSize offset, u32 memoryType, vk::DeviceMemory deviceMemory) override + { + std::vector& chunks = findChunks(memoryType); + for (auto chunkIt = chunks.begin(); chunkIt < chunks.end(); chunkIt++) + { + if (*chunkIt->deviceMemory == deviceMemory) + { + chunkIt->Free(offset); + if (chunks.size() > 1 && chunkIt->usedBlocks.empty()) + { + chunks.erase(chunkIt); + } + return; + } + } + die("Invalid free"); + } + + void SetChunkSize(vk::DeviceSize chunkSize) { + this->chunkSize = chunkSize; + } + +private: + std::vector& findChunks(u32 memoryType) + { + for (auto& pair : chunksPerMemType) + if (pair.first == memoryType) + return pair.second; + chunksPerMemType.push_back(std::make_pair(memoryType, std::vector())); + return chunksPerMemType.back().second; + } + + vk::DeviceSize chunkSize; + std::vector>> chunksPerMemType; +}; + +class SimpleAllocator : public Allocator +{ +public: + vk::DeviceSize Allocate(vk::DeviceSize size, 
vk::DeviceSize alignment, u32 memoryType, vk::DeviceMemory& deviceMemory) override + { + deviceMemory = VulkanContext::Instance()->GetDevice()->allocateMemory(vk::MemoryAllocateInfo(size, memoryType)); + + return 0; + } + + void Free(vk::DeviceSize offset, u32 memoryType, vk::DeviceMemory deviceMemory) override + { + VulkanContext::Instance()->GetDevice()->free(deviceMemory); + } + + static SimpleAllocator instance; +}; diff --git a/core/rend/vulkan/buffer.cpp b/core/rend/vulkan/buffer.cpp index 7fdff22b0..5285a9b2c 100644 --- a/core/rend/vulkan/buffer.cpp +++ b/core/rend/vulkan/buffer.cpp @@ -22,13 +22,15 @@ #include "utils.h" BufferData::BufferData(vk::PhysicalDevice const& physicalDevice, vk::Device const& device, vk::DeviceSize size, - vk::BufferUsageFlags usage, vk::MemoryPropertyFlags propertyFlags) - : m_size(size) + vk::BufferUsageFlags usage, Allocator *allocator, vk::MemoryPropertyFlags propertyFlags) + : device(device), bufferSize(size), allocator(allocator) #if !defined(NDEBUG) , m_usage(usage), m_propertyFlags(propertyFlags) #endif { buffer = device.createBufferUnique(vk::BufferCreateInfo(vk::BufferCreateFlags(), size, usage)); - deviceMemory = allocateMemory(device, physicalDevice.getMemoryProperties(), device.getBufferMemoryRequirements(buffer.get()), propertyFlags); - device.bindBufferMemory(buffer.get(), deviceMemory.get(), 0); + vk::MemoryRequirements memoryRequirements = device.getBufferMemoryRequirements(buffer.get()); + memoryType = findMemoryType(physicalDevice.getMemoryProperties(), memoryRequirements.memoryTypeBits, propertyFlags); + offset = allocator->Allocate(memoryRequirements.size, memoryRequirements.alignment, memoryType, sharedDeviceMemory); + device.bindBufferMemory(buffer.get(), sharedDeviceMemory, offset); } diff --git a/core/rend/vulkan/buffer.h b/core/rend/vulkan/buffer.h index 671f4c052..6a9d24399 100644 --- a/core/rend/vulkan/buffer.h +++ b/core/rend/vulkan/buffer.h @@ -20,52 +20,72 @@ */ #pragma once #include "vulkan.h" 
+#include "allocator.h" struct BufferData { BufferData(vk::PhysicalDevice const& physicalDevice, vk::Device const& device, vk::DeviceSize size, vk::BufferUsageFlags usage, + Allocator *allocator = &SimpleAllocator::instance, vk::MemoryPropertyFlags propertyFlags = vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent); - - void upload(vk::Device const& device, u32 size, const void *data, u32 offset = 0) const + ~BufferData() { - verify((m_propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) && (m_propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)); - verify(offset + size <= m_size); - - void* dataPtr = device.mapMemory(*this->deviceMemory, offset, size); - memcpy(dataPtr, data, size); - device.unmapMemory(*this->deviceMemory); + buffer.reset(); + allocator->Free(offset, memoryType, sharedDeviceMemory); } - void upload(vk::Device const& device, size_t count, u32 *sizes, const void **data, u32 offset = 0) const + void upload(vk::Device const& device, u32 size, const void *data, u32 bufOffset = 0) const + { + verify((m_propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) && (m_propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)); + verify(offset + bufOffset + size <= bufferSize); + + void* dataPtr = device.mapMemory(sharedDeviceMemory, offset + bufOffset, size); + memcpy(dataPtr, data, size); + device.unmapMemory(sharedDeviceMemory); + } + + void upload(vk::Device const& device, size_t count, u32 *sizes, const void **data, u32 bufOffset = 0) const { verify((m_propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) && (m_propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)); u32 totalSize = 0; for (int i = 0; i < count; i++) totalSize += sizes[i]; - verify(offset + totalSize <= m_size); - void* dataPtr = device.mapMemory(*this->deviceMemory, offset, totalSize); + verify(offset + bufOffset + totalSize <= bufferSize); + void* dataPtr = device.mapMemory(sharedDeviceMemory, offset + bufOffset, totalSize); for 
(int i = 0; i < count; i++) { memcpy(dataPtr, data[i], sizes[i]); dataPtr = (u8 *)dataPtr + sizes[i]; } - device.unmapMemory(*this->deviceMemory); + device.unmapMemory(sharedDeviceMemory); } - void download(vk::Device const& device, u32 size, void *data, u32 offset = 0) const + void download(vk::Device const& device, u32 size, void *data, u32 bufOffset = 0) const { verify((m_propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) && (m_propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)); - verify(offset + size <= m_size); + verify(offset + bufOffset + size <= bufferSize); - void* dataPtr = device.mapMemory(*this->deviceMemory, offset, size); + void* dataPtr = device.mapMemory(sharedDeviceMemory, offset + bufOffset, size); memcpy(data, dataPtr, size); - device.unmapMemory(*this->deviceMemory); + device.unmapMemory(sharedDeviceMemory); } - vk::UniqueDeviceMemory deviceMemory; - vk::UniqueBuffer buffer; - vk::DeviceSize m_size; + void *MapMemory() + { + return device.mapMemory(sharedDeviceMemory, offset, bufferSize); + } + void UnmapMemory() + { + device.unmapMemory(sharedDeviceMemory); + } + + vk::UniqueBuffer buffer; + vk::DeviceSize bufferSize; + Allocator *allocator; + vk::DeviceSize offset; + u32 memoryType; + vk::DeviceMemory sharedDeviceMemory; + vk::Device device; #if !defined(NDEBUG) private: diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index dda7c1441..b8c7b024b 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -494,8 +494,8 @@ vk::CommandBuffer TextureDrawer::BeginRenderPass() for (tsp.TexU = 0; tsp.TexU <= 7 && (8 << tsp.TexU) < origWidth; tsp.TexU++); for (tsp.TexV = 0; tsp.TexV <= 7 && (8 << tsp.TexV) < origHeight; tsp.TexV++); - texture = static_cast(getTextureCacheData(tsp, tcw, [](){ - return (BaseTextureCacheData *)new Texture(VulkanContext::Instance()->GetPhysicalDevice(), *VulkanContext::Instance()->GetDevice()); + texture = static_cast(getTextureCacheData(tsp, tcw, [this](){ + return 
(BaseTextureCacheData *)new Texture(VulkanContext::Instance()->GetPhysicalDevice(), *VulkanContext::Instance()->GetDevice(), this->texAllocator); })); if (texture->IsNew()) texture->Create(); @@ -600,9 +600,6 @@ void TextureDrawer::EndRenderPass() } //memset(&vram[fb_rtt.TexAddr << 3], '\0', size); - if (width > 1024 || height > 1024) - return; - texture->dirty = 0; if (texture->lock_block == NULL) texture->lock_block = libCore_vramlock_Lock(texture->sa_tex, texture->sa + texture->size - 1, texture); diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index 7f90e3cf2..ec10614ba 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -35,7 +35,7 @@ public: bool Draw(const Texture *fogTexture); protected: - virtual void Init(SamplerManager *samplerManager, ShaderManager *shaderManager) + void Init(SamplerManager *samplerManager, ShaderManager *shaderManager) { this->samplerManager = samplerManager; pipelineManager->Init(shaderManager); @@ -72,16 +72,20 @@ private: class ScreenDrawer : public Drawer { public: - void Init(SamplerManager *samplerManager, ShaderManager *shaderManager) override + void Init(SamplerManager *samplerManager, ShaderManager *shaderManager) { - pipelineManager = std::unique_ptr(new PipelineManager()); + if (!pipelineManager) + pipelineManager = std::unique_ptr(new PipelineManager()); Drawer::Init(samplerManager, shaderManager); - while (descriptorSets.size() < GetContext()->GetSwapChainSize()) - { - descriptorSets.push_back(DescriptorSets()); - descriptorSets.back().Init(samplerManager, pipelineManager->GetPipelineLayout(), pipelineManager->GetPerFrameDSLayout(), pipelineManager->GetPerPolyDSLayout()); - } + if (descriptorSets.size() > GetContext()->GetSwapChainSize()) + descriptorSets.resize(GetContext()->GetSwapChainSize()); + else + while (descriptorSets.size() < GetContext()->GetSwapChainSize()) + { + descriptorSets.push_back(DescriptorSets()); + descriptorSets.back().Init(samplerManager, 
pipelineManager->GetPipelineLayout(), pipelineManager->GetPerFrameDSLayout(), pipelineManager->GetPerPolyDSLayout()); + } } protected: @@ -95,12 +99,12 @@ protected: std::max(512 * 1024u, size), vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eUniformBuffer))); } - else if (mainBuffers[GetCurrentImage()]->m_size < size) + else if (mainBuffers[GetCurrentImage()]->bufferSize < size) { - u32 newSize = mainBuffers[GetCurrentImage()]->m_size; + u32 newSize = mainBuffers[GetCurrentImage()]->bufferSize; while (newSize < size) newSize *= 2; - INFO_LOG(RENDERER, "Increasing main buffer size %d -> %d", (u32)mainBuffers[GetCurrentImage()]->m_size, newSize); + INFO_LOG(RENDERER, "Increasing main buffer size %d -> %d", (u32)mainBuffers[GetCurrentImage()]->bufferSize, newSize); mainBuffers[GetCurrentImage()] = std::unique_ptr(new BufferData(GetContext()->GetPhysicalDevice(), GetContext()->GetDevice().get(), newSize, vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eUniformBuffer)); } @@ -131,13 +135,14 @@ private: class TextureDrawer : public Drawer { public: - void Init(SamplerManager *samplerManager, ShaderManager *shaderManager) override + void Init(SamplerManager *samplerManager, ShaderManager *shaderManager, VulkanAllocator *texAllocator) { pipelineManager = std::unique_ptr(new RttPipelineManager()); Drawer::Init(samplerManager, shaderManager); descriptorSets.Init(samplerManager, pipelineManager->GetPipelineLayout(), pipelineManager->GetPerFrameDSLayout(), pipelineManager->GetPerPolyDSLayout()); fence = GetContext()->GetDevice()->createFenceUnique(vk::FenceCreateInfo()); + this->texAllocator = texAllocator; } void SetCommandPool(CommandPool *commandPool) { this->commandPool = commandPool; } @@ -148,12 +153,12 @@ protected: virtual BufferData* GetMainBuffer(u32 size) override { - if (!mainBuffer || mainBuffer->m_size < size) + if (!mainBuffer || 
mainBuffer->bufferSize < size) { - u32 newSize = mainBuffer ? mainBuffer->m_size : 128 * 1024u; + u32 newSize = mainBuffer ? mainBuffer->bufferSize : 128 * 1024u; while (newSize < size) newSize *= 2; - INFO_LOG(RENDERER, "Increasing RTT main buffer size %d -> %d", !mainBuffer ? 0 : (u32)mainBuffer->m_size, newSize); + INFO_LOG(RENDERER, "Increasing RTT main buffer size %d -> %d", !mainBuffer ? 0 : (u32)mainBuffer->bufferSize, newSize); mainBuffer = std::unique_ptr(new BufferData(GetContext()->GetPhysicalDevice(), *GetContext()->GetDevice(), newSize, vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eUniformBuffer)); } @@ -176,4 +181,5 @@ private: DescriptorSets descriptorSets; std::unique_ptr mainBuffer; CommandPool *commandPool; + VulkanAllocator *texAllocator; }; diff --git a/core/rend/vulkan/pipeline.h b/core/rend/vulkan/pipeline.h index 8b688b50f..1aa43fa8c 100644 --- a/core/rend/vulkan/pipeline.h +++ b/core/rend/vulkan/pipeline.h @@ -130,23 +130,26 @@ public: { this->shaderManager = shaderManager; - // Descriptor set and pipeline layout - vk::DescriptorSetLayoutBinding perFrameBindings[] = { - { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // vertex uniforms - { 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // fragment uniforms - { 2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// fog texture - }; - vk::DescriptorSetLayoutBinding perPolyBindings[] = { - { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture - }; - perFrameLayout = GetContext()->GetDevice()->createDescriptorSetLayoutUnique( - vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings)); - perPolyLayout = GetContext()->GetDevice()->createDescriptorSetLayoutUnique( - 
vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perPolyBindings), perPolyBindings)); - vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout }; - vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, 20); - pipelineLayout = GetContext()->GetDevice()->createPipelineLayoutUnique( - vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, 1, &pushConstant)); + if (!perFrameLayout) + { + // Descriptor set and pipeline layout + vk::DescriptorSetLayoutBinding perFrameBindings[] = { + { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // vertex uniforms + { 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // fragment uniforms + { 2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// fog texture + }; + vk::DescriptorSetLayoutBinding perPolyBindings[] = { + { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture + }; + perFrameLayout = GetContext()->GetDevice()->createDescriptorSetLayoutUnique( + vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings)); + perPolyLayout = GetContext()->GetDevice()->createDescriptorSetLayoutUnique( + vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perPolyBindings), perPolyBindings)); + vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout }; + vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, 20); + pipelineLayout = GetContext()->GetDevice()->createPipelineLayoutUnique( + vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, 1, &pushConstant)); + } renderPass = VulkanContext::Instance()->GetRenderPass(); } diff --git a/core/rend/vulkan/texture.cpp b/core/rend/vulkan/texture.cpp index 69a7adac1..5152aff08 100644 --- 
a/core/rend/vulkan/texture.cpp +++ b/core/rend/vulkan/texture.cpp @@ -169,7 +169,6 @@ void Texture::Init(u32 width, u32 height, vk::Format format) { this->extent = vk::Extent2D(width, height); this->format = format; - vk::PhysicalDeviceMemoryProperties memoryProperties = physicalDevice.getMemoryProperties(); vk::FormatProperties formatProperties = physicalDevice.getFormatProperties(format); vk::FormatFeatureFlags formatFeatureFlags = vk::FormatFeatureFlagBits::eSampledImage; @@ -182,10 +181,14 @@ void Texture::Init(u32 width, u32 height, vk::Format format) if (needsStaging) { verify((formatProperties.optimalTilingFeatures & formatFeatureFlags) == formatFeatureFlags); - stagingBufferData = std::unique_ptr(new BufferData(physicalDevice, device, extent.width * extent.height * 4, vk::BufferUsageFlagBits::eTransferSrc)); + if (allocator) + stagingBufferData = std::unique_ptr(new BufferData(physicalDevice, device, extent.width * extent.height * 4, vk::BufferUsageFlagBits::eTransferSrc, allocator)); + else + stagingBufferData = std::unique_ptr(new BufferData(physicalDevice, device, extent.width * extent.height * 4, vk::BufferUsageFlagBits::eTransferSrc)); imageTiling = vk::ImageTiling::eOptimal; usageFlags |= vk::ImageUsageFlagBits::eTransferDst; initialLayout = vk::ImageLayout::eUndefined; + requirements = vk::MemoryPropertyFlagBits::eDeviceLocal; } else { @@ -193,8 +196,7 @@ void Texture::Init(u32 width, u32 height, vk::Format format) initialLayout = vk::ImageLayout::ePreinitialized; requirements = vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostVisible; } - CreateImage(imageTiling, usageFlags, initialLayout, requirements, - vk::ImageAspectFlagBits::eColor); + CreateImage(imageTiling, usageFlags, initialLayout, requirements, vk::ImageAspectFlagBits::eColor); } void Texture::CreateImage(vk::ImageTiling tiling, vk::ImageUsageFlags usage, vk::ImageLayout initialLayout, @@ -206,10 +208,19 @@ void Texture::CreateImage(vk::ImageTiling tiling, 
vk::ImageUsageFlags usage, vk: image = device.createImageUnique(imageCreateInfo); vk::MemoryRequirements memReq = device.getImageMemoryRequirements(image.get()); - u32 memoryTypeIndex = findMemoryType(physicalDevice.getMemoryProperties(), memReq.memoryTypeBits, memoryProperties); - deviceMemory = device.allocateMemoryUnique(vk::MemoryAllocateInfo(memReq.size, memoryTypeIndex)); + memoryType = findMemoryType(physicalDevice.getMemoryProperties(), memReq.memoryTypeBits, memoryProperties); + if (allocator) + { + if (sharedDeviceMemory) + allocator->Free(memoryOffset, memoryType, sharedDeviceMemory); + memoryOffset = allocator->Allocate(memReq.size, memReq.alignment, memoryType, sharedDeviceMemory); + } + else + { + deviceMemory = device.allocateMemoryUnique(vk::MemoryAllocateInfo(memReq.size, memoryType)); + } - device.bindImageMemory(image.get(), deviceMemory.get(), 0); + device.bindImageMemory(image.get(), allocator ? sharedDeviceMemory : *deviceMemory, memoryOffset); vk::ImageViewCreateInfo imageViewCreateInfo(vk::ImageViewCreateFlags(), image.get(), vk::ImageViewType::e2D, format, vk::ComponentMapping(), vk::ImageSubresourceRange(aspectMask, 0, 1, 0, 1)); @@ -227,13 +238,13 @@ void Texture::SetImage(u32 srcSize, void *srcData, bool isNew) ? device.getBufferMemoryRequirements(stagingBufferData->buffer.get()).size : device.getImageMemoryRequirements(image.get()).size; void* data = needsStaging - ? device.mapMemory(stagingBufferData->deviceMemory.get(), 0, size) - : device.mapMemory(deviceMemory.get(), 0, size); + ? stagingBufferData->MapMemory() + : device.mapMemory(allocator ? sharedDeviceMemory : *deviceMemory, memoryOffset, size); memcpy(data, srcData, srcSize); - device.unmapMemory(needsStaging ? stagingBufferData->deviceMemory.get() : deviceMemory.get()); if (needsStaging) { + stagingBufferData->UnmapMemory(); // Since we're going to blit to the texture image, set its layout to eTransferDstOptimal setImageLayout(commandBuffer, image.get(), format, isNew ? 
vk::ImageLayout::eUndefined : vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferDstOptimal); @@ -244,6 +255,7 @@ void Texture::SetImage(u32 srcSize, void *srcData, bool isNew) } else { + device.unmapMemory(allocator ? sharedDeviceMemory : *deviceMemory); // If we can use the linear tiled image as a texture, just do it setImageLayout(commandBuffer, image.get(), format, vk::ImageLayout::ePreinitialized, vk::ImageLayout::eShaderReadOnlyOptimal); } diff --git a/core/rend/vulkan/texture.h b/core/rend/vulkan/texture.h index 3f180a0bd..9245ac5fd 100644 --- a/core/rend/vulkan/texture.h +++ b/core/rend/vulkan/texture.h @@ -29,9 +29,16 @@ void setImageLayout(vk::CommandBuffer const& commandBuffer, vk::Image image, vk: struct Texture : BaseTextureCacheData { - Texture(vk::PhysicalDevice physicalDevice, vk::Device device) - : physicalDevice(physicalDevice), device(device), format(vk::Format::eUndefined) + Texture(vk::PhysicalDevice physicalDevice, vk::Device device, VulkanAllocator *allocator = nullptr) + : physicalDevice(physicalDevice), device(device), format(vk::Format::eUndefined), allocator(allocator) {} + ~Texture() override + { + imageView.reset(); + image.reset(); + if (allocator) + allocator->Free(memoryOffset, memoryType, sharedDeviceMemory); + } void UploadToGPU(int width, int height, u8 *data) override; u64 GetIntId() { return (u64)reinterpret_cast<uintptr_t>(this); } std::string GetId() override { char s[20]; sprintf(s, "%p", this); return s; } @@ -57,6 +64,10 @@ private: vk::PhysicalDevice physicalDevice; vk::Device device; + VulkanAllocator *allocator; + vk::DeviceMemory sharedDeviceMemory; + u32 memoryType = 0; + vk::DeviceSize memoryOffset = 0; friend class TextureDrawer; }; diff --git a/core/rend/vulkan/utils.h b/core/rend/vulkan/utils.h index c87395721..d155c50c6 100644 --- a/core/rend/vulkan/utils.h +++ b/core/rend/vulkan/utils.h @@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License along with Flycast. If not, see <https://www.gnu.org/licenses/>. 
*/ +#pragma once #include "vulkan.h" static inline u32 findMemoryType(vk::PhysicalDeviceMemoryProperties const& memoryProperties, u32 typeBits, vk::MemoryPropertyFlags requirementsMask) diff --git a/core/rend/vulkan/vulkan_context.cpp b/core/rend/vulkan/vulkan_context.cpp index 67d6884e8..45bcb7a87 100644 --- a/core/rend/vulkan/vulkan_context.cpp +++ b/core/rend/vulkan/vulkan_context.cpp @@ -371,8 +371,8 @@ void VulkanContext::CreateSwapChain() if (surfaceCapabilities.currentExtent.width == std::numeric_limits<uint32_t>::max()) { // If the surface size is undefined, the size is set to the size of the images requested. - swapchainExtent.width = std::min(std::max(width, surfaceCapabilities.minImageExtent.width), surfaceCapabilities.maxImageExtent.width); - swapchainExtent.height = std::min(std::max(height, surfaceCapabilities.minImageExtent.height), surfaceCapabilities.maxImageExtent.height); + swapchainExtent.width = std::min(std::max(640u, surfaceCapabilities.minImageExtent.width), surfaceCapabilities.maxImageExtent.width); + swapchainExtent.height = std::min(std::max(480u, surfaceCapabilities.minImageExtent.height), surfaceCapabilities.maxImageExtent.height); } else { diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index b5222cce8..3761203d2 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -21,6 +21,7 @@ #include #include "vulkan.h" #include "hw/pvr/Renderer_if.h" +#include "allocator.h" #include "commandpool.h" #include "drawer.h" #include "shaders.h" @@ -36,7 +37,8 @@ public: shaderManager.Init(); texCommandPool.Init(); - textureDrawer.Init(&samplerManager, &shaderManager); + texAllocator.SetChunkSize(16 * 1024 * 1024); + textureDrawer.Init(&samplerManager, &shaderManager, &texAllocator); textureDrawer.SetCommandPool(&texCommandPool); screenDrawer.Init(&samplerManager, &shaderManager); @@ -45,6 +47,8 @@ public: void Resize(int w, int h) override { + texCommandPool.Init(); + 
screenDrawer.Init(&samplerManager, &shaderManager); } void Term() override { printf("VulkanRenderer::Term\n"); GetContext()->WaitIdle(); killtex(); + fogTexture = nullptr; texCommandPool.Term(); shaderManager.Term(); } + void RenderFramebuffer() + { + // TODO + } + bool Process(TA_context* ctx) override { if (ctx->rend.isRenderFramebuffer) { - // TODO RenderFramebuffer(); + RenderFramebuffer(); return false; } @@ -96,8 +106,8 @@ public: virtual u64 GetTexture(TSP tsp, TCW tcw) override { - Texture* tf = static_cast<Texture*>(getTextureCacheData(tsp, tcw, [](){ - return (BaseTextureCacheData *)new Texture(VulkanContext::Instance()->GetPhysicalDevice(), *VulkanContext::Instance()->GetDevice()); + Texture* tf = static_cast<Texture*>(getTextureCacheData(tsp, tcw, [this](){ + return (BaseTextureCacheData *)new Texture(VulkanContext::Instance()->GetPhysicalDevice(), *VulkanContext::Instance()->GetDevice(), &this->texAllocator); })); if (tf->IsNew()) @@ -146,6 +156,7 @@ private: ShaderManager shaderManager; ScreenDrawer screenDrawer; TextureDrawer textureDrawer; + VulkanAllocator texAllocator; }; Renderer* rend_Vulkan()