diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc
index f12cb65d9..33bdaf889 100644
--- a/src/xenia/cpu/mmio_handler.cc
+++ b/src/xenia/cpu/mmio_handler.cc
@@ -240,12 +240,20 @@ bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) {
   for (auto it = access_watches_.begin(); it != access_watches_.end(); ++it) {
     auto entry = *it;
     if ((entry->address <= physical_address &&
-         entry->address + entry->length > physical_address) ||
-        (entry->address >= physical_address &&
-         entry->address < physical_address + length)) {
-      // This watch lies within the range.
+         entry->address + entry->length > physical_address + length)) {
+      // This range lies entirely within this watch.
       return true;
     }
+
+    // TODO(DrChat): Check if the range is partially covered, and subtract the
+    // covered portion if it is.
+    if ((entry->address <= physical_address &&
+         entry->address + entry->length > physical_address)) {
+      // The beginning of the range lies partially within this watch.
+    } else if ((entry->address < physical_address + length &&
+                entry->address + entry->length > physical_address + length)) {
+      // The ending of this range lies partially within this watch.
+    }
   }

   return false;
diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h
index e68a2e276..e61cd1c20 100644
--- a/src/xenia/cpu/mmio_handler.h
+++ b/src/xenia/cpu/mmio_handler.h
@@ -77,7 +77,7 @@ class MMIOHandler {
   // Fires and clears any access watches that overlap this range.
   void InvalidateRange(uint32_t physical_address, size_t length);

-  // Returns true if /any/ part of this range is watched.
+  // Returns true if /all/ of this range is watched.
   bool IsRangeWatched(uint32_t physical_address, size_t length);

  protected:
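
The IsRangeWatched() change above also changes the function's contract: the old predicate answered "does any part of this range overlap a watch?", the new one answers "does a single watch cover the whole range?", which is why the header comment flips from /any/ to /all/. Because the end comparison uses a strict >, a range that ends exactly at a watch's end is conservatively reported as not fully covered. A minimal sketch of the two predicates (hypothetical helper names, not part of the change):

    #include <cstddef>
    #include <cstdint>

    // The queried range is [start, start + len); the watch covers [w_start, w_start + w_len).
    bool FullyContained(uint32_t start, size_t len, uint32_t w_start, size_t w_len) {
      return w_start <= start && w_start + w_len >= start + len;
    }

    bool AnyOverlap(uint32_t start, size_t len, uint32_t w_start, size_t w_len) {
      return w_start < start + len && start < w_start + w_len;
    }
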
diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc
index 9aa649742..3ab07abd5 100644
--- a/src/xenia/gpu/register_table.inc
+++ b/src/xenia/gpu/register_table.inc
@@ -47,6 +47,10 @@
 XE_GPU_REGISTER(0x0D04, kDword, SQ_EO_RT)
 XE_GPU_REGISTER(0x0C85, kDword, PA_CL_ENHANCE)

+// Set with WAIT_UNTIL = WAIT_3D_IDLECLEAN
+XE_GPU_REGISTER(0x0E00, kDword, UNKNOWN_0E00)
+XE_GPU_REGISTER(0x0E40, kDword, UNKNOWN_0E40)
+
 XE_GPU_REGISTER(0x0E42, kDword, UNKNOWN_0E42)
 XE_GPU_REGISTER(0x0F01, kDword, RB_BC_CONTROL)
diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc
index 72b2f2d39..64dc3121b 100644
--- a/src/xenia/gpu/vulkan/buffer_cache.cc
+++ b/src/xenia/gpu/vulkan/buffer_cache.cc
@@ -16,10 +16,80 @@
 #include "xenia/gpu/gpu_flags.h"
 #include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

+#include "third_party/vulkan/vk_mem_alloc.h"
+
 namespace xe {
 namespace gpu {
 namespace vulkan {

+#if XE_ARCH_AMD64
+void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
+                                uint16_t cmp_value, size_t count) {
+  auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
+  auto src = reinterpret_cast<const uint16_t*>(src_ptr);
+  __m128i shufmask =
+      _mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
+                   0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
+  __m128i cmpval = _mm_set1_epi16(cmp_value);
+
+  size_t i;
+  for (i = 0; i + 8 <= count; i += 8) {
+    __m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
+    __m128i output = _mm_shuffle_epi8(input, shufmask);
+
+    __m128i mask = _mm_cmpeq_epi16(output, cmpval);
+    output = _mm_or_si128(output, mask);
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
+  }
+  for (; i < count; ++i) {  // handle residual elements
+    dest[i] = byte_swap(src[i]);
+  }
+}
+
+void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
+                                uint32_t cmp_value, size_t count) {
+  auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
+  auto src = reinterpret_cast<const uint32_t*>(src_ptr);
+  __m128i shufmask =
+      _mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
+                   0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
+  __m128i cmpval = _mm_set1_epi32(cmp_value);
+
+  size_t i;
+  for (i = 0; i + 4 <= count; i += 4) {
+    __m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
+    __m128i output = _mm_shuffle_epi8(input, shufmask);
+
+    __m128i mask = _mm_cmpeq_epi32(output, cmpval);
+    output = _mm_or_si128(output, mask);
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
+  }
+  for (; i < count; ++i) {  // handle residual elements
+    dest[i] = byte_swap(src[i]);
+  }
+}
+#else
+void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
+                                uint16_t cmp_value, size_t count) {
+  auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
+  auto src = reinterpret_cast<const uint16_t*>(src_ptr);
+  for (size_t i = 0; i < count; ++i) {
+    uint16_t value = byte_swap(src[i]);
+    dest[i] = value == cmp_value ? 0xFFFF : value;
+  }
+}
+
+void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
+                                uint32_t cmp_value, size_t count) {
+  auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
+  auto src = reinterpret_cast<const uint32_t*>(src_ptr);
+  for (size_t i = 0; i < count; ++i) {
+    uint32_t value = byte_swap(src[i]);
+    dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
+  }
+}
+#endif
+
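
The copy_cmp_swap_* helpers above fold primitive-reset translation into the usual big-endian index swap: after byte-swapping, any index equal to cmp_value is forced to all ones (0xFFFF or 0xFFFFFFFF), the fixed restart value Vulkan uses for the corresponding index type. In the SSE path this falls out of _mm_cmpeq_epi16/_mm_cmpeq_epi32 producing an all-ones mask in the matching lanes, which is then ORed into the output. A hypothetical call, with illustrative values and an eight-element count so the whole array goes through the SSE loop rather than the byte-swap-only tail:

    // Big-endian 16-bit guest indices, read as raw halfwords on a little-endian host;
    // the guest's primitive-reset index is 0x1234.
    uint16_t guest[8] = {0x0000, 0x0100, 0x3412, 0x0200,
                         0x0300, 0x0400, 0x0500, 0x0600};
    uint16_t host[8] = {};
    copy_cmp_swap_16_unaligned(host, guest, 0x1234, 8);
    // host == {0x0000, 0x0001, 0xFFFF, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006}:
    // every index is byte-swapped, and the one that matched 0x1234 became 0xFFFF.
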
 using xe::ui::vulkan::CheckResult;

 constexpr VkDeviceSize kConstantRegisterUniformRange =
@@ -32,7 +102,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
       device_, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
                    VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
                    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
-      capacity);
+      capacity, 4096);
 }

 BufferCache::~BufferCache() { Shutdown(); }
@@ -47,6 +117,15 @@ VkResult BufferCache::Initialize() {
     return status;
   }

+  // Create a memory allocator for textures.
+  VmaAllocatorCreateInfo alloc_info = {
+      0, *device_, *device_, 0, 0, nullptr, nullptr,
+  };
+  status = vmaCreateAllocator(&alloc_info, &mem_allocator_);
+  if (status != VK_SUCCESS) {
+    return status;
+  }
+
   // Descriptor pool used for all of our cached descriptors.
   // In the steady state we don't allocate anything, so these are all manually
   // managed.
@@ -150,28 +229,23 @@ VkResult BufferCache::Initialize() {
 }

 void BufferCache::Shutdown() {
+  if (mem_allocator_) {
+    vmaDestroyAllocator(mem_allocator_);
+    mem_allocator_ = nullptr;
+  }
+
   if (transient_descriptor_set_) {
     vkFreeDescriptorSets(*device_, descriptor_pool_, 1,
                          &transient_descriptor_set_);
     transient_descriptor_set_ = nullptr;
   }

-  if (descriptor_set_layout_) {
-    vkDestroyDescriptorSetLayout(*device_, descriptor_set_layout_, nullptr);
-    descriptor_set_layout_ = nullptr;
-  }
-
-  if (descriptor_pool_) {
-    vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr);
-    descriptor_pool_ = nullptr;
-  }
+  VK_SAFE_DESTROY(vkDestroyDescriptorSetLayout, *device_,
+                  descriptor_set_layout_, nullptr);
+  VK_SAFE_DESTROY(vkDestroyDescriptorPool, *device_, descriptor_pool_, nullptr);

   transient_buffer_->Shutdown();
-
-  if (gpu_memory_pool_) {
-    vkFreeMemory(*device_, gpu_memory_pool_, nullptr);
-    gpu_memory_pool_ = nullptr;
-  }
+  VK_SAFE_DESTROY(vkFreeMemory, *device_, gpu_memory_pool_, nullptr);
 }

 std::pair<VkDescriptorSet, VkDeviceSize> BufferCache::UploadConstantRegisters(
@@ -278,13 +352,8 @@ std::pair<VkDescriptorSet, VkDeviceSize> BufferCache::UploadConstantRegisters(
 std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
     VkCommandBuffer command_buffer, uint32_t source_addr,
     uint32_t source_length, IndexFormat format, VkFence fence) {
-  auto offset = FindCachedTransientData(source_addr, source_length);
-  if (offset != VK_WHOLE_SIZE) {
-    return {transient_buffer_->gpu_buffer(), offset};
-  }
-
   // Allocate space in the buffer for our data.
-  offset = AllocateTransientData(source_length, fence);
+  auto offset = AllocateTransientData(source_length, fence);
   if (offset == VK_WHOLE_SIZE) {
     // OOM.
     return {nullptr, VK_WHOLE_SIZE};
@@ -292,17 +361,36 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(

   const void* source_ptr = memory_->TranslatePhysical(source_addr);

-  // Copy data into the buffer.
-  // TODO(benvanik): get min/max indices and pass back?
+  uint32_t prim_reset_index =
+      register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
+  bool prim_reset_enabled =
+      !!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21));
+
+  // Copy data into the buffer. If primitive reset is enabled, translate any
+  // primitive reset indices to something Vulkan understands.
   // TODO(benvanik): memcpy then use compute shaders to swap?
-  if (format == IndexFormat::kInt16) {
-    // Endian::k8in16, swap half-words.
-    xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
-                                   source_ptr, source_length / 2);
-  } else if (format == IndexFormat::kInt32) {
-    // Endian::k8in32, swap words.
-    xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
-                                   source_ptr, source_length / 4);
+  if (prim_reset_enabled) {
+    if (format == IndexFormat::kInt16) {
+      // Endian::k8in16, swap half-words.
+      copy_cmp_swap_16_unaligned(
+          transient_buffer_->host_base() + offset, source_ptr,
+          static_cast<uint16_t>(prim_reset_index), source_length / 2);
+    } else if (format == IndexFormat::kInt32) {
+      // Endian::k8in32, swap words.
+      copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset,
+                                 source_ptr, prim_reset_index,
+                                 source_length / 4);
+    }
+  } else {
+    if (format == IndexFormat::kInt16) {
+      // Endian::k8in16, swap half-words.
+      xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
+                                     source_ptr, source_length / 2);
+    } else if (format == IndexFormat::kInt32) {
+      // Endian::k8in32, swap words.
+      xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
+                                     source_ptr, source_length / 4);
+    }
   }

   transient_buffer_->Flush(offset, source_length);
@@ -323,7 +411,6 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
                        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
                        &barrier, 0, nullptr);

-  CacheTransientData(source_addr, source_length, offset);
   return {transient_buffer_->gpu_buffer(), offset};
 }

@@ -335,29 +422,41 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
     return {transient_buffer_->gpu_buffer(), offset};
   }

+  // Slow path :)
+  // Expand the region up to the allocation boundary
+  auto physical_heap = memory_->GetPhysicalHeap();
+  uint32_t upload_base = source_addr;
+  uint32_t upload_size = source_length;
+
+  // Ping the memory subsystem for allocation size.
+  // TODO(DrChat): Artifacting occurring in GripShift with this enabled.
+  // physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
+  assert(upload_base <= source_addr);
+  uint32_t source_offset = source_addr - upload_base;
+
   // Allocate space in the buffer for our data.
-  offset = AllocateTransientData(source_length, fence);
+  offset = AllocateTransientData(upload_size, fence);
   if (offset == VK_WHOLE_SIZE) {
     // OOM.
     return {nullptr, VK_WHOLE_SIZE};
   }

-  const void* source_ptr = memory_->TranslatePhysical(source_addr);
+  const void* upload_ptr = memory_->TranslatePhysical(upload_base);

   // Copy data into the buffer.
   // TODO(benvanik): memcpy then use compute shaders to swap?
   if (endian == Endian::k8in32) {
     // Endian::k8in32, swap words.
     xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
-                                   source_ptr, source_length / 4);
+                                   upload_ptr, source_length / 4);
   } else if (endian == Endian::k16in32) {
     xe::copy_and_swap_16_in_32_unaligned(
-        transient_buffer_->host_base() + offset, source_ptr, source_length / 4);
+        transient_buffer_->host_base() + offset, upload_ptr, source_length / 4);
   } else {
     assert_always();
   }

-  transient_buffer_->Flush(offset, source_length);
+  transient_buffer_->Flush(offset, upload_size);

   // Append a barrier to the command buffer.
   VkBufferMemoryBarrier barrier = {
@@ -369,14 +468,14 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
       VK_QUEUE_FAMILY_IGNORED,
       transient_buffer_->gpu_buffer(),
       offset,
-      source_length,
+      upload_size,
   };
   vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
                        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
                        &barrier, 0, nullptr);

-  CacheTransientData(source_addr, source_length, offset);
-  return {transient_buffer_->gpu_buffer(), offset};
+  CacheTransientData(upload_base, upload_size, offset);
+  return {transient_buffer_->gpu_buffer(), offset + source_offset};
 }

 VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length,
@@ -409,10 +508,22 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,

 VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
                                                   uint32_t guest_length) {
-  uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
-  auto it = transient_cache_.find(key);
-  if (it != transient_cache_.end()) {
-    return it->second;
+  if (transient_cache_.empty()) {
+    // Short-circuit exit.
+    return VK_WHOLE_SIZE;
+  }
+
+  // Find the first element > guest_address
+  auto it = transient_cache_.upper_bound(guest_address);
+  if (it != transient_cache_.begin()) {
+    // it = first element <= guest_address
+    --it;
+
+    if ((it->first + it->second.first) >= (guest_address + guest_length)) {
+      // This data is contained within some existing transient data.
+      auto source_offset = static_cast<VkDeviceSize>(guest_address - it->first);
+      return it->second.second + source_offset;
+    }
   }

   return VK_WHOLE_SIZE;
@@ -421,8 +532,17 @@ VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
 void BufferCache::CacheTransientData(uint32_t guest_address,
                                      uint32_t guest_length,
                                      VkDeviceSize offset) {
-  uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
-  transient_cache_[key] = offset;
+  transient_cache_[guest_address] = {guest_length, offset};
+
+  // Erase any entries contained within
+  auto it = transient_cache_.upper_bound(guest_address);
+  while (it != transient_cache_.end()) {
+    if ((guest_address + guest_length) >= (it->first + it->second.first)) {
+      it = transient_cache_.erase(it);
+    } else {
+      break;
+    }
+  }
 }

 void BufferCache::Flush(VkCommandBuffer command_buffer) {
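
The transient-cache rework above replaces the old (length << 32 | address) key, which only ever hit on an exact repeat of the same upload, with a map ordered by guest address whose value holds {guest_length, buffer offset}. FindCachedTransientData() asks upper_bound() for the first entry starting past the queried address and steps back one entry, which is the cached range with the greatest start address not beyond the query; if that range also covers the end of the query, the data is already resident and the offset into it can be returned. CacheTransientData() correspondingly erases any following entries that the new range swallows. A self-contained sketch of the lookup pattern (uint64_t stands in for VkDeviceSize so it builds without the Vulkan headers):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <utility>

    int main() {
      // guest_address -> {guest_length, offset into the transient buffer}
      std::map<uint32_t, std::pair<uint32_t, uint64_t>> cache;
      cache[0x1000] = {0x100, 0x40};  // 256 guest bytes cached at buffer offset 0x40

      uint32_t addr = 0x1040, len = 0x20;  // query falls inside the cached range
      auto it = cache.upper_bound(addr);   // first entry starting beyond addr
      assert(it != cache.begin());
      --it;                                // entry with the greatest start <= addr
      assert(it->first + it->second.first >= addr + len);  // fully contained
      uint64_t offset = it->second.second + (addr - it->first);
      assert(offset == 0x80);  // cached offset 0x40 plus 0x40 into the range
      return 0;
    }
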
diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h
index ffaa8b8fd..2f321f26f 100644
--- a/src/xenia/gpu/vulkan/buffer_cache.h
+++ b/src/xenia/gpu/vulkan/buffer_cache.h
@@ -18,6 +18,8 @@
 #include "xenia/ui/vulkan/vulkan.h"
 #include "xenia/ui/vulkan/vulkan_device.h"

+#include "third_party/vulkan/vk_mem_alloc.h"
+
 #include <map>

 namespace xe {
@@ -95,6 +97,15 @@ class BufferCache {
   void Scavenge();

  private:
+  // This represents an uploaded vertex buffer.
+  struct VertexBuffer {
+    uint32_t guest_address;
+    uint32_t size;
+
+    VmaAllocation alloc;
+    VmaAllocationInfo alloc_info;
+  };
+
   // Allocates a block of memory in the transient buffer.
   // When memory is not available fences are checked and space is reclaimed.
   // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
@@ -115,11 +126,12 @@ class BufferCache {

   ui::vulkan::VulkanDevice* device_ = nullptr;
   VkDeviceMemory gpu_memory_pool_ = nullptr;
+  VmaAllocator mem_allocator_ = nullptr;

   // Staging ringbuffer we cycle through fast. Used for data we don't
   // plan on keeping past the current frame.
   std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
-  std::map<uint64_t, VkDeviceSize> transient_cache_;
+  std::map<uint32_t, std::pair<uint32_t, VkDeviceSize>> transient_cache_;

   VkDescriptorPool descriptor_pool_ = nullptr;
   VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc
index 6aace62fe..f36755e79 100644
--- a/src/xenia/gpu/vulkan/pipeline_cache.cc
+++ b/src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -1202,16 +1202,12 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState(
   //   glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
   // }

+  // Primitive restart index is handled in the buffer cache.
   if (regs.pa_su_sc_mode_cntl & (1 << 21)) {
     state_info.primitiveRestartEnable = VK_TRUE;
   } else {
     state_info.primitiveRestartEnable = VK_FALSE;
   }

-  // TODO(benvanik): no way to specify in Vulkan?
-  assert_true(regs.multi_prim_ib_reset_index == 0xFFFF ||
-              regs.multi_prim_ib_reset_index == 0xFFFFFF ||
-              regs.multi_prim_ib_reset_index == 0xFFFFFFFF);
-  // glPrimitiveRestartIndex(regs.multi_prim_ib_reset_index);
   return UpdateStatus::kMismatch;
 }
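
For context on why the assert could simply be removed: core Vulkan only exposes an enable flag for primitive restart; there is no counterpart to glPrimitiveRestartIndex(), and the restart value is implicitly all ones for whichever index type is bound. The pipeline side is therefore limited to roughly the following (a sketch, not the emulator's actual state setup), which is why the guest's reset index is now translated when the index buffer is uploaded instead:

    VkPipelineInputAssemblyStateCreateInfo input_assembly = {};
    input_assembly.sType =
        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
    input_assembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
    input_assembly.primitiveRestartEnable = VK_TRUE;  // no field for the index value
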
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
index 9bab7fda5..6112d9a3d 100644
--- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc
+++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -860,14 +860,13 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
     // TODO: Make the buffer cache ... actually cache buffers. We can have
     // a list of buffers that were cached, and store those in chunks in a
     // multiple of the host's page size.
-    // WRITE WATCHES: We need to invalidate vertex buffers if they're written
-    // to. Since most vertex buffers aren't aligned to a page boundary, this
-    // means a watch may cover more than one vertex buffer.
-    // We need to maintain a list of write watches, and what memory ranges
-    // they cover. If a vertex buffer lies within a write watch's range, assign
-    // it to the watch. If there's partial alignment where a buffer lies within
-    // one watch and outside of it, should we create a new watch or extend the
-    // existing watch?
+    // So, we need to track all vertex buffers in a sorted map, and track all
+    // write watches in a sorted map. When a vertex buffer is uploaded, track
+    // all untracked pages with 1-page write watches. In the callback,
+    // invalidate any overlapping vertex buffers.
+    //
+    // We would keep the old transient buffer as a staging buffer, and upload
+    // to a GPU-only buffer that tracks all cached vertex buffers.
     auto buffer_ref = buffer_cache_->UploadVertexBuffer(
         current_setup_buffer_, physical_address, source_length,
         static_cast<Endian>(fetch->endian), current_batch_fence_);
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc
index e397e53eb..0ef461683 100644
--- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc
+++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc
@@ -273,14 +273,11 @@ dword_result_t NtQueryVirtualMemory(
     return X_STATUS_INVALID_PARAMETER;
   }

-  memory_basic_information_ptr->base_address =
-      static_cast<uint32_t>(alloc_info.base_address);
-  memory_basic_information_ptr->allocation_base =
-      static_cast<uint32_t>(alloc_info.allocation_base);
+  memory_basic_information_ptr->base_address = alloc_info.base_address;
+  memory_basic_information_ptr->allocation_base = alloc_info.allocation_base;
   memory_basic_information_ptr->allocation_protect =
       ToXdkProtectFlags(alloc_info.allocation_protect);
-  memory_basic_information_ptr->region_size =
-      static_cast<uint32_t>(alloc_info.region_size);
+  memory_basic_information_ptr->region_size = alloc_info.region_size;
   uint32_t x_state = 0;
   if (alloc_info.state & kMemoryAllocationReserve) {
     x_state |= X_MEM_RESERVE;
@@ -290,7 +287,7 @@ dword_result_t NtQueryVirtualMemory(
   }
   memory_basic_information_ptr->state = x_state;
   memory_basic_information_ptr->protect = ToXdkProtectFlags(alloc_info.protect);
-  memory_basic_information_ptr->type = alloc_info.type;
+  memory_basic_information_ptr->type = X_MEM_PRIVATE;

   return X_STATUS_SUCCESS;
 }
diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc
index 87f135682..cb2553fe1 100644
--- a/src/xenia/memory.cc
+++ b/src/xenia/memory.cc
@@ -339,6 +339,8 @@ BaseHeap* Memory::LookupHeapByType(bool physical, uint32_t page_size) {
   }
 }

+VirtualHeap* Memory::GetPhysicalHeap() { return &heaps_.physical; }
+
 void Memory::Zero(uint32_t address, uint32_t size) {
   std::memset(TranslateVirtual(address), 0, size);
 }
@@ -1096,16 +1098,19 @@ bool BaseHeap::QueryRegionInfo(uint32_t base_address,
   out_info->region_size = 0;
   out_info->state = 0;
   out_info->protect = 0;
-  out_info->type = 0;
   if (start_page_entry.state) {
     // Committed/reserved region.
     out_info->allocation_base = start_page_entry.base_address * page_size_;
     out_info->allocation_protect = start_page_entry.allocation_protect;
+    out_info->allocation_size = start_page_entry.region_page_count * page_size_;
     out_info->state = start_page_entry.state;
     out_info->protect = start_page_entry.current_protect;
-    out_info->type = 0x20000;
+
+    // Scan forward and report the size of the region matching the initial
+    // base address's attributes.
     for (uint32_t page_number = start_page_number;
-         page_number < start_page_number + start_page_entry.region_page_count;
+         page_number <
+             start_page_entry.base_address + start_page_entry.region_page_count;
          ++page_number) {
       auto page_entry = page_table_[page_number];
       if (page_entry.base_address != start_page_entry.base_address ||
@@ -1144,6 +1149,20 @@ bool BaseHeap::QuerySize(uint32_t address, uint32_t* out_size) {
   return true;
 }

+bool BaseHeap::QueryBaseAndSize(uint32_t* in_out_address, uint32_t* out_size) {
+  uint32_t page_number = (*in_out_address - heap_base_) / page_size_;
+  if (page_number > page_table_.size()) {
+    XELOGE("BaseHeap::QuerySize base page out of range");
+    *out_size = 0;
+    return false;
+  }
+  auto global_lock = global_critical_region_.Acquire();
+  auto page_entry = page_table_[page_number];
+  *in_out_address = (page_entry.base_address * page_size_);
+  *out_size = (page_entry.region_page_count * page_size_);
+  return true;
+}
+
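
QueryBaseAndSize() takes the address by pointer and rewrites both outputs: on success, *in_out_address becomes the base of the page region containing the original address and *out_size becomes that region's size in bytes. This is the query the (currently commented-out) call in UploadVertexBuffer relies on to widen an upload to its whole allocation. A hypothetical use mirroring that call site, with source_addr and source_length standing in for the vertex fetch parameters:

    uint32_t upload_base = source_addr;    // an address somewhere inside an allocation
    uint32_t upload_size = source_length;  // length of the vertex data itself
    if (physical_heap->QueryBaseAndSize(&upload_base, &upload_size)) {
      // upload_base (<= source_addr) is now the region base, upload_size covers the
      // whole region, and the vertex data begins (source_addr - upload_base) bytes in.
    }
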
 bool BaseHeap::QueryProtect(uint32_t address, uint32_t* out_protect) {
   uint32_t page_number = (address - heap_base_) / page_size_;
   if (page_number > page_table_.size()) {
diff --git a/src/xenia/memory.h b/src/xenia/memory.h
index 7ca057a1d..4309ded05 100644
--- a/src/xenia/memory.h
+++ b/src/xenia/memory.h
@@ -56,6 +56,8 @@ struct HeapAllocationInfo {
   uint32_t allocation_base;
   // The memory protection option when the region was initially allocated.
   uint32_t allocation_protect;
+  // The size specified when the region was initially allocated, in bytes.
+  uint32_t allocation_size;
   // The size of the region beginning at the base address in which all pages
   // have identical attributes, in bytes.
   uint32_t region_size;
@@ -63,8 +65,6 @@
   uint32_t state;
   // The access protection of the pages in the region.
   uint32_t protect;
-  // The type of pages in the region (private).
-  uint32_t type;
 };

 // Describes a single page in the page table.
@@ -144,6 +144,9 @@ class BaseHeap {
   // Queries the size of the region containing the given address.
   bool QuerySize(uint32_t address, uint32_t* out_size);

+  // Queries the base and size of a region containing the given address.
+  bool QueryBaseAndSize(uint32_t* in_out_address, uint32_t* out_size);
+
   // Queries the current protection mode of the region containing the given
   // address.
   bool QueryProtect(uint32_t address, uint32_t* out_protect);
@@ -332,6 +335,9 @@ class Memory {
   // Gets the heap with the given properties.
   BaseHeap* LookupHeapByType(bool physical, uint32_t page_size);

+  // Gets the physical base heap.
+  VirtualHeap* GetPhysicalHeap();
+
   // Dumps a map of all allocated memory to the log.
   void DumpMap();
diff --git a/src/xenia/ui/vulkan/circular_buffer.cc b/src/xenia/ui/vulkan/circular_buffer.cc
index 0347413cc..05f3f7917 100644
--- a/src/xenia/ui/vulkan/circular_buffer.cc
+++ b/src/xenia/ui/vulkan/circular_buffer.cc
@@ -42,7 +42,7 @@ CircularBuffer::CircularBuffer(VulkanDevice* device, VkBufferUsageFlags usage,
   VkMemoryRequirements reqs;
   vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &reqs);
-  alignment_ = reqs.alignment;
+  alignment_ = xe::round_up(alignment, reqs.alignment);
 }

 CircularBuffer::~CircularBuffer() { Shutdown(); }
diff --git a/src/xenia/ui/vulkan/vulkan_util.h b/src/xenia/ui/vulkan/vulkan_util.h
index 62419429e..ba93ff132 100644
--- a/src/xenia/ui/vulkan/vulkan_util.h
+++ b/src/xenia/ui/vulkan/vulkan_util.h
@@ -26,10 +26,14 @@
 namespace ui {
 namespace vulkan {

 #define VK_SAFE_DESTROY(fn, dev, obj, alloc) \
-  if (obj) {                                 \
-    fn(dev, obj, alloc);                     \
-    obj = nullptr;                           \
-  }
+  \
+  do {                                       \
+    if (obj) {                               \
+      fn(dev, obj, alloc);                   \
+      obj = nullptr;                         \
+    }                                        \
+  \
+  } while (0)

 class Fence {
  public: