Merge branch 'vtx_cache'
This commit is contained in:
commit
79308b477f
|
@ -240,12 +240,20 @@ bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) {
|
|||
for (auto it = access_watches_.begin(); it != access_watches_.end(); ++it) {
|
||||
auto entry = *it;
|
||||
if ((entry->address <= physical_address &&
|
||||
entry->address + entry->length > physical_address) ||
|
||||
(entry->address >= physical_address &&
|
||||
entry->address < physical_address + length)) {
|
||||
// This watch lies within the range.
|
||||
entry->address + entry->length > physical_address + length)) {
|
||||
// This range lies entirely within this watch.
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO(DrChat): Check if the range is partially covered, and subtract the
|
||||
// covered portion if it is.
|
||||
if ((entry->address <= physical_address &&
|
||||
entry->address + entry->length > physical_address)) {
|
||||
// The beginning of range lies partially within this watch.
|
||||
} else if ((entry->address < physical_address + length &&
|
||||
entry->address + entry->length > physical_address + length)) {
|
||||
// The ending of this range lies partially within this watch.
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -77,7 +77,7 @@ class MMIOHandler {
|
|||
// Fires and clears any access watches that overlap this range.
|
||||
void InvalidateRange(uint32_t physical_address, size_t length);
|
||||
|
||||
// Returns true if /any/ part of this range is watched.
|
||||
// Returns true if /all/ of this range is watched.
|
||||
bool IsRangeWatched(uint32_t physical_address, size_t length);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -47,6 +47,10 @@ XE_GPU_REGISTER(0x0D04, kDword, SQ_EO_RT)
|
|||
|
||||
XE_GPU_REGISTER(0x0C85, kDword, PA_CL_ENHANCE)
|
||||
|
||||
// Set with WAIT_UNTIL = WAIT_3D_IDLECLEAN
|
||||
XE_GPU_REGISTER(0x0E00, kDword, UNKNOWN_0E00)
|
||||
XE_GPU_REGISTER(0x0E40, kDword, UNKNOWN_0E40)
|
||||
|
||||
XE_GPU_REGISTER(0x0E42, kDword, UNKNOWN_0E42)
|
||||
|
||||
XE_GPU_REGISTER(0x0F01, kDword, RB_BC_CONTROL)
|
||||
|
|
|
@ -16,10 +16,80 @@
|
|||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||
|
||||
#include "third_party/vulkan/vk_mem_alloc.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace vulkan {
|
||||
|
||||
#if XE_ARCH_AMD64
|
||||
void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint16_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
|
||||
__m128i shufmask =
|
||||
_mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
|
||||
0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
|
||||
__m128i cmpval = _mm_set1_epi16(cmp_value);
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 8 <= count; i += 8) {
|
||||
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
__m128i output = _mm_shuffle_epi8(input, shufmask);
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi16(output, cmpval);
|
||||
output = _mm_or_si128(output, mask);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = byte_swap(src[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint32_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
|
||||
__m128i shufmask =
|
||||
_mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
|
||||
0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
|
||||
__m128i cmpval = _mm_set1_epi32(cmp_value);
|
||||
|
||||
size_t i;
|
||||
for (i = 0; i + 4 <= count; i += 4) {
|
||||
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
|
||||
__m128i output = _mm_shuffle_epi8(input, shufmask);
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32(output, cmpval);
|
||||
output = _mm_or_si128(output, mask);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
|
||||
}
|
||||
for (; i < count; ++i) { // handle residual elements
|
||||
dest[i] = byte_swap(src[i]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void copy_and_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint16_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
uint16_t value = byte_swap(src[i]);
|
||||
dest[i] = value == cmp_value ? 0xFFFF : value;
|
||||
}
|
||||
}
|
||||
|
||||
void copy_and_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
|
||||
uint32_t cmp_value, size_t count) {
|
||||
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
|
||||
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
uint32_t value = byte_swap(src[i]);
|
||||
dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
using xe::ui::vulkan::CheckResult;
|
||||
|
||||
constexpr VkDeviceSize kConstantRegisterUniformRange =
|
||||
|
@ -32,7 +102,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
|
|||
device_,
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
|
||||
capacity);
|
||||
capacity, 4096);
|
||||
}
|
||||
|
||||
BufferCache::~BufferCache() { Shutdown(); }
|
||||
|
@ -47,6 +117,15 @@ VkResult BufferCache::Initialize() {
|
|||
return status;
|
||||
}
|
||||
|
||||
// Create a memory allocator for textures.
|
||||
VmaAllocatorCreateInfo alloc_info = {
|
||||
0, *device_, *device_, 0, 0, nullptr, nullptr,
|
||||
};
|
||||
status = vmaCreateAllocator(&alloc_info, &mem_allocator_);
|
||||
if (status != VK_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
// Descriptor pool used for all of our cached descriptors.
|
||||
// In the steady state we don't allocate anything, so these are all manually
|
||||
// managed.
|
||||
|
@ -150,28 +229,23 @@ VkResult BufferCache::Initialize() {
|
|||
}
|
||||
|
||||
void BufferCache::Shutdown() {
|
||||
if (mem_allocator_) {
|
||||
vmaDestroyAllocator(mem_allocator_);
|
||||
mem_allocator_ = nullptr;
|
||||
}
|
||||
|
||||
if (transient_descriptor_set_) {
|
||||
vkFreeDescriptorSets(*device_, descriptor_pool_, 1,
|
||||
&transient_descriptor_set_);
|
||||
transient_descriptor_set_ = nullptr;
|
||||
}
|
||||
|
||||
if (descriptor_set_layout_) {
|
||||
vkDestroyDescriptorSetLayout(*device_, descriptor_set_layout_, nullptr);
|
||||
descriptor_set_layout_ = nullptr;
|
||||
}
|
||||
|
||||
if (descriptor_pool_) {
|
||||
vkDestroyDescriptorPool(*device_, descriptor_pool_, nullptr);
|
||||
descriptor_pool_ = nullptr;
|
||||
}
|
||||
VK_SAFE_DESTROY(vkDestroyDescriptorSetLayout, *device_,
|
||||
descriptor_set_layout_, nullptr);
|
||||
VK_SAFE_DESTROY(vkDestroyDescriptorPool, *device_, descriptor_pool_, nullptr);
|
||||
|
||||
transient_buffer_->Shutdown();
|
||||
|
||||
if (gpu_memory_pool_) {
|
||||
vkFreeMemory(*device_, gpu_memory_pool_, nullptr);
|
||||
gpu_memory_pool_ = nullptr;
|
||||
}
|
||||
VK_SAFE_DESTROY(vkFreeMemory, *device_, gpu_memory_pool_, nullptr);
|
||||
}
|
||||
|
||||
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
||||
|
@ -278,13 +352,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
|||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, IndexFormat format, VkFence fence) {
|
||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||
if (offset != VK_WHOLE_SIZE) {
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
// Allocate space in the buffer for our data.
|
||||
offset = AllocateTransientData(source_length, fence);
|
||||
auto offset = AllocateTransientData(source_length, fence);
|
||||
if (offset == VK_WHOLE_SIZE) {
|
||||
// OOM.
|
||||
return {nullptr, VK_WHOLE_SIZE};
|
||||
|
@ -292,17 +361,36 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
|||
|
||||
const void* source_ptr = memory_->TranslatePhysical(source_addr);
|
||||
|
||||
// Copy data into the buffer.
|
||||
// TODO(benvanik): get min/max indices and pass back?
|
||||
uint32_t prim_reset_index =
|
||||
register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
|
||||
bool prim_reset_enabled =
|
||||
!!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21));
|
||||
|
||||
// Copy data into the buffer. If primitive reset is enabled, translate any
|
||||
// primitive reset indices to something Vulkan understands.
|
||||
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||
if (format == IndexFormat::kInt16) {
|
||||
// Endian::k8in16, swap half-words.
|
||||
xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 2);
|
||||
} else if (format == IndexFormat::kInt32) {
|
||||
// Endian::k8in32, swap words.
|
||||
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 4);
|
||||
if (prim_reset_enabled) {
|
||||
if (format == IndexFormat::kInt16) {
|
||||
// Endian::k8in16, swap half-words.
|
||||
copy_cmp_swap_16_unaligned(
|
||||
transient_buffer_->host_base() + offset, source_ptr,
|
||||
static_cast<uint16_t>(prim_reset_index), source_length / 2);
|
||||
} else if (format == IndexFormat::kInt32) {
|
||||
// Endian::k8in32, swap words.
|
||||
copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, prim_reset_index,
|
||||
source_length / 4);
|
||||
}
|
||||
} else {
|
||||
if (format == IndexFormat::kInt16) {
|
||||
// Endian::k8in16, swap half-words.
|
||||
xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 2);
|
||||
} else if (format == IndexFormat::kInt32) {
|
||||
// Endian::k8in32, swap words.
|
||||
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 4);
|
||||
}
|
||||
}
|
||||
|
||||
transient_buffer_->Flush(offset, source_length);
|
||||
|
@ -323,7 +411,6 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
|||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
|
||||
&barrier, 0, nullptr);
|
||||
|
||||
CacheTransientData(source_addr, source_length, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
|
@ -335,29 +422,41 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
|||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
// Slow path :)
|
||||
// Expand the region up to the allocation boundary
|
||||
auto physical_heap = memory_->GetPhysicalHeap();
|
||||
uint32_t upload_base = source_addr;
|
||||
uint32_t upload_size = source_length;
|
||||
|
||||
// Ping the memory subsystem for allocation size.
|
||||
// TODO(DrChat): Artifacting occurring in GripShift with this enabled.
|
||||
// physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
|
||||
assert(upload_base <= source_addr);
|
||||
uint32_t source_offset = source_addr - upload_base;
|
||||
|
||||
// Allocate space in the buffer for our data.
|
||||
offset = AllocateTransientData(source_length, fence);
|
||||
offset = AllocateTransientData(upload_size, fence);
|
||||
if (offset == VK_WHOLE_SIZE) {
|
||||
// OOM.
|
||||
return {nullptr, VK_WHOLE_SIZE};
|
||||
}
|
||||
|
||||
const void* source_ptr = memory_->TranslatePhysical(source_addr);
|
||||
const void* upload_ptr = memory_->TranslatePhysical(upload_base);
|
||||
|
||||
// Copy data into the buffer.
|
||||
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||
if (endian == Endian::k8in32) {
|
||||
// Endian::k8in32, swap words.
|
||||
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
|
||||
source_ptr, source_length / 4);
|
||||
upload_ptr, source_length / 4);
|
||||
} else if (endian == Endian::k16in32) {
|
||||
xe::copy_and_swap_16_in_32_unaligned(
|
||||
transient_buffer_->host_base() + offset, source_ptr, source_length / 4);
|
||||
transient_buffer_->host_base() + offset, upload_ptr, source_length / 4);
|
||||
} else {
|
||||
assert_always();
|
||||
}
|
||||
|
||||
transient_buffer_->Flush(offset, source_length);
|
||||
transient_buffer_->Flush(offset, upload_size);
|
||||
|
||||
// Append a barrier to the command buffer.
|
||||
VkBufferMemoryBarrier barrier = {
|
||||
|
@ -369,14 +468,14 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
|||
VK_QUEUE_FAMILY_IGNORED,
|
||||
transient_buffer_->gpu_buffer(),
|
||||
offset,
|
||||
source_length,
|
||||
upload_size,
|
||||
};
|
||||
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
|
||||
&barrier, 0, nullptr);
|
||||
|
||||
CacheTransientData(source_addr, source_length, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
CacheTransientData(upload_base, upload_size, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset + source_offset};
|
||||
}
|
||||
|
||||
VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length,
|
||||
|
@ -409,10 +508,22 @@ VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,
|
|||
|
||||
VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
|
||||
uint32_t guest_length) {
|
||||
uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
|
||||
auto it = transient_cache_.find(key);
|
||||
if (it != transient_cache_.end()) {
|
||||
return it->second;
|
||||
if (transient_cache_.empty()) {
|
||||
// Short-circuit exit.
|
||||
return VK_WHOLE_SIZE;
|
||||
}
|
||||
|
||||
// Find the first element > guest_address
|
||||
auto it = transient_cache_.upper_bound(guest_address);
|
||||
if (it != transient_cache_.begin()) {
|
||||
// it = first element <= guest_address
|
||||
--it;
|
||||
|
||||
if ((it->first + it->second.first) >= (guest_address + guest_length)) {
|
||||
// This data is contained within some existing transient data.
|
||||
auto source_offset = static_cast<VkDeviceSize>(guest_address - it->first);
|
||||
return it->second.second + source_offset;
|
||||
}
|
||||
}
|
||||
|
||||
return VK_WHOLE_SIZE;
|
||||
|
@ -421,8 +532,17 @@ VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
|
|||
void BufferCache::CacheTransientData(uint32_t guest_address,
|
||||
uint32_t guest_length,
|
||||
VkDeviceSize offset) {
|
||||
uint64_t key = uint64_t(guest_length) << 32 | uint64_t(guest_address);
|
||||
transient_cache_[key] = offset;
|
||||
transient_cache_[guest_address] = {guest_length, offset};
|
||||
|
||||
// Erase any entries contained within
|
||||
auto it = transient_cache_.upper_bound(guest_address);
|
||||
while (it != transient_cache_.end()) {
|
||||
if ((guest_address + guest_length) >= (it->first + it->second.first)) {
|
||||
it = transient_cache_.erase(it);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BufferCache::Flush(VkCommandBuffer command_buffer) {
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#include "xenia/ui/vulkan/vulkan.h"
|
||||
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||
|
||||
#include "third_party/vulkan/vk_mem_alloc.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace xe {
|
||||
|
@ -95,6 +97,15 @@ class BufferCache {
|
|||
void Scavenge();
|
||||
|
||||
private:
|
||||
// This represents an uploaded vertex buffer.
|
||||
struct VertexBuffer {
|
||||
uint32_t guest_address;
|
||||
uint32_t size;
|
||||
|
||||
VmaAllocation alloc;
|
||||
VmaAllocationInfo alloc_info;
|
||||
};
|
||||
|
||||
// Allocates a block of memory in the transient buffer.
|
||||
// When memory is not available fences are checked and space is reclaimed.
|
||||
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
||||
|
@ -115,11 +126,12 @@ class BufferCache {
|
|||
ui::vulkan::VulkanDevice* device_ = nullptr;
|
||||
|
||||
VkDeviceMemory gpu_memory_pool_ = nullptr;
|
||||
VmaAllocator mem_allocator_ = nullptr;
|
||||
|
||||
// Staging ringbuffer we cycle through fast. Used for data we don't
|
||||
// plan on keeping past the current frame.
|
||||
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
|
||||
std::map<uint64_t, VkDeviceSize> transient_cache_;
|
||||
std::map<uint32_t, std::pair<uint32_t, VkDeviceSize>> transient_cache_;
|
||||
|
||||
VkDescriptorPool descriptor_pool_ = nullptr;
|
||||
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
|
||||
|
|
|
@ -1202,16 +1202,12 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState(
|
|||
// glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
|
||||
// }
|
||||
|
||||
// Primitive restart index is handled in the buffer cache.
|
||||
if (regs.pa_su_sc_mode_cntl & (1 << 21)) {
|
||||
state_info.primitiveRestartEnable = VK_TRUE;
|
||||
} else {
|
||||
state_info.primitiveRestartEnable = VK_FALSE;
|
||||
}
|
||||
// TODO(benvanik): no way to specify in Vulkan?
|
||||
assert_true(regs.multi_prim_ib_reset_index == 0xFFFF ||
|
||||
regs.multi_prim_ib_reset_index == 0xFFFFFF ||
|
||||
regs.multi_prim_ib_reset_index == 0xFFFFFFFF);
|
||||
// glPrimitiveRestartIndex(regs.multi_prim_ib_reset_index);
|
||||
|
||||
return UpdateStatus::kMismatch;
|
||||
}
|
||||
|
|
|
@ -860,14 +860,13 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
|
|||
// TODO: Make the buffer cache ... actually cache buffers. We can have
|
||||
// a list of buffers that were cached, and store those in chunks in a
|
||||
// multiple of the host's page size.
|
||||
// WRITE WATCHES: We need to invalidate vertex buffers if they're written
|
||||
// to. Since most vertex buffers aren't aligned to a page boundary, this
|
||||
// means a watch may cover more than one vertex buffer.
|
||||
// We need to maintain a list of write watches, and what memory ranges
|
||||
// they cover. If a vertex buffer lies within a write watch's range, assign
|
||||
// it to the watch. If there's partial alignment where a buffer lies within
|
||||
// one watch and outside of it, should we create a new watch or extend the
|
||||
// existing watch?
|
||||
// So, we need to track all vertex buffers in a sorted map, and track all
|
||||
// write watches in a sorted map. When a vertex buffer is uploaded, track
|
||||
// all untracked pages with 1-page write watches. In the callback,
|
||||
// invalidate any overlapping vertex buffers.
|
||||
//
|
||||
// We would keep the old transient buffer as a staging buffer, and upload
|
||||
// to a GPU-only buffer that tracks all cached vertex buffers.
|
||||
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
||||
current_setup_buffer_, physical_address, source_length,
|
||||
static_cast<Endian>(fetch->endian), current_batch_fence_);
|
||||
|
|
|
@ -273,14 +273,11 @@ dword_result_t NtQueryVirtualMemory(
|
|||
return X_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
memory_basic_information_ptr->base_address =
|
||||
static_cast<uint32_t>(alloc_info.base_address);
|
||||
memory_basic_information_ptr->allocation_base =
|
||||
static_cast<uint32_t>(alloc_info.allocation_base);
|
||||
memory_basic_information_ptr->base_address = alloc_info.base_address;
|
||||
memory_basic_information_ptr->allocation_base = alloc_info.allocation_base;
|
||||
memory_basic_information_ptr->allocation_protect =
|
||||
ToXdkProtectFlags(alloc_info.allocation_protect);
|
||||
memory_basic_information_ptr->region_size =
|
||||
static_cast<uint32_t>(alloc_info.region_size);
|
||||
memory_basic_information_ptr->region_size = alloc_info.region_size;
|
||||
uint32_t x_state = 0;
|
||||
if (alloc_info.state & kMemoryAllocationReserve) {
|
||||
x_state |= X_MEM_RESERVE;
|
||||
|
@ -290,7 +287,7 @@ dword_result_t NtQueryVirtualMemory(
|
|||
}
|
||||
memory_basic_information_ptr->state = x_state;
|
||||
memory_basic_information_ptr->protect = ToXdkProtectFlags(alloc_info.protect);
|
||||
memory_basic_information_ptr->type = alloc_info.type;
|
||||
memory_basic_information_ptr->type = X_MEM_PRIVATE;
|
||||
|
||||
return X_STATUS_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -339,6 +339,8 @@ BaseHeap* Memory::LookupHeapByType(bool physical, uint32_t page_size) {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns a pointer to the heap stored in heaps_.physical.
VirtualHeap* Memory::GetPhysicalHeap() {
  VirtualHeap* physical_heap = &heaps_.physical;
  return physical_heap;
}
|
||||
|
||||
void Memory::Zero(uint32_t address, uint32_t size) {
|
||||
std::memset(TranslateVirtual(address), 0, size);
|
||||
}
|
||||
|
@ -1096,16 +1098,19 @@ bool BaseHeap::QueryRegionInfo(uint32_t base_address,
|
|||
out_info->region_size = 0;
|
||||
out_info->state = 0;
|
||||
out_info->protect = 0;
|
||||
out_info->type = 0;
|
||||
if (start_page_entry.state) {
|
||||
// Committed/reserved region.
|
||||
out_info->allocation_base = start_page_entry.base_address * page_size_;
|
||||
out_info->allocation_protect = start_page_entry.allocation_protect;
|
||||
out_info->allocation_size = start_page_entry.region_page_count * page_size_;
|
||||
out_info->state = start_page_entry.state;
|
||||
out_info->protect = start_page_entry.current_protect;
|
||||
out_info->type = 0x20000;
|
||||
|
||||
// Scan forward and report the size of the region matching the initial
|
||||
// base address's attributes.
|
||||
for (uint32_t page_number = start_page_number;
|
||||
page_number < start_page_number + start_page_entry.region_page_count;
|
||||
page_number <
|
||||
start_page_entry.base_address + start_page_entry.region_page_count;
|
||||
++page_number) {
|
||||
auto page_entry = page_table_[page_number];
|
||||
if (page_entry.base_address != start_page_entry.base_address ||
|
||||
|
@ -1144,6 +1149,20 @@ bool BaseHeap::QuerySize(uint32_t address, uint32_t* out_size) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool BaseHeap::QueryBaseAndSize(uint32_t* in_out_address, uint32_t* out_size) {
|
||||
uint32_t page_number = (*in_out_address - heap_base_) / page_size_;
|
||||
if (page_number > page_table_.size()) {
|
||||
XELOGE("BaseHeap::QuerySize base page out of range");
|
||||
*out_size = 0;
|
||||
return false;
|
||||
}
|
||||
auto global_lock = global_critical_region_.Acquire();
|
||||
auto page_entry = page_table_[page_number];
|
||||
*in_out_address = (page_entry.base_address * page_size_);
|
||||
*out_size = (page_entry.region_page_count * page_size_);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BaseHeap::QueryProtect(uint32_t address, uint32_t* out_protect) {
|
||||
uint32_t page_number = (address - heap_base_) / page_size_;
|
||||
if (page_number > page_table_.size()) {
|
||||
|
|
|
@ -56,6 +56,8 @@ struct HeapAllocationInfo {
|
|||
uint32_t allocation_base;
|
||||
// The memory protection option when the region was initially allocated.
|
||||
uint32_t allocation_protect;
|
||||
// The size specified when the region was initially allocated, in bytes.
|
||||
uint32_t allocation_size;
|
||||
// The size of the region beginning at the base address in which all pages
|
||||
// have identical attributes, in bytes.
|
||||
uint32_t region_size;
|
||||
|
@ -63,8 +65,6 @@ struct HeapAllocationInfo {
|
|||
uint32_t state;
|
||||
// The access protection of the pages in the region.
|
||||
uint32_t protect;
|
||||
// The type of pages in the region (private).
|
||||
uint32_t type;
|
||||
};
|
||||
|
||||
// Describes a single page in the page table.
|
||||
|
@ -144,6 +144,9 @@ class BaseHeap {
|
|||
// Queries the size of the region containing the given address.
|
||||
bool QuerySize(uint32_t address, uint32_t* out_size);
|
||||
|
||||
// Queries the base and size of a region containing the given address.
|
||||
bool QueryBaseAndSize(uint32_t* in_out_address, uint32_t* out_size);
|
||||
|
||||
// Queries the current protection mode of the region containing the given
|
||||
// address.
|
||||
bool QueryProtect(uint32_t address, uint32_t* out_protect);
|
||||
|
@ -332,6 +335,9 @@ class Memory {
|
|||
// Gets the heap with the given properties.
|
||||
BaseHeap* LookupHeapByType(bool physical, uint32_t page_size);
|
||||
|
||||
// Gets the physical base heap.
|
||||
VirtualHeap* GetPhysicalHeap();
|
||||
|
||||
// Dumps a map of all allocated memory to the log.
|
||||
void DumpMap();
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ CircularBuffer::CircularBuffer(VulkanDevice* device, VkBufferUsageFlags usage,
|
|||
|
||||
VkMemoryRequirements reqs;
|
||||
vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &reqs);
|
||||
alignment_ = reqs.alignment;
|
||||
alignment_ = xe::round_up(alignment, reqs.alignment);
|
||||
}
|
||||
CircularBuffer::~CircularBuffer() { Shutdown(); }
|
||||
|
||||
|
|
|
@ -26,10 +26,14 @@ namespace ui {
|
|||
namespace vulkan {
|
||||
|
||||
#define VK_SAFE_DESTROY(fn, dev, obj, alloc) \
|
||||
if (obj) { \
|
||||
fn(dev, obj, alloc); \
|
||||
obj = nullptr; \
|
||||
}
|
||||
\
|
||||
do { \
|
||||
if (obj) { \
|
||||
fn(dev, obj, alloc); \
|
||||
obj = nullptr; \
|
||||
} \
|
||||
\
|
||||
} while (0)
|
||||
|
||||
class Fence {
|
||||
public:
|
||||
|
|
Loading…
Reference in New Issue