Vulkan: Fix register constants on AMD

This commit is contained in:
Dr. Chat 2017-03-13 23:43:34 -05:00
parent 5f5dc61428
commit 855ebb9cfe
5 changed files with 83 additions and 16 deletions

View File

@ -110,6 +110,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
buffer_info.buffer = transient_buffer_->gpu_buffer();
buffer_info.offset = 0;
buffer_info.range = kConstantRegisterUniformRange;
VkWriteDescriptorSet descriptor_writes[2];
auto& vertex_uniform_binding_write = descriptor_writes[0];
vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -147,6 +148,7 @@ BufferCache::~BufferCache() {
}
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
VkCommandBuffer command_buffer,
const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map,
VkFence fence) {
@ -175,6 +177,24 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
32 * 4);
dest_ptr += 32 * 4;
transient_buffer_->Flush(offset, kConstantRegisterUniformRange);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
kConstantRegisterUniformRange,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
return {offset, offset};
// Packed upload code.
@ -229,8 +249,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
}
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
uint32_t source_addr, uint32_t source_length, IndexFormat format,
VkFence fence) {
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, IndexFormat format, VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset};
@ -258,13 +278,31 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
source_ptr, source_length / 4);
}
transient_buffer_->Flush(offset, source_length);
// Append a barrier to the command buffer so the host writes that were just
// flushed are made visible to index fetching.
VkBufferMemoryBarrier barrier = {
    VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,  // sType
    nullptr,                                  // pNext
    VK_ACCESS_HOST_WRITE_BIT,                 // srcAccessMask
    VK_ACCESS_INDEX_READ_BIT,                 // dstAccessMask
    VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
    VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
    transient_buffer_->gpu_buffer(),          // buffer
    offset,                                   // offset
    source_length,                            // size
};
// The destination stage must be one that supports the destination access
// mask. VK_ACCESS_INDEX_READ_BIT is performed by the vertex input stage;
// TOP_OF_PIPE performs no memory accesses, so pairing it with an access mask
// violates the vkCmdPipelineBarrier valid usage rules.
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
                     &barrier, 0, nullptr);
CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset};
}
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
uint32_t source_addr, uint32_t source_length, Endian endian,
VkFence fence) {
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, Endian endian, VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset};
@ -292,6 +330,24 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
assert_always();
}
transient_buffer_->Flush(offset, source_length);
// Append a barrier to the command buffer so the host writes that were just
// flushed are made visible to vertex attribute fetching.
VkBufferMemoryBarrier barrier = {
    VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,  // sType
    nullptr,                                  // pNext
    VK_ACCESS_HOST_WRITE_BIT,                 // srcAccessMask
    VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,      // dstAccessMask
    VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
    VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
    transient_buffer_->gpu_buffer(),          // buffer
    offset,                                   // offset
    source_length,                            // size
};
// The destination stage must be one that supports the destination access
// mask. VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT is performed by the vertex input
// stage; TOP_OF_PIPE performs no memory accesses, so pairing it with an
// access mask violates the vkCmdPipelineBarrier valid usage rules.
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
                     &barrier, 0, nullptr);
CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset};
}

View File

@ -53,6 +53,7 @@ class BufferCache {
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
// The returned offsets may alias.
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
VkCommandBuffer command_buffer,
const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map,
VkFence fence);
@ -61,19 +62,17 @@ class BufferCache {
// recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr,
uint32_t source_length,
IndexFormat format,
VkFence fence);
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, IndexFormat format, VkFence fence);
// Uploads vertex buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr,
uint32_t source_length,
Endian endian,
VkFence fence);
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, Endian endian, VkFence fence);
// Flushes all pending data to the GPU.
// Until this is called the GPU is not guaranteed to see any data.

View File

@ -626,7 +626,7 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
// Upload the constants the shaders require.
// These are optional, and if none are defined 0 will be returned.
auto constant_offsets = buffer_cache_->UploadConstantRegisters(
vertex_shader->constant_register_map(),
current_setup_buffer_, vertex_shader->constant_register_map(),
pixel_shader ? pixel_shader->constant_register_map() : dummy_map,
current_batch_fence_);
if (constant_offsets.first == VK_WHOLE_SIZE ||
@ -681,7 +681,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t));
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
source_addr, source_length, info.format, current_batch_fence_);
current_setup_buffer_, source_addr, source_length, info.format,
current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer.
return false;
@ -745,8 +746,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
// Upload (or get a cached copy of) the buffer.
uint32_t source_length = uint32_t(valid_range);
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
physical_address, source_length, static_cast<Endian>(fetch->endian),
current_batch_fence_);
current_setup_buffer_, physical_address, source_length,
static_cast<Endian>(fetch->endian), current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer.
return false;

View File

@ -232,6 +232,16 @@ void CircularBuffer::Flush(Allocation* allocation) {
vkFlushMappedMemoryRanges(*device_, 1, &range);
}
// Flushes `length` bytes of mapped memory so host writes are handed to the
// device. `offset` is added to gpu_base_ to form the offset within
// gpu_memory_. The result of vkFlushMappedMemoryRanges is not checked.
// NOTE(review): Vulkan requires the offset and size of a flushed range to
// respect VkPhysicalDeviceLimits::nonCoherentAtomSize - confirm callers pass
// suitably aligned values.
void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) {
  const VkMappedMemoryRange flush_range = {
      VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,  // sType
      nullptr,                                // pNext
      gpu_memory_,                            // memory
      gpu_base_ + offset,                     // offset
      length,                                 // size
  };
  vkFlushMappedMemoryRanges(*device_, 1, &flush_range);
}
void CircularBuffer::Clear() {
for (auto alloc : allocations_) {
delete alloc;

View File

@ -61,6 +61,7 @@ class CircularBuffer {
// reaches the signaled state.
Allocation* Acquire(VkDeviceSize length, VkFence fence);
void Flush(Allocation* allocation);
// Flushes length bytes of the buffer's mapped memory, starting offset bytes
// from the buffer's base, via vkFlushMappedMemoryRanges.
void Flush(VkDeviceSize offset, VkDeviceSize length);
// Clears all allocations, regardless of whether they've been consumed or not.
void Clear();