Vulkan: Fix register constants on AMD

This commit is contained in:
Dr. Chat 2017-03-13 23:43:34 -05:00
parent 5f5dc61428
commit 44e7c3712a
6 changed files with 91 additions and 25 deletions

View File

@ -117,6 +117,14 @@ void SpirvShaderTranslator::StartTranslation() {
Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1); Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1);
Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1); Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1);
// Strides
b.addDecoration(float_consts_type, spv::Decoration::DecorationArrayStride,
4 * sizeof(float));
b.addDecoration(loop_consts_type, spv::Decoration::DecorationArrayStride,
sizeof(uint32_t));
b.addDecoration(bool_consts_type, spv::Decoration::DecorationArrayStride,
sizeof(uint32_t));
Id consts_struct_type = b.makeStructType( Id consts_struct_type = b.makeStructType(
{float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type");
b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock);
@ -124,25 +132,16 @@ void SpirvShaderTranslator::StartTranslation() {
// Constants member decorations. // Constants member decorations.
b.addMemberDecoration(consts_struct_type, 0, b.addMemberDecoration(consts_struct_type, 0,
spv::Decoration::DecorationOffset, 0); spv::Decoration::DecorationOffset, 0);
b.addMemberDecoration(consts_struct_type, 0,
spv::Decoration::DecorationArrayStride,
4 * sizeof(float));
b.addMemberName(consts_struct_type, 0, "float_consts"); b.addMemberName(consts_struct_type, 0, "float_consts");
b.addMemberDecoration(consts_struct_type, 1, b.addMemberDecoration(consts_struct_type, 1,
spv::Decoration::DecorationOffset, spv::Decoration::DecorationOffset,
512 * 4 * sizeof(float)); 512 * 4 * sizeof(float));
b.addMemberDecoration(consts_struct_type, 1,
spv::Decoration::DecorationArrayStride,
sizeof(uint32_t));
b.addMemberName(consts_struct_type, 1, "loop_consts"); b.addMemberName(consts_struct_type, 1, "loop_consts");
b.addMemberDecoration(consts_struct_type, 2, b.addMemberDecoration(consts_struct_type, 2,
spv::Decoration::DecorationOffset, spv::Decoration::DecorationOffset,
512 * 4 * sizeof(float) + 32 * sizeof(uint32_t)); 512 * 4 * sizeof(float) + 32 * sizeof(uint32_t));
b.addMemberDecoration(consts_struct_type, 2,
spv::Decoration::DecorationArrayStride,
sizeof(uint32_t));
b.addMemberName(consts_struct_type, 2, "bool_consts"); b.addMemberName(consts_struct_type, 2, "bool_consts");
consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, consts_ = b.createVariable(spv::StorageClass::StorageClassUniform,

View File

@ -110,6 +110,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
buffer_info.buffer = transient_buffer_->gpu_buffer(); buffer_info.buffer = transient_buffer_->gpu_buffer();
buffer_info.offset = 0; buffer_info.offset = 0;
buffer_info.range = kConstantRegisterUniformRange; buffer_info.range = kConstantRegisterUniformRange;
VkWriteDescriptorSet descriptor_writes[2]; VkWriteDescriptorSet descriptor_writes[2];
auto& vertex_uniform_binding_write = descriptor_writes[0]; auto& vertex_uniform_binding_write = descriptor_writes[0];
vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -147,6 +148,7 @@ BufferCache::~BufferCache() {
} }
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters( std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
VkCommandBuffer command_buffer,
const Shader::ConstantRegisterMap& vertex_constant_register_map, const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map, const Shader::ConstantRegisterMap& pixel_constant_register_map,
VkFence fence) { VkFence fence) {
@ -175,6 +177,24 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
32 * 4); 32 * 4);
dest_ptr += 32 * 4; dest_ptr += 32 * 4;
transient_buffer_->Flush(offset, kConstantRegisterUniformRange);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
kConstantRegisterUniformRange,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
return {offset, offset}; return {offset, offset};
// Packed upload code. // Packed upload code.
@ -229,8 +249,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
} }
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
uint32_t source_addr, uint32_t source_length, IndexFormat format, VkCommandBuffer command_buffer, uint32_t source_addr,
VkFence fence) { uint32_t source_length, IndexFormat format, VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length); auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) { if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
@ -258,13 +278,31 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
source_ptr, source_length / 4); source_ptr, source_length / 4);
} }
transient_buffer_->Flush(offset, source_length);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_INDEX_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
source_length,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
CacheTransientData(source_addr, source_length, offset); CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
uint32_t source_addr, uint32_t source_length, Endian endian, VkCommandBuffer command_buffer, uint32_t source_addr,
VkFence fence) { uint32_t source_length, Endian endian, VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length); auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) { if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
@ -292,6 +330,24 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
assert_always(); assert_always();
} }
transient_buffer_->Flush(offset, source_length);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
source_length,
};
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
CacheTransientData(source_addr, source_length, offset); CacheTransientData(source_addr, source_length, offset);
return {transient_buffer_->gpu_buffer(), offset}; return {transient_buffer_->gpu_buffer(), offset};
} }

View File

@ -53,6 +53,7 @@ class BufferCache {
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM). // VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
// The returned offsets may alias. // The returned offsets may alias.
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters( std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
VkCommandBuffer command_buffer,
const Shader::ConstantRegisterMap& vertex_constant_register_map, const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map, const Shader::ConstantRegisterMap& pixel_constant_register_map,
VkFence fence); VkFence fence);
@ -61,19 +62,17 @@ class BufferCache {
// recently uploaded data or cached copies. // recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr, std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
uint32_t source_length, VkCommandBuffer command_buffer, uint32_t source_addr,
IndexFormat format, uint32_t source_length, IndexFormat format, VkFence fence);
VkFence fence);
// Uploads vertex buffer data from guest memory, possibly eliding with // Uploads vertex buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies. // recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr, std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
uint32_t source_length, VkCommandBuffer command_buffer, uint32_t source_addr,
Endian endian, uint32_t source_length, Endian endian, VkFence fence);
VkFence fence);
// Flushes all pending data to the GPU. // Flushes all pending data to the GPU.
// Until this is called the GPU is not guaranteed to see any data. // Until this is called the GPU is not guaranteed to see any data.

View File

@ -626,7 +626,7 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
// Upload the constants the shaders require. // Upload the constants the shaders require.
// These are optional, and if none are defined 0 will be returned. // These are optional, and if none are defined 0 will be returned.
auto constant_offsets = buffer_cache_->UploadConstantRegisters( auto constant_offsets = buffer_cache_->UploadConstantRegisters(
vertex_shader->constant_register_map(), current_setup_buffer_, vertex_shader->constant_register_map(),
pixel_shader ? pixel_shader->constant_register_map() : dummy_map, pixel_shader ? pixel_shader->constant_register_map() : dummy_map,
current_batch_fence_); current_batch_fence_);
if (constant_offsets.first == VK_WHOLE_SIZE || if (constant_offsets.first == VK_WHOLE_SIZE ||
@ -681,7 +681,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t)); : sizeof(uint16_t));
auto buffer_ref = buffer_cache_->UploadIndexBuffer( auto buffer_ref = buffer_cache_->UploadIndexBuffer(
source_addr, source_length, info.format, current_batch_fence_); current_setup_buffer_, source_addr, source_length, info.format,
current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.
return false; return false;
@ -745,8 +746,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
// Upload (or get a cached copy of) the buffer. // Upload (or get a cached copy of) the buffer.
uint32_t source_length = uint32_t(valid_range); uint32_t source_length = uint32_t(valid_range);
auto buffer_ref = buffer_cache_->UploadVertexBuffer( auto buffer_ref = buffer_cache_->UploadVertexBuffer(
physical_address, source_length, static_cast<Endian>(fetch->endian), current_setup_buffer_, physical_address, source_length,
current_batch_fence_); static_cast<Endian>(fetch->endian), current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.
return false; return false;

View File

@ -232,6 +232,16 @@ void CircularBuffer::Flush(Allocation* allocation) {
vkFlushMappedMemoryRanges(*device_, 1, &range); vkFlushMappedMemoryRanges(*device_, 1, &range);
} }
// Flushes host writes for a sub-range of this buffer's mapped memory.
//
// offset: start of the range, relative to gpu_base_ (it is added to gpu_base_
//         to form the offset passed to Vulkan).
// length: size of the range, passed through unchanged as the range size.
//
// The result of vkFlushMappedMemoryRanges is not checked.
// NOTE(review): for non-coherent memory Vulkan expects the flushed range to
// respect nonCoherentAtomSize alignment - confirm callers guarantee this.
void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) {
  // Translate the buffer-relative offset into an offset within gpu_memory_.
  const VkDeviceSize memory_offset = gpu_base_ + offset;

  VkMappedMemoryRange flush_range = {
      VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,  // sType
      nullptr,                                // pNext
      gpu_memory_,                            // memory
      memory_offset,                          // offset
      length,                                 // size
  };
  vkFlushMappedMemoryRanges(*device_, 1, &flush_range);
}
void CircularBuffer::Clear() { void CircularBuffer::Clear() {
for (auto alloc : allocations_) { for (auto alloc : allocations_) {
delete alloc; delete alloc;

View File

@ -61,6 +61,7 @@ class CircularBuffer {
// reaches the signaled state. // reaches the signaled state.
Allocation* Acquire(VkDeviceSize length, VkFence fence); Allocation* Acquire(VkDeviceSize length, VkFence fence);
void Flush(Allocation* allocation); void Flush(Allocation* allocation);
void Flush(VkDeviceSize offset, VkDeviceSize length);
// Clears all allocations, regardless of whether they've been consumed or not. // Clears all allocations, regardless of whether they've been consumed or not.
void Clear(); void Clear();