Vulkan: Fix register constants on AMD
This commit is contained in:
parent
5f5dc61428
commit
44e7c3712a
|
@ -117,6 +117,14 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1);
|
||||
Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1);
|
||||
|
||||
// Strides
|
||||
b.addDecoration(float_consts_type, spv::Decoration::DecorationArrayStride,
|
||||
4 * sizeof(float));
|
||||
b.addDecoration(loop_consts_type, spv::Decoration::DecorationArrayStride,
|
||||
sizeof(uint32_t));
|
||||
b.addDecoration(bool_consts_type, spv::Decoration::DecorationArrayStride,
|
||||
sizeof(uint32_t));
|
||||
|
||||
Id consts_struct_type = b.makeStructType(
|
||||
{float_consts_type, loop_consts_type, bool_consts_type}, "consts_type");
|
||||
b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock);
|
||||
|
@ -124,25 +132,16 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
// Constants member decorations.
|
||||
b.addMemberDecoration(consts_struct_type, 0,
|
||||
spv::Decoration::DecorationOffset, 0);
|
||||
b.addMemberDecoration(consts_struct_type, 0,
|
||||
spv::Decoration::DecorationArrayStride,
|
||||
4 * sizeof(float));
|
||||
b.addMemberName(consts_struct_type, 0, "float_consts");
|
||||
|
||||
b.addMemberDecoration(consts_struct_type, 1,
|
||||
spv::Decoration::DecorationOffset,
|
||||
512 * 4 * sizeof(float));
|
||||
b.addMemberDecoration(consts_struct_type, 1,
|
||||
spv::Decoration::DecorationArrayStride,
|
||||
sizeof(uint32_t));
|
||||
b.addMemberName(consts_struct_type, 1, "loop_consts");
|
||||
|
||||
b.addMemberDecoration(consts_struct_type, 2,
|
||||
spv::Decoration::DecorationOffset,
|
||||
512 * 4 * sizeof(float) + 32 * sizeof(uint32_t));
|
||||
b.addMemberDecoration(consts_struct_type, 2,
|
||||
spv::Decoration::DecorationArrayStride,
|
||||
sizeof(uint32_t));
|
||||
b.addMemberName(consts_struct_type, 2, "bool_consts");
|
||||
|
||||
consts_ = b.createVariable(spv::StorageClass::StorageClassUniform,
|
||||
|
|
|
@ -110,6 +110,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
|
|||
buffer_info.buffer = transient_buffer_->gpu_buffer();
|
||||
buffer_info.offset = 0;
|
||||
buffer_info.range = kConstantRegisterUniformRange;
|
||||
|
||||
VkWriteDescriptorSet descriptor_writes[2];
|
||||
auto& vertex_uniform_binding_write = descriptor_writes[0];
|
||||
vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
|
@ -147,6 +148,7 @@ BufferCache::~BufferCache() {
|
|||
}
|
||||
|
||||
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
||||
VkCommandBuffer command_buffer,
|
||||
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
||||
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
||||
VkFence fence) {
|
||||
|
@ -175,6 +177,24 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
|||
32 * 4);
|
||||
dest_ptr += 32 * 4;
|
||||
|
||||
transient_buffer_->Flush(offset, kConstantRegisterUniformRange);
|
||||
|
||||
// Append a barrier to the command buffer.
|
||||
VkBufferMemoryBarrier barrier = {
|
||||
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
nullptr,
|
||||
VK_ACCESS_HOST_WRITE_BIT,
|
||||
VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
|
||||
VK_QUEUE_FAMILY_IGNORED,
|
||||
VK_QUEUE_FAMILY_IGNORED,
|
||||
transient_buffer_->gpu_buffer(),
|
||||
offset,
|
||||
kConstantRegisterUniformRange,
|
||||
};
|
||||
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
|
||||
&barrier, 0, nullptr);
|
||||
|
||||
return {offset, offset};
|
||||
|
||||
// Packed upload code.
|
||||
|
@ -229,8 +249,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
|||
}
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||
uint32_t source_addr, uint32_t source_length, IndexFormat format,
|
||||
VkFence fence) {
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, IndexFormat format, VkFence fence) {
|
||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||
if (offset != VK_WHOLE_SIZE) {
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
|
@ -258,13 +278,31 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
|||
source_ptr, source_length / 4);
|
||||
}
|
||||
|
||||
transient_buffer_->Flush(offset, source_length);
|
||||
|
||||
// Append a barrier to the command buffer.
|
||||
VkBufferMemoryBarrier barrier = {
|
||||
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
nullptr,
|
||||
VK_ACCESS_HOST_WRITE_BIT,
|
||||
VK_ACCESS_INDEX_READ_BIT,
|
||||
VK_QUEUE_FAMILY_IGNORED,
|
||||
VK_QUEUE_FAMILY_IGNORED,
|
||||
transient_buffer_->gpu_buffer(),
|
||||
offset,
|
||||
source_length,
|
||||
};
|
||||
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
|
||||
&barrier, 0, nullptr);
|
||||
|
||||
CacheTransientData(source_addr, source_length, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||
uint32_t source_addr, uint32_t source_length, Endian endian,
|
||||
VkFence fence) {
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, Endian endian, VkFence fence) {
|
||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||
if (offset != VK_WHOLE_SIZE) {
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
|
@ -292,6 +330,24 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
|||
assert_always();
|
||||
}
|
||||
|
||||
transient_buffer_->Flush(offset, source_length);
|
||||
|
||||
// Append a barrier to the command buffer.
|
||||
VkBufferMemoryBarrier barrier = {
|
||||
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
nullptr,
|
||||
VK_ACCESS_HOST_WRITE_BIT,
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||
VK_QUEUE_FAMILY_IGNORED,
|
||||
VK_QUEUE_FAMILY_IGNORED,
|
||||
transient_buffer_->gpu_buffer(),
|
||||
offset,
|
||||
source_length,
|
||||
};
|
||||
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
|
||||
&barrier, 0, nullptr);
|
||||
|
||||
CacheTransientData(source_addr, source_length, offset);
|
||||
return {transient_buffer_->gpu_buffer(), offset};
|
||||
}
|
||||
|
|
|
@ -53,6 +53,7 @@ class BufferCache {
|
|||
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
|
||||
// The returned offsets may alias.
|
||||
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
|
||||
VkCommandBuffer command_buffer,
|
||||
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
||||
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
||||
VkFence fence);
|
||||
|
@ -61,19 +62,17 @@ class BufferCache {
|
|||
// recently uploaded data or cached copies.
|
||||
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr,
|
||||
uint32_t source_length,
|
||||
IndexFormat format,
|
||||
VkFence fence);
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, IndexFormat format, VkFence fence);
|
||||
|
||||
// Uploads vertex buffer data from guest memory, possibly eliding with
|
||||
// recently uploaded data or cached copies.
|
||||
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr,
|
||||
uint32_t source_length,
|
||||
Endian endian,
|
||||
VkFence fence);
|
||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
|
||||
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||
uint32_t source_length, Endian endian, VkFence fence);
|
||||
|
||||
// Flushes all pending data to the GPU.
|
||||
// Until this is called the GPU is not guaranteed to see any data.
|
||||
|
|
|
@ -626,7 +626,7 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
|
|||
// Upload the constants the shaders require.
|
||||
// These are optional, and if none are defined 0 will be returned.
|
||||
auto constant_offsets = buffer_cache_->UploadConstantRegisters(
|
||||
vertex_shader->constant_register_map(),
|
||||
current_setup_buffer_, vertex_shader->constant_register_map(),
|
||||
pixel_shader ? pixel_shader->constant_register_map() : dummy_map,
|
||||
current_batch_fence_);
|
||||
if (constant_offsets.first == VK_WHOLE_SIZE ||
|
||||
|
@ -681,7 +681,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
|
|||
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
||||
: sizeof(uint16_t));
|
||||
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
|
||||
source_addr, source_length, info.format, current_batch_fence_);
|
||||
current_setup_buffer_, source_addr, source_length, info.format,
|
||||
current_batch_fence_);
|
||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||
// Failed to upload buffer.
|
||||
return false;
|
||||
|
@ -745,8 +746,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
|
|||
// Upload (or get a cached copy of) the buffer.
|
||||
uint32_t source_length = uint32_t(valid_range);
|
||||
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
||||
physical_address, source_length, static_cast<Endian>(fetch->endian),
|
||||
current_batch_fence_);
|
||||
current_setup_buffer_, physical_address, source_length,
|
||||
static_cast<Endian>(fetch->endian), current_batch_fence_);
|
||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||
// Failed to upload buffer.
|
||||
return false;
|
||||
|
|
|
@ -232,6 +232,16 @@ void CircularBuffer::Flush(Allocation* allocation) {
|
|||
vkFlushMappedMemoryRanges(*device_, 1, &range);
|
||||
}
|
||||
|
||||
// Flushes host writes for a sub-range of this buffer's mapped memory.
//
// offset: start of the range; it is added to gpu_base_ to form the offset
//         passed to Vulkan within gpu_memory_.
// length: size of the range in bytes, passed through unchanged.
//
// The VkResult of vkFlushMappedMemoryRanges is not inspected.
// NOTE(review): Vulkan requires flushed range offsets/sizes to respect
// VkPhysicalDeviceLimits::nonCoherentAtomSize; this function does no
// rounding, so callers presumably supply suitable values - confirm.
void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) {
  const VkMappedMemoryRange flush_range = {
      VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,  // sType
      nullptr,                                // pNext
      gpu_memory_,                            // memory
      gpu_base_ + offset,                     // offset
      length,                                 // size
  };
  vkFlushMappedMemoryRanges(*device_, 1, &flush_range);
}
|
||||
|
||||
void CircularBuffer::Clear() {
|
||||
for (auto alloc : allocations_) {
|
||||
delete alloc;
|
||||
|
|
|
@ -61,6 +61,7 @@ class CircularBuffer {
|
|||
// reaches the signaled state.
|
||||
Allocation* Acquire(VkDeviceSize length, VkFence fence);
|
||||
void Flush(Allocation* allocation);
|
||||
// Flushes `length` bytes of the buffer's mapped memory starting at `offset`,
// where `offset` is relative to the buffer's base offset within its memory.
void Flush(VkDeviceSize offset, VkDeviceSize length);
|
||||
|
||||
// Clears all allocations, regardless of whether they've been consumed or not.
|
||||
void Clear();
|
||||
|
|
Loading…
Reference in New Issue