Vulkan: Fix register constants on AMD
This commit is contained in:
parent
5f5dc61428
commit
44e7c3712a
|
@ -117,6 +117,14 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1);
|
Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1);
|
||||||
Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1);
|
Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1);
|
||||||
|
|
||||||
|
// Strides
|
||||||
|
b.addDecoration(float_consts_type, spv::Decoration::DecorationArrayStride,
|
||||||
|
4 * sizeof(float));
|
||||||
|
b.addDecoration(loop_consts_type, spv::Decoration::DecorationArrayStride,
|
||||||
|
sizeof(uint32_t));
|
||||||
|
b.addDecoration(bool_consts_type, spv::Decoration::DecorationArrayStride,
|
||||||
|
sizeof(uint32_t));
|
||||||
|
|
||||||
Id consts_struct_type = b.makeStructType(
|
Id consts_struct_type = b.makeStructType(
|
||||||
{float_consts_type, loop_consts_type, bool_consts_type}, "consts_type");
|
{float_consts_type, loop_consts_type, bool_consts_type}, "consts_type");
|
||||||
b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock);
|
b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock);
|
||||||
|
@ -124,25 +132,16 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
// Constants member decorations.
|
// Constants member decorations.
|
||||||
b.addMemberDecoration(consts_struct_type, 0,
|
b.addMemberDecoration(consts_struct_type, 0,
|
||||||
spv::Decoration::DecorationOffset, 0);
|
spv::Decoration::DecorationOffset, 0);
|
||||||
b.addMemberDecoration(consts_struct_type, 0,
|
|
||||||
spv::Decoration::DecorationArrayStride,
|
|
||||||
4 * sizeof(float));
|
|
||||||
b.addMemberName(consts_struct_type, 0, "float_consts");
|
b.addMemberName(consts_struct_type, 0, "float_consts");
|
||||||
|
|
||||||
b.addMemberDecoration(consts_struct_type, 1,
|
b.addMemberDecoration(consts_struct_type, 1,
|
||||||
spv::Decoration::DecorationOffset,
|
spv::Decoration::DecorationOffset,
|
||||||
512 * 4 * sizeof(float));
|
512 * 4 * sizeof(float));
|
||||||
b.addMemberDecoration(consts_struct_type, 1,
|
|
||||||
spv::Decoration::DecorationArrayStride,
|
|
||||||
sizeof(uint32_t));
|
|
||||||
b.addMemberName(consts_struct_type, 1, "loop_consts");
|
b.addMemberName(consts_struct_type, 1, "loop_consts");
|
||||||
|
|
||||||
b.addMemberDecoration(consts_struct_type, 2,
|
b.addMemberDecoration(consts_struct_type, 2,
|
||||||
spv::Decoration::DecorationOffset,
|
spv::Decoration::DecorationOffset,
|
||||||
512 * 4 * sizeof(float) + 32 * sizeof(uint32_t));
|
512 * 4 * sizeof(float) + 32 * sizeof(uint32_t));
|
||||||
b.addMemberDecoration(consts_struct_type, 2,
|
|
||||||
spv::Decoration::DecorationArrayStride,
|
|
||||||
sizeof(uint32_t));
|
|
||||||
b.addMemberName(consts_struct_type, 2, "bool_consts");
|
b.addMemberName(consts_struct_type, 2, "bool_consts");
|
||||||
|
|
||||||
consts_ = b.createVariable(spv::StorageClass::StorageClassUniform,
|
consts_ = b.createVariable(spv::StorageClass::StorageClassUniform,
|
||||||
|
|
|
@ -110,6 +110,7 @@ BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
|
||||||
buffer_info.buffer = transient_buffer_->gpu_buffer();
|
buffer_info.buffer = transient_buffer_->gpu_buffer();
|
||||||
buffer_info.offset = 0;
|
buffer_info.offset = 0;
|
||||||
buffer_info.range = kConstantRegisterUniformRange;
|
buffer_info.range = kConstantRegisterUniformRange;
|
||||||
|
|
||||||
VkWriteDescriptorSet descriptor_writes[2];
|
VkWriteDescriptorSet descriptor_writes[2];
|
||||||
auto& vertex_uniform_binding_write = descriptor_writes[0];
|
auto& vertex_uniform_binding_write = descriptor_writes[0];
|
||||||
vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||||
|
@ -147,6 +148,7 @@ BufferCache::~BufferCache() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
||||||
|
VkCommandBuffer command_buffer,
|
||||||
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
||||||
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
||||||
VkFence fence) {
|
VkFence fence) {
|
||||||
|
@ -175,6 +177,24 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
||||||
32 * 4);
|
32 * 4);
|
||||||
dest_ptr += 32 * 4;
|
dest_ptr += 32 * 4;
|
||||||
|
|
||||||
|
transient_buffer_->Flush(offset, kConstantRegisterUniformRange);
|
||||||
|
|
||||||
|
// Append a barrier to the command buffer.
|
||||||
|
VkBufferMemoryBarrier barrier = {
|
||||||
|
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||||
|
nullptr,
|
||||||
|
VK_ACCESS_HOST_WRITE_BIT,
|
||||||
|
VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
transient_buffer_->gpu_buffer(),
|
||||||
|
offset,
|
||||||
|
kConstantRegisterUniformRange,
|
||||||
|
};
|
||||||
|
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||||
|
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
|
||||||
|
&barrier, 0, nullptr);
|
||||||
|
|
||||||
return {offset, offset};
|
return {offset, offset};
|
||||||
|
|
||||||
// Packed upload code.
|
// Packed upload code.
|
||||||
|
@ -229,8 +249,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||||
uint32_t source_addr, uint32_t source_length, IndexFormat format,
|
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||||
VkFence fence) {
|
uint32_t source_length, IndexFormat format, VkFence fence) {
|
||||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||||
if (offset != VK_WHOLE_SIZE) {
|
if (offset != VK_WHOLE_SIZE) {
|
||||||
return {transient_buffer_->gpu_buffer(), offset};
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
|
@ -258,13 +278,31 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||||
source_ptr, source_length / 4);
|
source_ptr, source_length / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transient_buffer_->Flush(offset, source_length);
|
||||||
|
|
||||||
|
// Append a barrier to the command buffer.
|
||||||
|
VkBufferMemoryBarrier barrier = {
|
||||||
|
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||||
|
nullptr,
|
||||||
|
VK_ACCESS_HOST_WRITE_BIT,
|
||||||
|
VK_ACCESS_INDEX_READ_BIT,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
transient_buffer_->gpu_buffer(),
|
||||||
|
offset,
|
||||||
|
source_length,
|
||||||
|
};
|
||||||
|
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||||
|
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
|
||||||
|
&barrier, 0, nullptr);
|
||||||
|
|
||||||
CacheTransientData(source_addr, source_length, offset);
|
CacheTransientData(source_addr, source_length, offset);
|
||||||
return {transient_buffer_->gpu_buffer(), offset};
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||||
uint32_t source_addr, uint32_t source_length, Endian endian,
|
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||||
VkFence fence) {
|
uint32_t source_length, Endian endian, VkFence fence) {
|
||||||
auto offset = FindCachedTransientData(source_addr, source_length);
|
auto offset = FindCachedTransientData(source_addr, source_length);
|
||||||
if (offset != VK_WHOLE_SIZE) {
|
if (offset != VK_WHOLE_SIZE) {
|
||||||
return {transient_buffer_->gpu_buffer(), offset};
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
|
@ -292,6 +330,24 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||||
assert_always();
|
assert_always();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transient_buffer_->Flush(offset, source_length);
|
||||||
|
|
||||||
|
// Append a barrier to the command buffer.
|
||||||
|
VkBufferMemoryBarrier barrier = {
|
||||||
|
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||||
|
nullptr,
|
||||||
|
VK_ACCESS_HOST_WRITE_BIT,
|
||||||
|
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
transient_buffer_->gpu_buffer(),
|
||||||
|
offset,
|
||||||
|
source_length,
|
||||||
|
};
|
||||||
|
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
|
||||||
|
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
|
||||||
|
&barrier, 0, nullptr);
|
||||||
|
|
||||||
CacheTransientData(source_addr, source_length, offset);
|
CacheTransientData(source_addr, source_length, offset);
|
||||||
return {transient_buffer_->gpu_buffer(), offset};
|
return {transient_buffer_->gpu_buffer(), offset};
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,6 +53,7 @@ class BufferCache {
|
||||||
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
|
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
|
||||||
// The returned offsets may alias.
|
// The returned offsets may alias.
|
||||||
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
|
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
|
||||||
|
VkCommandBuffer command_buffer,
|
||||||
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
const Shader::ConstantRegisterMap& vertex_constant_register_map,
|
||||||
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
const Shader::ConstantRegisterMap& pixel_constant_register_map,
|
||||||
VkFence fence);
|
VkFence fence);
|
||||||
|
@ -61,19 +62,17 @@ class BufferCache {
|
||||||
// recently uploaded data or cached copies.
|
// recently uploaded data or cached copies.
|
||||||
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
||||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||||
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(uint32_t source_addr,
|
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
|
||||||
uint32_t source_length,
|
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||||
IndexFormat format,
|
uint32_t source_length, IndexFormat format, VkFence fence);
|
||||||
VkFence fence);
|
|
||||||
|
|
||||||
// Uploads vertex buffer data from guest memory, possibly eliding with
|
// Uploads vertex buffer data from guest memory, possibly eliding with
|
||||||
// recently uploaded data or cached copies.
|
// recently uploaded data or cached copies.
|
||||||
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
||||||
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||||
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(uint32_t source_addr,
|
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
|
||||||
uint32_t source_length,
|
VkCommandBuffer command_buffer, uint32_t source_addr,
|
||||||
Endian endian,
|
uint32_t source_length, Endian endian, VkFence fence);
|
||||||
VkFence fence);
|
|
||||||
|
|
||||||
// Flushes all pending data to the GPU.
|
// Flushes all pending data to the GPU.
|
||||||
// Until this is called the GPU is not guaranteed to see any data.
|
// Until this is called the GPU is not guaranteed to see any data.
|
||||||
|
|
|
@ -626,7 +626,7 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
|
||||||
// Upload the constants the shaders require.
|
// Upload the constants the shaders require.
|
||||||
// These are optional, and if none are defined 0 will be returned.
|
// These are optional, and if none are defined 0 will be returned.
|
||||||
auto constant_offsets = buffer_cache_->UploadConstantRegisters(
|
auto constant_offsets = buffer_cache_->UploadConstantRegisters(
|
||||||
vertex_shader->constant_register_map(),
|
current_setup_buffer_, vertex_shader->constant_register_map(),
|
||||||
pixel_shader ? pixel_shader->constant_register_map() : dummy_map,
|
pixel_shader ? pixel_shader->constant_register_map() : dummy_map,
|
||||||
current_batch_fence_);
|
current_batch_fence_);
|
||||||
if (constant_offsets.first == VK_WHOLE_SIZE ||
|
if (constant_offsets.first == VK_WHOLE_SIZE ||
|
||||||
|
@ -681,7 +681,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
|
||||||
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
||||||
: sizeof(uint16_t));
|
: sizeof(uint16_t));
|
||||||
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
|
auto buffer_ref = buffer_cache_->UploadIndexBuffer(
|
||||||
source_addr, source_length, info.format, current_batch_fence_);
|
current_setup_buffer_, source_addr, source_length, info.format,
|
||||||
|
current_batch_fence_);
|
||||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||||
// Failed to upload buffer.
|
// Failed to upload buffer.
|
||||||
return false;
|
return false;
|
||||||
|
@ -745,8 +746,8 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
|
||||||
// Upload (or get a cached copy of) the buffer.
|
// Upload (or get a cached copy of) the buffer.
|
||||||
uint32_t source_length = uint32_t(valid_range);
|
uint32_t source_length = uint32_t(valid_range);
|
||||||
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
auto buffer_ref = buffer_cache_->UploadVertexBuffer(
|
||||||
physical_address, source_length, static_cast<Endian>(fetch->endian),
|
current_setup_buffer_, physical_address, source_length,
|
||||||
current_batch_fence_);
|
static_cast<Endian>(fetch->endian), current_batch_fence_);
|
||||||
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||||
// Failed to upload buffer.
|
// Failed to upload buffer.
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -232,6 +232,16 @@ void CircularBuffer::Flush(Allocation* allocation) {
|
||||||
vkFlushMappedMemoryRanges(*device_, 1, &range);
|
vkFlushMappedMemoryRanges(*device_, 1, &range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Flushes host writes for an explicit sub-range of the buffer's mapped memory.
//
// offset: start of the range; gpu_base_ is added to it to form the offset
//         into gpu_memory_ that is handed to Vulkan.
// length: size in bytes of the range to flush.
//
// The result of vkFlushMappedMemoryRanges is not checked here.
// NOTE(review): offset/length are passed through without any alignment
// adjustment - confirm callers satisfy the device's flush-range alignment
// requirements (nonCoherentAtomSize).
void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) {
  // Field order: sType, pNext, memory, offset, size.
  VkMappedMemoryRange flush_range = {
      VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
      nullptr,
      gpu_memory_,
      gpu_base_ + offset,
      length,
  };
  vkFlushMappedMemoryRanges(*device_, 1, &flush_range);
}
|
||||||
|
|
||||||
void CircularBuffer::Clear() {
|
void CircularBuffer::Clear() {
|
||||||
for (auto alloc : allocations_) {
|
for (auto alloc : allocations_) {
|
||||||
delete alloc;
|
delete alloc;
|
||||||
|
|
|
@ -61,6 +61,7 @@ class CircularBuffer {
|
||||||
// reaches the signaled state.
|
// reaches the signaled state.
|
||||||
Allocation* Acquire(VkDeviceSize length, VkFence fence);
|
Allocation* Acquire(VkDeviceSize length, VkFence fence);
|
||||||
void Flush(Allocation* allocation);
|
void Flush(Allocation* allocation);
|
||||||
|
void Flush(VkDeviceSize offset, VkDeviceSize length);
|
||||||
|
|
||||||
// Clears all allocations, regardless of whether they've been consumed or not.
|
// Clears all allocations, regardless of whether they've been consumed or not.
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
Loading…
Reference in New Issue