Vulkan: Enforce 32 byte alignment for uploads

It blows up on AVX2 stores on some drivers if we don't (e.g. AMD Linux).
This commit is contained in:
Connor McLaughlin 2022-06-05 23:27:24 +10:00 committed by refractionpcsx2
parent a9819542d4
commit 394f1f2049
5 changed files with 74 additions and 36 deletions

View File

@ -73,4 +73,20 @@ namespace Common
value |= (value >> 16);
return value - (value >> 1);
}
template<typename T>
static constexpr __fi T NextPow2(T value)
{
if (value == static_cast<T>(0))
return 0;
value--;
value |= value >> 1;
value |= value >> 2;
value |= value >> 4;
value |= value >> 8;
value |= value >> 16;
value++;
return value;
}
} // namespace Common

View File

@ -14,6 +14,7 @@
*/
#include "common/Vulkan/Context.h"
#include "common/Align.h"
#include "common/Assertions.h"
#include "common/Console.h"
#include "common/StringUtil.h"
@ -46,15 +47,17 @@ namespace Vulkan
vkGetPhysicalDeviceProperties(physical_device, &m_device_properties);
vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties);
// Would any drivers be this silly? I hope not...
// We need this to be at least 32 byte aligned for AVX2 stores.
m_device_properties.limits.minUniformBufferOffsetAlignment =
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
m_device_properties.limits.minTexelBufferOffsetAlignment =
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
m_device_properties.limits.optimalBufferCopyOffsetAlignment =
std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(1));
std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(32));
m_device_properties.limits.optimalBufferCopyRowPitchAlignment =
std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(1));
Common::NextPow2(std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(32)));
m_device_properties.limits.bufferImageGranularity =
std::max(m_device_properties.limits.bufferImageGranularity, static_cast<VkDeviceSize>(32));
}
Context::~Context() = default;

View File

@ -99,23 +99,34 @@ namespace Vulkan
__fi const OptionalExtensions& GetOptionalExtensions() const { return m_optional_extensions; }
// Helpers for getting constants
__fi VkDeviceSize GetUniformBufferAlignment() const
__fi u32 GetUniformBufferAlignment() const
{
return m_device_properties.limits.minUniformBufferOffsetAlignment;
return static_cast<u32>(m_device_properties.limits.minUniformBufferOffsetAlignment);
}
__fi VkDeviceSize GetTexelBufferAlignment() const
__fi u32 GetTexelBufferAlignment() const
{
return m_device_properties.limits.minTexelBufferOffsetAlignment;
return static_cast<u32>(m_device_properties.limits.minTexelBufferOffsetAlignment);
}
__fi VkDeviceSize GetStorageBufferAlignment() const
__fi u32 GetStorageBufferAlignment() const
{
return m_device_properties.limits.minStorageBufferOffsetAlignment;
return static_cast<u32>(m_device_properties.limits.minStorageBufferOffsetAlignment);
}
__fi VkDeviceSize GetBufferImageGranularity() const
__fi u32 GetBufferImageGranularity() const
{
return m_device_properties.limits.bufferImageGranularity;
return static_cast<u32>(m_device_properties.limits.bufferImageGranularity);
}
__fi u32 GetBufferCopyOffsetAlignment() const
{
return static_cast<u32>(m_device_properties.limits.optimalBufferCopyOffsetAlignment);
}
__fi u32 GetBufferCopyRowPitchAlignment() const
{
return static_cast<u32>(m_device_properties.limits.optimalBufferCopyRowPitchAlignment);
}
__fi u32 GetMaxImageDimension2D() const
{
return m_device_properties.limits.maxImageDimension2D;
}
__fi VkDeviceSize GetMaxImageDimension2D() const { return m_device_properties.limits.maxImageDimension2D; }
// Creates a simple render pass.
__ri VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format,

View File

@ -1,6 +1,7 @@
#include "PrecompiledHeader.h"
#include "VulkanHostDisplay.h"
#include "common/Align.h"
#include "common/Assertions.h"
#include "common/Console.h"
#include "common/ScopedGuard.h"
@ -194,25 +195,26 @@ std::string VulkanHostDisplay::GetDriverInfo() const
static bool UploadBufferToTexture(
Vulkan::Texture* texture, VkCommandBuffer cmdbuf, u32 width, u32 height, const void* data, u32 data_stride)
{
const u32 tight_stride = Vulkan::Util::GetTexelSize(texture->GetFormat()) * width;
const u32 tight_size = tight_stride * height;
const u32 upload_stride = Common::AlignUpPow2(Vulkan::Util::GetTexelSize(texture->GetFormat()) * width,
g_vulkan_context->GetBufferCopyRowPitchAlignment());
const u32 upload_size = upload_stride * height;
Vulkan::StreamBuffer& buf = g_vulkan_context->GetTextureUploadBuffer();
if (!buf.ReserveMemory(tight_size, g_vulkan_context->GetBufferImageGranularity()))
if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{
Console.WriteLn("Executing command buffer for UploadBufferToTexture()");
g_vulkan_context->ExecuteCommandBuffer(false);
if (!buf.ReserveMemory(tight_size, g_vulkan_context->GetBufferImageGranularity()))
if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{
Console.WriteLn("Failed to allocate %u bytes in stream buffer for UploadBufferToTexture()", tight_size);
Console.WriteLn("Failed to allocate %u bytes in stream buffer for UploadBufferToTexture()", upload_size);
return false;
}
cmdbuf = g_vulkan_context->GetCurrentInitCommandBuffer();
}
const u32 buf_offset = buf.GetCurrentOffset();
StringUtil::StrideMemCpy(buf.GetCurrentHostPointer(), tight_stride, data, data_stride, tight_stride, height);
buf.CommitMemory(tight_size);
StringUtil::StrideMemCpy(buf.GetCurrentHostPointer(), upload_stride, data, data_stride, upload_stride, height);
buf.CommitMemory(upload_size);
texture->UpdateFromBuffer(cmdbuf, 0, 0, 0, 0, width, height, width, buf.GetBuffer(), buf_offset);
return true;

View File

@ -16,6 +16,7 @@
#include "PrecompiledHeader.h"
#include "GSDeviceVK.h"
#include "GSTextureVK.h"
#include "common/Align.h"
#include "common/Assertions.h"
#include "common/Vulkan/Builders.h"
#include "common/Vulkan/Context.h"
@ -144,8 +145,9 @@ VkCommandBuffer GSTextureVK::GetCommandBufferForUpdate()
return g_vulkan_context->GetCurrentInitCommandBuffer();
}
static VkBuffer AllocateUploadStagingBuffer(const void* data, u32 size)
static VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height)
{
const u32 size = upload_pitch * height;
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0,
static_cast<VkDeviceSize>(size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};
@ -170,7 +172,7 @@ static VkBuffer AllocateUploadStagingBuffer(const void* data, u32 size)
g_vulkan_context->DeferBufferDestruction(buffer, allocation);
// And write the data.
std::memcpy(ai.pMappedData, data, size);
StringUtil::StrideMemCpy(ai.pMappedData, upload_pitch, data, pitch, pitch, height);
vmaFlushAllocation(g_vulkan_context->GetAllocator(), allocation, 0, size);
return buffer;
}
@ -184,8 +186,8 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
const u32 width = r.width();
const u32 height = r.height();
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
const u32 required_size = CalcUploadSize(height, pitch);
const u32 upload_pitch = Common::AlignUpPow2(pitch, g_vulkan_context->GetBufferCopyRowPitchAlignment());
const u32 required_size = CalcUploadSize(height, upload_pitch);
// If the texture is larger than half our streaming buffer size, use a separate buffer.
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
@ -194,18 +196,18 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
if (required_size > (g_vulkan_context->GetTextureUploadBuffer().GetCurrentSize() / 2))
{
buffer_offset = 0;
buffer = AllocateUploadStagingBuffer(data, required_size);
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, height);
if (buffer == VK_NULL_HANDLE)
return false;
}
else
{
Vulkan::StreamBuffer& sbuffer = g_vulkan_context->GetTextureUploadBuffer();
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
false, "While waiting for %u bytes in texture upload buffer", required_size);
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
return false;
@ -214,7 +216,7 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
buffer = sbuffer.GetBuffer();
buffer_offset = sbuffer.GetCurrentOffset();
std::memcpy(sbuffer.GetCurrentHostPointer(), data, required_size);
StringUtil::StrideMemCpy(sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch, pitch, height);
sbuffer.CommitMemory(required_size);
}
@ -234,7 +236,8 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
m_state = State::Dirty;
}
m_texture.UpdateFromBuffer(cmdbuf, layer, 0, r.x, r.y, width, height, row_length, buffer, buffer_offset);
m_texture.UpdateFromBuffer(cmdbuf, layer, 0, r.x, r.y, width, height,
CalcUploadRowLengthFromPitch(upload_pitch), buffer, buffer_offset);
m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
if (m_type == Type::Texture)
@ -252,7 +255,8 @@ bool GSTextureVK::Map(GSMap& m, const GSVector4i* r, int layer)
m_map_area = r ? *r : GSVector4i(0, 0, m_texture.GetWidth(), m_texture.GetHeight());
m_map_level = layer;
m.pitch = m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat());
m.pitch = Common::AlignUpPow2(m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat()),
g_vulkan_context->GetBufferCopyRowPitchAlignment());
// see note in Update() for the reason why.
const u32 required_size = m.pitch * m_map_area.height();
@ -260,11 +264,11 @@ bool GSTextureVK::Map(GSMap& m, const GSVector4i* r, int layer)
if (required_size >= (buffer.GetCurrentSize() / 2))
return false;
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
{
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
false, "While waiting for %u bytes in texture upload buffer", required_size);
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
pxFailRel("Failed to reserve texture upload memory");
}
@ -280,7 +284,9 @@ void GSTextureVK::Unmap()
// TODO: non-tightly-packed formats
const u32 width = static_cast<u32>(m_map_area.width());
const u32 height = static_cast<u32>(m_map_area.height());
const u32 required_size = width * height * Vulkan::Util::GetTexelSize(m_texture.GetFormat());
const u32 pitch = Common::AlignUpPow2(m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat()),
g_vulkan_context->GetBufferCopyRowPitchAlignment());
const u32 required_size = pitch * height;
Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer();
const u32 buffer_offset = buffer.GetCurrentOffset();
buffer.CommitMemory(required_size);
@ -302,8 +308,8 @@ void GSTextureVK::Unmap()
m_state = State::Dirty;
}
m_texture.UpdateFromBuffer(cmdbuf, m_map_level, 0, m_map_area.x, m_map_area.y, width, height, width,
buffer.GetBuffer(), buffer_offset);
m_texture.UpdateFromBuffer(cmdbuf, m_map_level, 0, m_map_area.x, m_map_area.y, width, height,
CalcUploadRowLengthFromPitch(pitch), buffer.GetBuffer(), buffer_offset);
m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
if (m_type == Type::Texture)