mirror of https://github.com/PCSX2/pcsx2.git
Vulkan: Enforce 32 byte alignment for uploads
It blows up on AVX2 stores on some drivers if we don't (e.g. AMD Linux).
This commit is contained in:
parent
a9819542d4
commit
394f1f2049
|
@ -73,4 +73,20 @@ namespace Common
|
||||||
value |= (value >> 16);
|
value |= (value >> 16);
|
||||||
return value - (value >> 1);
|
return value - (value >> 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rounds value up to the nearest power of two.
//
// Returns value unchanged when it is already a power of two, and 0 when value is 0.
// Intended for unsigned integer types. If the next power of two does not fit in T,
// the final increment wraps and the result is 0.
template<typename T>
static constexpr __fi T NextPow2(T value)
{
	// Zero has no next power of two here; it maps to zero.
	if (value == static_cast<T>(0))
		return value;

	// Subtract one so exact powers of two map to themselves, then smear the highest
	// set bit into every lower bit. The shift doubles until it covers the full width
	// of T, so 64-bit types (this is called with VkDeviceSize) also get the >> 32
	// step, while the shift amount always stays below the width of T.
	value--;
	for (unsigned shift = 1; shift < sizeof(T) * 8; shift <<= 1)
		value |= value >> shift;

	// Every bit below the top one is now set; adding one carries into the next power of two.
	value++;
	return value;
}
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "common/Vulkan/Context.h"
|
#include "common/Vulkan/Context.h"
|
||||||
|
#include "common/Align.h"
|
||||||
#include "common/Assertions.h"
|
#include "common/Assertions.h"
|
||||||
#include "common/Console.h"
|
#include "common/Console.h"
|
||||||
#include "common/StringUtil.h"
|
#include "common/StringUtil.h"
|
||||||
|
@ -46,15 +47,17 @@ namespace Vulkan
|
||||||
vkGetPhysicalDeviceProperties(physical_device, &m_device_properties);
|
vkGetPhysicalDeviceProperties(physical_device, &m_device_properties);
|
||||||
vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties);
|
vkGetPhysicalDeviceMemoryProperties(physical_device, &m_device_memory_properties);
|
||||||
|
|
||||||
// Would any drivers be this silly? I hope not...
|
// We need this to be at least 32 byte aligned for AVX2 stores.
|
||||||
m_device_properties.limits.minUniformBufferOffsetAlignment =
|
m_device_properties.limits.minUniformBufferOffsetAlignment =
|
||||||
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
|
std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
|
||||||
m_device_properties.limits.minTexelBufferOffsetAlignment =
|
m_device_properties.limits.minTexelBufferOffsetAlignment =
|
||||||
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
|
std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(32));
|
||||||
m_device_properties.limits.optimalBufferCopyOffsetAlignment =
|
m_device_properties.limits.optimalBufferCopyOffsetAlignment =
|
||||||
std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(1));
|
std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(32));
|
||||||
m_device_properties.limits.optimalBufferCopyRowPitchAlignment =
|
m_device_properties.limits.optimalBufferCopyRowPitchAlignment =
|
||||||
std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(1));
|
Common::NextPow2(std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(32)));
|
||||||
|
m_device_properties.limits.bufferImageGranularity =
|
||||||
|
std::max(m_device_properties.limits.bufferImageGranularity, static_cast<VkDeviceSize>(32));
|
||||||
}
|
}
|
||||||
|
|
||||||
Context::~Context() = default;
|
Context::~Context() = default;
|
||||||
|
|
|
@ -99,23 +99,34 @@ namespace Vulkan
|
||||||
__fi const OptionalExtensions& GetOptionalExtensions() const { return m_optional_extensions; }
|
__fi const OptionalExtensions& GetOptionalExtensions() const { return m_optional_extensions; }
|
||||||
|
|
||||||
// Helpers for getting constants
|
// Helpers for getting constants
|
||||||
__fi VkDeviceSize GetUniformBufferAlignment() const
|
__fi u32 GetUniformBufferAlignment() const
|
||||||
{
|
{
|
||||||
return m_device_properties.limits.minUniformBufferOffsetAlignment;
|
return static_cast<u32>(m_device_properties.limits.minUniformBufferOffsetAlignment);
|
||||||
}
|
}
|
||||||
__fi VkDeviceSize GetTexelBufferAlignment() const
|
__fi u32 GetTexelBufferAlignment() const
|
||||||
{
|
{
|
||||||
return m_device_properties.limits.minTexelBufferOffsetAlignment;
|
return static_cast<u32>(m_device_properties.limits.minTexelBufferOffsetAlignment);
|
||||||
}
|
}
|
||||||
__fi VkDeviceSize GetStorageBufferAlignment() const
|
__fi u32 GetStorageBufferAlignment() const
|
||||||
{
|
{
|
||||||
return m_device_properties.limits.minStorageBufferOffsetAlignment;
|
return static_cast<u32>(m_device_properties.limits.minStorageBufferOffsetAlignment);
|
||||||
}
|
}
|
||||||
__fi VkDeviceSize GetBufferImageGranularity() const
|
__fi u32 GetBufferImageGranularity() const
|
||||||
{
|
{
|
||||||
return m_device_properties.limits.bufferImageGranularity;
|
return static_cast<u32>(m_device_properties.limits.bufferImageGranularity);
|
||||||
|
}
|
||||||
|
__fi u32 GetBufferCopyOffsetAlignment() const
|
||||||
|
{
|
||||||
|
return static_cast<u32>(m_device_properties.limits.optimalBufferCopyOffsetAlignment);
|
||||||
|
}
|
||||||
|
__fi u32 GetBufferCopyRowPitchAlignment() const
|
||||||
|
{
|
||||||
|
return static_cast<u32>(m_device_properties.limits.optimalBufferCopyRowPitchAlignment);
|
||||||
|
}
|
||||||
|
__fi u32 GetMaxImageDimension2D() const
|
||||||
|
{
|
||||||
|
return m_device_properties.limits.maxImageDimension2D;
|
||||||
}
|
}
|
||||||
__fi VkDeviceSize GetMaxImageDimension2D() const { return m_device_properties.limits.maxImageDimension2D; }
|
|
||||||
|
|
||||||
// Creates a simple render pass.
|
// Creates a simple render pass.
|
||||||
__ri VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format,
|
__ri VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format,
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
|
|
||||||
#include "VulkanHostDisplay.h"
|
#include "VulkanHostDisplay.h"
|
||||||
|
#include "common/Align.h"
|
||||||
#include "common/Assertions.h"
|
#include "common/Assertions.h"
|
||||||
#include "common/Console.h"
|
#include "common/Console.h"
|
||||||
#include "common/ScopedGuard.h"
|
#include "common/ScopedGuard.h"
|
||||||
|
@ -194,25 +195,26 @@ std::string VulkanHostDisplay::GetDriverInfo() const
|
||||||
static bool UploadBufferToTexture(
|
static bool UploadBufferToTexture(
|
||||||
Vulkan::Texture* texture, VkCommandBuffer cmdbuf, u32 width, u32 height, const void* data, u32 data_stride)
|
Vulkan::Texture* texture, VkCommandBuffer cmdbuf, u32 width, u32 height, const void* data, u32 data_stride)
|
||||||
{
|
{
|
||||||
const u32 tight_stride = Vulkan::Util::GetTexelSize(texture->GetFormat()) * width;
|
const u32 upload_stride = Common::AlignUpPow2(Vulkan::Util::GetTexelSize(texture->GetFormat()) * width,
|
||||||
const u32 tight_size = tight_stride * height;
|
g_vulkan_context->GetBufferCopyRowPitchAlignment());
|
||||||
|
const u32 upload_size = upload_stride * height;
|
||||||
|
|
||||||
Vulkan::StreamBuffer& buf = g_vulkan_context->GetTextureUploadBuffer();
|
Vulkan::StreamBuffer& buf = g_vulkan_context->GetTextureUploadBuffer();
|
||||||
if (!buf.ReserveMemory(tight_size, g_vulkan_context->GetBufferImageGranularity()))
|
if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
|
||||||
{
|
{
|
||||||
Console.WriteLn("Executing command buffer for UploadBufferToTexture()");
|
Console.WriteLn("Executing command buffer for UploadBufferToTexture()");
|
||||||
g_vulkan_context->ExecuteCommandBuffer(false);
|
g_vulkan_context->ExecuteCommandBuffer(false);
|
||||||
if (!buf.ReserveMemory(tight_size, g_vulkan_context->GetBufferImageGranularity()))
|
if (!buf.ReserveMemory(upload_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
|
||||||
{
|
{
|
||||||
Console.WriteLn("Failed to allocate %u bytes in stream buffer for UploadBufferToTexture()", tight_size);
|
Console.WriteLn("Failed to allocate %u bytes in stream buffer for UploadBufferToTexture()", upload_size);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
cmdbuf = g_vulkan_context->GetCurrentInitCommandBuffer();
|
cmdbuf = g_vulkan_context->GetCurrentInitCommandBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 buf_offset = buf.GetCurrentOffset();
|
const u32 buf_offset = buf.GetCurrentOffset();
|
||||||
StringUtil::StrideMemCpy(buf.GetCurrentHostPointer(), tight_stride, data, data_stride, tight_stride, height);
|
StringUtil::StrideMemCpy(buf.GetCurrentHostPointer(), upload_stride, data, data_stride, upload_stride, height);
|
||||||
buf.CommitMemory(tight_size);
|
buf.CommitMemory(upload_size);
|
||||||
|
|
||||||
texture->UpdateFromBuffer(cmdbuf, 0, 0, 0, 0, width, height, width, buf.GetBuffer(), buf_offset);
|
texture->UpdateFromBuffer(cmdbuf, 0, 0, 0, 0, width, height, width, buf.GetBuffer(), buf_offset);
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "GSDeviceVK.h"
|
#include "GSDeviceVK.h"
|
||||||
#include "GSTextureVK.h"
|
#include "GSTextureVK.h"
|
||||||
|
#include "common/Align.h"
|
||||||
#include "common/Assertions.h"
|
#include "common/Assertions.h"
|
||||||
#include "common/Vulkan/Builders.h"
|
#include "common/Vulkan/Builders.h"
|
||||||
#include "common/Vulkan/Context.h"
|
#include "common/Vulkan/Context.h"
|
||||||
|
@ -144,8 +145,9 @@ VkCommandBuffer GSTextureVK::GetCommandBufferForUpdate()
|
||||||
return g_vulkan_context->GetCurrentInitCommandBuffer();
|
return g_vulkan_context->GetCurrentInitCommandBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkBuffer AllocateUploadStagingBuffer(const void* data, u32 size)
|
static VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height)
|
||||||
{
|
{
|
||||||
|
const u32 size = upload_pitch * height;
|
||||||
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0,
|
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0,
|
||||||
static_cast<VkDeviceSize>(size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};
|
static_cast<VkDeviceSize>(size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};
|
||||||
|
|
||||||
|
@ -170,7 +172,7 @@ static VkBuffer AllocateUploadStagingBuffer(const void* data, u32 size)
|
||||||
g_vulkan_context->DeferBufferDestruction(buffer, allocation);
|
g_vulkan_context->DeferBufferDestruction(buffer, allocation);
|
||||||
|
|
||||||
// And write the data.
|
// And write the data.
|
||||||
std::memcpy(ai.pMappedData, data, size);
|
StringUtil::StrideMemCpy(ai.pMappedData, upload_pitch, data, pitch, pitch, height);
|
||||||
vmaFlushAllocation(g_vulkan_context->GetAllocator(), allocation, 0, size);
|
vmaFlushAllocation(g_vulkan_context->GetAllocator(), allocation, 0, size);
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
@ -184,8 +186,8 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
|
||||||
|
|
||||||
const u32 width = r.width();
|
const u32 width = r.width();
|
||||||
const u32 height = r.height();
|
const u32 height = r.height();
|
||||||
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
|
const u32 upload_pitch = Common::AlignUpPow2(pitch, g_vulkan_context->GetBufferCopyRowPitchAlignment());
|
||||||
const u32 required_size = CalcUploadSize(height, pitch);
|
const u32 required_size = CalcUploadSize(height, upload_pitch);
|
||||||
|
|
||||||
// If the texture is larger than half our streaming buffer size, use a separate buffer.
|
// If the texture is larger than half our streaming buffer size, use a separate buffer.
|
||||||
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
|
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
|
||||||
|
@ -194,18 +196,18 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
|
||||||
if (required_size > (g_vulkan_context->GetTextureUploadBuffer().GetCurrentSize() / 2))
|
if (required_size > (g_vulkan_context->GetTextureUploadBuffer().GetCurrentSize() / 2))
|
||||||
{
|
{
|
||||||
buffer_offset = 0;
|
buffer_offset = 0;
|
||||||
buffer = AllocateUploadStagingBuffer(data, required_size);
|
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, height);
|
||||||
if (buffer == VK_NULL_HANDLE)
|
if (buffer == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Vulkan::StreamBuffer& sbuffer = g_vulkan_context->GetTextureUploadBuffer();
|
Vulkan::StreamBuffer& sbuffer = g_vulkan_context->GetTextureUploadBuffer();
|
||||||
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
|
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
|
||||||
{
|
{
|
||||||
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
|
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
|
||||||
false, "While waiting for %u bytes in texture upload buffer", required_size);
|
false, "While waiting for %u bytes in texture upload buffer", required_size);
|
||||||
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
|
if (!sbuffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
|
||||||
{
|
{
|
||||||
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
|
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
|
||||||
return false;
|
return false;
|
||||||
|
@ -214,7 +216,7 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
|
||||||
|
|
||||||
buffer = sbuffer.GetBuffer();
|
buffer = sbuffer.GetBuffer();
|
||||||
buffer_offset = sbuffer.GetCurrentOffset();
|
buffer_offset = sbuffer.GetCurrentOffset();
|
||||||
std::memcpy(sbuffer.GetCurrentHostPointer(), data, required_size);
|
StringUtil::StrideMemCpy(sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch, pitch, height);
|
||||||
sbuffer.CommitMemory(required_size);
|
sbuffer.CommitMemory(required_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -234,7 +236,8 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
|
||||||
m_state = State::Dirty;
|
m_state = State::Dirty;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_texture.UpdateFromBuffer(cmdbuf, layer, 0, r.x, r.y, width, height, row_length, buffer, buffer_offset);
|
m_texture.UpdateFromBuffer(cmdbuf, layer, 0, r.x, r.y, width, height,
|
||||||
|
CalcUploadRowLengthFromPitch(upload_pitch), buffer, buffer_offset);
|
||||||
m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
|
|
||||||
if (m_type == Type::Texture)
|
if (m_type == Type::Texture)
|
||||||
|
@ -252,7 +255,8 @@ bool GSTextureVK::Map(GSMap& m, const GSVector4i* r, int layer)
|
||||||
m_map_area = r ? *r : GSVector4i(0, 0, m_texture.GetWidth(), m_texture.GetHeight());
|
m_map_area = r ? *r : GSVector4i(0, 0, m_texture.GetWidth(), m_texture.GetHeight());
|
||||||
m_map_level = layer;
|
m_map_level = layer;
|
||||||
|
|
||||||
m.pitch = m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat());
|
m.pitch = Common::AlignUpPow2(m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat()),
|
||||||
|
g_vulkan_context->GetBufferCopyRowPitchAlignment());
|
||||||
|
|
||||||
// see note in Update() for the reason why.
|
// see note in Update() for the reason why.
|
||||||
const u32 required_size = m.pitch * m_map_area.height();
|
const u32 required_size = m.pitch * m_map_area.height();
|
||||||
|
@ -260,11 +264,11 @@ bool GSTextureVK::Map(GSMap& m, const GSVector4i* r, int layer)
|
||||||
if (required_size >= (buffer.GetCurrentSize() / 2))
|
if (required_size >= (buffer.GetCurrentSize() / 2))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
|
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
|
||||||
{
|
{
|
||||||
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
|
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
|
||||||
false, "While waiting for %u bytes in texture upload buffer", required_size);
|
false, "While waiting for %u bytes in texture upload buffer", required_size);
|
||||||
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferImageGranularity()))
|
if (!buffer.ReserveMemory(required_size, g_vulkan_context->GetBufferCopyOffsetAlignment()))
|
||||||
pxFailRel("Failed to reserve texture upload memory");
|
pxFailRel("Failed to reserve texture upload memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,7 +284,9 @@ void GSTextureVK::Unmap()
|
||||||
// TODO: non-tightly-packed formats
|
// TODO: non-tightly-packed formats
|
||||||
const u32 width = static_cast<u32>(m_map_area.width());
|
const u32 width = static_cast<u32>(m_map_area.width());
|
||||||
const u32 height = static_cast<u32>(m_map_area.height());
|
const u32 height = static_cast<u32>(m_map_area.height());
|
||||||
const u32 required_size = width * height * Vulkan::Util::GetTexelSize(m_texture.GetFormat());
|
const u32 pitch = Common::AlignUpPow2(m_map_area.width() * Vulkan::Util::GetTexelSize(m_texture.GetFormat()),
|
||||||
|
g_vulkan_context->GetBufferCopyRowPitchAlignment());
|
||||||
|
const u32 required_size = pitch * height;
|
||||||
Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer();
|
Vulkan::StreamBuffer& buffer = g_vulkan_context->GetTextureUploadBuffer();
|
||||||
const u32 buffer_offset = buffer.GetCurrentOffset();
|
const u32 buffer_offset = buffer.GetCurrentOffset();
|
||||||
buffer.CommitMemory(required_size);
|
buffer.CommitMemory(required_size);
|
||||||
|
@ -302,8 +308,8 @@ void GSTextureVK::Unmap()
|
||||||
m_state = State::Dirty;
|
m_state = State::Dirty;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_texture.UpdateFromBuffer(cmdbuf, m_map_level, 0, m_map_area.x, m_map_area.y, width, height, width,
|
m_texture.UpdateFromBuffer(cmdbuf, m_map_level, 0, m_map_area.x, m_map_area.y, width, height,
|
||||||
buffer.GetBuffer(), buffer_offset);
|
CalcUploadRowLengthFromPitch(pitch), buffer.GetBuffer(), buffer_offset);
|
||||||
m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
m_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
|
|
||||||
if (m_type == Type::Texture)
|
if (m_type == Type::Texture)
|
||||||
|
|
Loading…
Reference in New Issue