Vulkan: Support native compressed textures

This commit is contained in:
Stenzek 2017-04-16 22:23:33 +10:00
parent 3f18c5e0f1
commit c53a60f3c3
6 changed files with 145 additions and 67 deletions

View File

@ -109,7 +109,7 @@ constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
// streaming buffer and be blocking frequently. Games are unlikely to have textures this
// large anyway, so it's only really an issue for HD texture packs, and memory is not
// a limiting factor in these scenarios anyway.
constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4;
constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8;
// Streaming uniform buffer size
constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;

View File

@ -127,7 +127,7 @@ void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size)
void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches)
{
_assert_((offset + size) <= m_size);
_assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset)));
_assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
if (invalidate_caches)
InvalidateCPUCache(offset, size);
@ -138,7 +138,7 @@ void StagingBuffer::Write(VkDeviceSize offset, const void* data, size_t size,
bool invalidate_caches)
{
_assert_((offset + size) <= m_size);
_assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset)));
_assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
memcpy(m_map_pointer + (offset - m_map_offset), data, size);
if (invalidate_caches)

View File

@ -9,6 +9,7 @@
#include <string>
#include <vector>
#include "Common/Align.h"
#include "Common/Assert.h"
#include "Common/CommonFuncs.h"
#include "Common/Logging/Log.h"
@ -238,9 +239,10 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
// Allocate texture object
VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(config.format);
std::unique_ptr<Texture2D> texture = Texture2D::Create(
config.width, config.height, config.levels, config.layers, TEXTURECACHE_TEXTURE_FORMAT,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage);
config.width, config.height, config.levels, config.layers, vk_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage);
if (!texture)
return nullptr;
@ -366,87 +368,68 @@ void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_l
m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// Does this texture data fit within the streaming buffer?
u32 upload_width = width;
u32 upload_pitch = upload_width * sizeof(u32);
u32 upload_size = upload_pitch * height;
// For unaligned textures, we can save some memory in the transfer buffer by skipping the rows
// that lie outside of the texture's dimensions.
u32 upload_alignment = static_cast<u32>(g_vulkan_context->GetBufferImageGranularity());
u32 source_pitch = row_length * 4;
if ((upload_size + upload_alignment) <= STAGING_TEXTURE_UPLOAD_THRESHOLD &&
(upload_size + upload_alignment) <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE)
{
// Assume tightly packed rows, with no padding as the buffer source.
StreamBuffer* upload_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get();
u32 block_size = Util::GetBlockSize(m_texture->GetFormat());
u32 num_rows = Common::AlignUp(height, block_size) / block_size;
size_t source_pitch = Util::GetPitchForTexture(m_texture->GetFormat(), row_length);
size_t upload_size = source_pitch * num_rows;
std::unique_ptr<StagingBuffer> temp_buffer;
VkBuffer upload_buffer;
VkDeviceSize upload_buffer_offset;
// Allocate memory from the streaming buffer for the texture data.
if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity()))
// Does this texture data fit within the streaming buffer?
if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD &&
upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE)
{
StreamBuffer* stream_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get();
if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
{
// Execute the command buffer first.
WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer");
Util::ExecuteCurrentCommandsAndRestoreState(false);
// Try allocating again. This may cause a fence wait.
if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity()))
if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
PanicAlert("Failed to allocate space in texture upload buffer");
}
// Grab buffer pointers
VkBuffer image_upload_buffer = upload_buffer->GetBuffer();
VkDeviceSize image_upload_buffer_offset = upload_buffer->GetCurrentOffset();
u8* image_upload_buffer_pointer = upload_buffer->GetCurrentHostPointer();
// Copy to the buffer using the stride from the subresource layout
const u8* source_ptr = buffer;
if (upload_pitch != source_pitch)
{
VkDeviceSize copy_pitch = std::min(source_pitch, upload_pitch);
for (unsigned int row = 0; row < height; row++)
{
memcpy(image_upload_buffer_pointer + row * upload_pitch, source_ptr + row * source_pitch,
copy_pitch);
}
// Copy to the streaming buffer.
upload_buffer = stream_buffer->GetBuffer();
upload_buffer_offset = stream_buffer->GetCurrentOffset();
std::memcpy(stream_buffer->GetCurrentHostPointer(), buffer, upload_size);
stream_buffer->CommitMemory(upload_size);
}
else
{
// Can copy the whole thing in one block, the pitch matches
memcpy(image_upload_buffer_pointer, source_ptr, upload_size);
}
// Flush buffer memory if necessary
upload_buffer->CommitMemory(upload_size);
// Copy from the streaming buffer to the actual image.
VkBufferImageCopy image_copy = {
image_upload_buffer_offset, // VkDeviceSize bufferOffset
0, // uint32_t bufferRowLength
0, // uint32_t bufferImageHeight
{VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource
{0, 0, 0}, // VkOffset3D imageOffset
{width, height, 1} // VkExtent3D imageExtent
};
vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), image_upload_buffer,
m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&image_copy);
}
else
{
// Slow path. The data for the image is too large to fit in the streaming buffer, so we need
// to allocate a temporary texture to store the data in, then copy to the real texture.
std::unique_ptr<StagingTexture2D> staging_texture = StagingTexture2D::Create(
STAGING_BUFFER_TYPE_UPLOAD, width, height, TEXTURECACHE_TEXTURE_FORMAT);
if (!staging_texture || !staging_texture->Map())
// Create a temporary staging buffer that is destroyed after the image is copied.
temp_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_UPLOAD, upload_size,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
if (!temp_buffer || !temp_buffer->Map())
{
PanicAlert("Failed to allocate staging texture for large texture upload.");
return;
}
// Copy data to staging texture first, then to the "real" texture.
staging_texture->WriteTexels(0, 0, width, height, buffer, source_pitch);
staging_texture->CopyToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width,
height, level, 0);
upload_buffer = temp_buffer->GetBuffer();
upload_buffer_offset = 0;
temp_buffer->Write(0, buffer, upload_size, true);
temp_buffer->Unmap();
}
// Copy from the streaming buffer to the actual image.
VkBufferImageCopy image_copy = {
upload_buffer_offset, // VkDeviceSize bufferOffset
row_length, // uint32_t bufferRowLength
0, // uint32_t bufferImageHeight
{VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource
{0, 0, 0}, // VkOffset3D imageOffset
{width, height, 1} // VkExtent3D imageExtent
};
vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer,
m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&image_copy);
}
void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect,
@ -544,6 +527,11 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
{
_assert_(level < config.levels);
// We can't dump compressed textures currently (it would mean drawing them to a RGBA8
// framebuffer, and saving that). TextureCache does not call Save for custom textures
// anyway, so this is fine for now.
_assert_(config.format == HostTextureFormat::RGBA8);
// Determine dimensions of image we want to save.
u32 level_width = std::max(1u, config.width >> level);
u32 level_height = std::max(1u, config.height >> level);
@ -582,7 +570,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
// It's okay to throw this texture away immediately, since we're done with it, and
// we blocked until the copy completed on the GPU anyway.
bool result = TextureToPng(reinterpret_cast<u8*>(staging_texture->GetMapPointer()),
staging_texture->GetRowStride(), filename, level_width, level_height);
static_cast<u32>(staging_texture->GetRowStride()), filename,
level_width, level_height);
staging_texture->Unmap();
return result;

View File

@ -53,6 +53,20 @@ bool IsDepthFormat(VkFormat format)
}
}
// Reports whether the given format is one of the block-compressed formats handled here
// (BC1, BC2 or BC3). Every other format yields false.
bool IsCompressedFormat(VkFormat format)
{
  const bool is_bc1 = (format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK);
  const bool is_bc2 = (format == VK_FORMAT_BC2_UNORM_BLOCK);
  const bool is_bc3 = (format == VK_FORMAT_BC3_UNORM_BLOCK);
  return is_bc1 || is_bc2 || is_bc3;
}
VkFormat GetLinearFormat(VkFormat format)
{
switch (format)
@ -74,6 +88,25 @@ VkFormat GetLinearFormat(VkFormat format)
}
}
// Translates a host texture format into the Vulkan format used to store it.
// DXT1/DXT3/DXT5 map to BC1/BC2/BC3 respectively; RGBA8 and any other value map to
// VK_FORMAT_R8G8B8A8_UNORM.
VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format)
{
  if (format == HostTextureFormat::DXT1)
    return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;

  if (format == HostTextureFormat::DXT3)
    return VK_FORMAT_BC2_UNORM_BLOCK;

  if (format == HostTextureFormat::DXT5)
    return VK_FORMAT_BC3_UNORM_BLOCK;

  // RGBA8, plus the fallback for anything not listed above.
  return VK_FORMAT_R8G8B8A8_UNORM;
}
u32 GetTexelSize(VkFormat format)
{
// Only contains pixel formats we use.
@ -91,12 +124,59 @@ u32 GetTexelSize(VkFormat format)
case VK_FORMAT_B8G8R8A8_UNORM:
return 4;
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
return 8;
case VK_FORMAT_BC2_UNORM_BLOCK:
case VK_FORMAT_BC3_UNORM_BLOCK:
return 16;
default:
PanicAlert("Unhandled pixel format");
return 1;
}
}
// Returns the block dimension for the given format: 4 for the BC1/BC2/BC3 compressed
// formats, and 1 for every other format.
u32 GetBlockSize(VkFormat format)
{
  const bool uses_4x4_blocks = format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK ||
                               format == VK_FORMAT_BC2_UNORM_BLOCK ||
                               format == VK_FORMAT_BC3_UNORM_BLOCK;
  return uses_4x4_blocks ? 4 : 1;
}
// Computes the size in bytes of one row of source data for a texture of the given format.
//   format:     Vulkan format of the texture data.
//   row_length: Length of a row, in texels.
// For BC1/BC2/BC3 a "row" is one row of 4-texel-wide blocks (8 bytes per block for BC1,
// 16 bytes per block for BC2/BC3). For the 32-bit formats the result is row_length * 4.
// Any other format raises a PanicAlert and returns row_length unchanged.
size_t GetPitchForTexture(VkFormat format, u32 row_length)
{
  // Number of blocks needed to cover row_length texels. This rounds up so that a
  // partially-filled trailing block is still counted; plain division would drop it
  // (e.g. 6 texels -> 1 block instead of 2). It is written as quotient plus remainder
  // test rather than (row_length + 3) / 4 so that it cannot wrap around for large values.
  // At least one block is always reported, so row_length == 0 still yields one block.
  const u32 blocks_per_row = std::max(1u, row_length / 4 + ((row_length % 4) != 0 ? 1u : 0u));

  switch (format)
  {
  case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
    return static_cast<size_t>(blocks_per_row) * 8;

  case VK_FORMAT_BC2_UNORM_BLOCK:
  case VK_FORMAT_BC3_UNORM_BLOCK:
    return static_cast<size_t>(blocks_per_row) * 16;

  case VK_FORMAT_R8G8B8A8_UNORM:
  case VK_FORMAT_B8G8R8A8_UNORM:
  case VK_FORMAT_R32_SFLOAT:
  case VK_FORMAT_D32_SFLOAT:
    return static_cast<size_t>(row_length) * 4;

  default:
    PanicAlert("Unhandled pixel format");
    return row_length;
  }
}
VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height)
{
VkRect2D out;

View File

@ -25,8 +25,13 @@ size_t AlignBufferOffset(size_t offset, size_t alignment);
u32 MakeRGBA8Color(float r, float g, float b, float a);
bool IsDepthFormat(VkFormat format);
// Returns true for the BC1/BC2/BC3 block-compressed formats, false for anything else.
bool IsCompressedFormat(VkFormat format);
VkFormat GetLinearFormat(VkFormat format);
// Maps a host texture format to a Vulkan format: DXT1/DXT3/DXT5 become BC1/BC2/BC3,
// RGBA8 and any other value become VK_FORMAT_R8G8B8A8_UNORM.
VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format);
// Size of one texel for the formats we use; for BC1 this is 8 and for BC2/BC3 it is 16
// (presumably bytes per compressed block - confirm against callers).
// Raises a PanicAlert and returns 1 for formats it does not handle.
u32 GetTexelSize(VkFormat format);
// Block dimension of the format: 4 for BC1/BC2/BC3, 1 for everything else.
u32 GetBlockSize(VkFormat format);
// Size of one row of data for row_length texels in the given format. For BC1/BC2/BC3 this
// is computed per row of blocks rather than per row of texels.
size_t GetPitchForTexture(VkFormat format, u32 row_length);
// Clamps a VkRect2D to the specified dimensions.
VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height);

View File

@ -284,6 +284,9 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
config->backend_info.bSupportsDepthClamp =
(features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE);
// textureCompressionBC implies BC1 through BC7, which is a superset of DXT1/3/5, which we need.
config->backend_info.bSupportsST3CTextures = features.textureCompressionBC == VK_TRUE;
// Our usage of primitive restart appears to be broken on AMD's binary drivers.
// Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4.
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART))
@ -460,6 +463,7 @@ bool VulkanContext::SelectDeviceFeatures()
m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise;
m_device_features.shaderClipDistance = available_features.shaderClipDistance;
m_device_features.depthClamp = available_features.depthClamp;
m_device_features.textureCompressionBC = available_features.textureCompressionBC;
return true;
}