Vulkan: Support native compressed textures

This commit is contained in:
Stenzek 2017-04-16 22:23:33 +10:00
parent 3f18c5e0f1
commit c53a60f3c3
6 changed files with 145 additions and 67 deletions

View File

@ -109,7 +109,7 @@ constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
// streaming buffer and be blocking frequently. Games are unlikely to have textures this // streaming buffer and be blocking frequently. Games are unlikely to have textures this
// large anyway, so it's only really an issue for HD texture packs, and memory is not // large anyway, so it's only really an issue for HD texture packs, and memory is not
// a limiting factor in these scenarios anyway. // a limiting factor in these scenarios anyway.
constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4; constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8;
// Streaming uniform buffer size // Streaming uniform buffer size
constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;

View File

@ -127,7 +127,7 @@ void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size)
void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches) void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches)
{ {
_assert_((offset + size) <= m_size); _assert_((offset + size) <= m_size);
_assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
if (invalidate_caches) if (invalidate_caches)
InvalidateCPUCache(offset, size); InvalidateCPUCache(offset, size);
@ -138,7 +138,7 @@ void StagingBuffer::Write(VkDeviceSize offset, const void* data, size_t size,
bool invalidate_caches) bool invalidate_caches)
{ {
_assert_((offset + size) <= m_size); _assert_((offset + size) <= m_size);
_assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
memcpy(m_map_pointer + (offset - m_map_offset), data, size); memcpy(m_map_pointer + (offset - m_map_offset), data, size);
if (invalidate_caches) if (invalidate_caches)

View File

@ -9,6 +9,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "Common/Align.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/CommonFuncs.h" #include "Common/CommonFuncs.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
@ -238,9 +239,10 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
// Allocate texture object // Allocate texture object
VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(config.format);
std::unique_ptr<Texture2D> texture = Texture2D::Create( std::unique_ptr<Texture2D> texture = Texture2D::Create(
config.width, config.height, config.levels, config.layers, TEXTURECACHE_TEXTURE_FORMAT, config.width, config.height, config.levels, config.layers, vk_format, VK_SAMPLE_COUNT_1_BIT,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage);
if (!texture) if (!texture)
return nullptr; return nullptr;
@ -366,87 +368,68 @@ void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_l
m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// Does this texture data fit within the streaming buffer? // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows
u32 upload_width = width; // that lie outside of the texture's dimensions.
u32 upload_pitch = upload_width * sizeof(u32);
u32 upload_size = upload_pitch * height;
u32 upload_alignment = static_cast<u32>(g_vulkan_context->GetBufferImageGranularity()); u32 upload_alignment = static_cast<u32>(g_vulkan_context->GetBufferImageGranularity());
u32 source_pitch = row_length * 4; u32 block_size = Util::GetBlockSize(m_texture->GetFormat());
if ((upload_size + upload_alignment) <= STAGING_TEXTURE_UPLOAD_THRESHOLD && u32 num_rows = Common::AlignUp(height, block_size) / block_size;
(upload_size + upload_alignment) <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) size_t source_pitch = Util::GetPitchForTexture(m_texture->GetFormat(), row_length);
{ size_t upload_size = source_pitch * num_rows;
// Assume tightly packed rows, with no padding as the buffer source. std::unique_ptr<StagingBuffer> temp_buffer;
StreamBuffer* upload_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get(); VkBuffer upload_buffer;
VkDeviceSize upload_buffer_offset;
// Allocate memory from the streaming buffer for the texture data. // Does this texture data fit within the streaming buffer?
if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD &&
upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE)
{
StreamBuffer* stream_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get();
if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
{ {
// Execute the command buffer first. // Execute the command buffer first.
WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer");
Util::ExecuteCurrentCommandsAndRestoreState(false); Util::ExecuteCurrentCommandsAndRestoreState(false);
// Try allocating again. This may cause a fence wait. // Try allocating again. This may cause a fence wait.
if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
PanicAlert("Failed to allocate space in texture upload buffer"); PanicAlert("Failed to allocate space in texture upload buffer");
} }
// Grab buffer pointers // Copy to the streaming buffer.
VkBuffer image_upload_buffer = upload_buffer->GetBuffer(); upload_buffer = stream_buffer->GetBuffer();
VkDeviceSize image_upload_buffer_offset = upload_buffer->GetCurrentOffset(); upload_buffer_offset = stream_buffer->GetCurrentOffset();
u8* image_upload_buffer_pointer = upload_buffer->GetCurrentHostPointer(); std::memcpy(stream_buffer->GetCurrentHostPointer(), buffer, upload_size);
stream_buffer->CommitMemory(upload_size);
// Copy to the buffer using the stride from the subresource layout
const u8* source_ptr = buffer;
if (upload_pitch != source_pitch)
{
VkDeviceSize copy_pitch = std::min(source_pitch, upload_pitch);
for (unsigned int row = 0; row < height; row++)
{
memcpy(image_upload_buffer_pointer + row * upload_pitch, source_ptr + row * source_pitch,
copy_pitch);
}
} }
else else
{ {
// Can copy the whole thing in one block, the pitch matches // Create a temporary staging buffer that is destroyed after the image is copied.
memcpy(image_upload_buffer_pointer, source_ptr, upload_size); temp_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_UPLOAD, upload_size,
} VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
if (!temp_buffer || !temp_buffer->Map())
// Flush buffer memory if necessary
upload_buffer->CommitMemory(upload_size);
// Copy from the streaming buffer to the actual image.
VkBufferImageCopy image_copy = {
image_upload_buffer_offset, // VkDeviceSize bufferOffset
0, // uint32_t bufferRowLength
0, // uint32_t bufferImageHeight
{VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource
{0, 0, 0}, // VkOffset3D imageOffset
{width, height, 1} // VkExtent3D imageExtent
};
vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), image_upload_buffer,
m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&image_copy);
}
else
{
// Slow path. The data for the image is too large to fit in the streaming buffer, so we need
// to allocate a temporary texture to store the data in, then copy to the real texture.
std::unique_ptr<StagingTexture2D> staging_texture = StagingTexture2D::Create(
STAGING_BUFFER_TYPE_UPLOAD, width, height, TEXTURECACHE_TEXTURE_FORMAT);
if (!staging_texture || !staging_texture->Map())
{ {
PanicAlert("Failed to allocate staging texture for large texture upload."); PanicAlert("Failed to allocate staging texture for large texture upload.");
return; return;
} }
// Copy data to staging texture first, then to the "real" texture. upload_buffer = temp_buffer->GetBuffer();
staging_texture->WriteTexels(0, 0, width, height, buffer, source_pitch); upload_buffer_offset = 0;
staging_texture->CopyToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), temp_buffer->Write(0, buffer, upload_size, true);
m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width, temp_buffer->Unmap();
height, level, 0);
} }
// Copy from the streaming buffer to the actual image.
VkBufferImageCopy image_copy = {
upload_buffer_offset, // VkDeviceSize bufferOffset
row_length, // uint32_t bufferRowLength
0, // uint32_t bufferImageHeight
{VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource
{0, 0, 0}, // VkOffset3D imageOffset
{width, height, 1} // VkExtent3D imageExtent
};
vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer,
m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&image_copy);
} }
void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect, void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect,
@ -544,6 +527,11 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
{ {
_assert_(level < config.levels); _assert_(level < config.levels);
// We can't dump compressed textures currently (it would mean drawing them to a RGBA8
// framebuffer, and saving that). TextureCache does not call Save for custom textures
// anyway, so this is fine for now.
_assert_(config.format == HostTextureFormat::RGBA8);
// Determine dimensions of image we want to save. // Determine dimensions of image we want to save.
u32 level_width = std::max(1u, config.width >> level); u32 level_width = std::max(1u, config.width >> level);
u32 level_height = std::max(1u, config.height >> level); u32 level_height = std::max(1u, config.height >> level);
@ -582,7 +570,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
// It's okay to throw this texture away immediately, since we're done with it, and // It's okay to throw this texture away immediately, since we're done with it, and
// we blocked until the copy completed on the GPU anyway. // we blocked until the copy completed on the GPU anyway.
bool result = TextureToPng(reinterpret_cast<u8*>(staging_texture->GetMapPointer()), bool result = TextureToPng(reinterpret_cast<u8*>(staging_texture->GetMapPointer()),
staging_texture->GetRowStride(), filename, level_width, level_height); static_cast<u32>(staging_texture->GetRowStride()), filename,
level_width, level_height);
staging_texture->Unmap(); staging_texture->Unmap();
return result; return result;

View File

@ -53,6 +53,20 @@ bool IsDepthFormat(VkFormat format)
} }
} }
// Returns true if the given Vulkan format is one of the block-compressed
// (S3TC/BCn) formats used for native compressed textures; false for all
// uncompressed formats.
bool IsCompressedFormat(VkFormat format)
{
  return format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK || format == VK_FORMAT_BC2_UNORM_BLOCK ||
         format == VK_FORMAT_BC3_UNORM_BLOCK;
}
VkFormat GetLinearFormat(VkFormat format) VkFormat GetLinearFormat(VkFormat format)
{ {
switch (format) switch (format)
@ -74,6 +88,25 @@ VkFormat GetLinearFormat(VkFormat format)
} }
} }
// Maps a host texture format to the Vulkan image format used to back it.
// DXT1/3/5 map to the corresponding BC1/BC2/BC3 block formats; RGBA8 and any
// unrecognized format fall back to VK_FORMAT_R8G8B8A8_UNORM.
VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format)
{
  if (format == HostTextureFormat::DXT1)
    return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
  if (format == HostTextureFormat::DXT3)
    return VK_FORMAT_BC2_UNORM_BLOCK;
  if (format == HostTextureFormat::DXT5)
    return VK_FORMAT_BC3_UNORM_BLOCK;

  // HostTextureFormat::RGBA8, and the default for anything unknown.
  return VK_FORMAT_R8G8B8A8_UNORM;
}
u32 GetTexelSize(VkFormat format) u32 GetTexelSize(VkFormat format)
{ {
// Only contains pixel formats we use. // Only contains pixel formats we use.
@ -91,12 +124,59 @@ u32 GetTexelSize(VkFormat format)
case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_B8G8R8A8_UNORM:
return 4; return 4;
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
return 8;
case VK_FORMAT_BC2_UNORM_BLOCK:
case VK_FORMAT_BC3_UNORM_BLOCK:
return 16;
default: default:
PanicAlert("Unhandled pixel format"); PanicAlert("Unhandled pixel format");
return 1; return 1;
} }
} }
// Returns the texel dimension of a compression block for the given format:
// 4 for the BC1/BC2/BC3 block-compressed formats (4x4 texel blocks), and 1
// for uncompressed formats, where each "block" is a single texel.
u32 GetBlockSize(VkFormat format)
{
  const bool is_bc_format = format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK ||
                            format == VK_FORMAT_BC2_UNORM_BLOCK ||
                            format == VK_FORMAT_BC3_UNORM_BLOCK;
  return is_bc_format ? 4 : 1;
}
// Returns the byte pitch of one row of texture data with the given width in
// texels (row_length). For block-compressed formats a "row" is a row of 4x4
// blocks, so the texel width is converted to a block count first.
//
// Fix: the block count must round UP, not truncate — a row of e.g. 6 texels
// still occupies 2 blocks of data. Truncating under-computes the pitch (and
// therefore the upload size) for widths that are not a multiple of 4. This
// matches the AlignUp() used on the height when computing the number of block
// rows, and the standard DDS pitch formula max(1, (w+3)/4) * block_bytes.
size_t GetPitchForTexture(VkFormat format, u32 row_length)
{
  switch (format)
  {
  case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:  // 8 bytes per 4x4 block
    return static_cast<size_t>(std::max(1u, (row_length + 3) / 4)) * 8;

  case VK_FORMAT_BC2_UNORM_BLOCK:  // 16 bytes per 4x4 block
  case VK_FORMAT_BC3_UNORM_BLOCK:
    return static_cast<size_t>(std::max(1u, (row_length + 3) / 4)) * 16;

  case VK_FORMAT_R8G8B8A8_UNORM:
  case VK_FORMAT_B8G8R8A8_UNORM:
  case VK_FORMAT_R32_SFLOAT:
  case VK_FORMAT_D32_SFLOAT:
    return static_cast<size_t>(row_length) * 4;  // 4 bytes per texel

  default:
    PanicAlert("Unhandled pixel format");
    return row_length;
  }
}
VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height) VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height)
{ {
VkRect2D out; VkRect2D out;

View File

@ -25,8 +25,13 @@ size_t AlignBufferOffset(size_t offset, size_t alignment);
u32 MakeRGBA8Color(float r, float g, float b, float a); u32 MakeRGBA8Color(float r, float g, float b, float a);
bool IsDepthFormat(VkFormat format); bool IsDepthFormat(VkFormat format);
bool IsCompressedFormat(VkFormat format);
VkFormat GetLinearFormat(VkFormat format); VkFormat GetLinearFormat(VkFormat format);
VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format);
u32 GetTexelSize(VkFormat format); u32 GetTexelSize(VkFormat format);
u32 GetBlockSize(VkFormat format);
size_t GetPitchForTexture(VkFormat format, u32 row_length);
// Clamps a VkRect2D to the specified dimensions. // Clamps a VkRect2D to the specified dimensions.
VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height); VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height);

View File

@ -284,6 +284,9 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
config->backend_info.bSupportsDepthClamp = config->backend_info.bSupportsDepthClamp =
(features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE); (features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE);
// textureCompressionBC implies BC1 through BC7, which is a superset of DXT1/3/5, which we need.
config->backend_info.bSupportsST3CTextures = features.textureCompressionBC == VK_TRUE;
// Our usage of primitive restart appears to be broken on AMD's binary drivers. // Our usage of primitive restart appears to be broken on AMD's binary drivers.
// Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4. // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4.
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART)) if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART))
@ -460,6 +463,7 @@ bool VulkanContext::SelectDeviceFeatures()
m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise; m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise;
m_device_features.shaderClipDistance = available_features.shaderClipDistance; m_device_features.shaderClipDistance = available_features.shaderClipDistance;
m_device_features.depthClamp = available_features.depthClamp; m_device_features.depthClamp = available_features.depthClamp;
m_device_features.textureCompressionBC = available_features.textureCompressionBC;
return true; return true;
} }