Vulkan: Support native compressed textures

This commit is contained in:
Stenzek 2017-04-16 22:23:33 +10:00
parent 3f18c5e0f1
commit c53a60f3c3
6 changed files with 145 additions and 67 deletions

View File

@ -109,7 +109,7 @@ constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
// streaming buffer and be blocking frequently. Games are unlikely to have textures this // streaming buffer and be blocking frequently. Games are unlikely to have textures this
// large anyway, so it's only really an issue for HD texture packs, and memory is not // large anyway, so it's only really an issue for HD texture packs, and memory is not
// a limiting factor in these scenarios anyway. // a limiting factor in these scenarios anyway.
constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4; constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8;
// Streaming uniform buffer size // Streaming uniform buffer size
constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;

View File

@ -127,7 +127,7 @@ void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size)
void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches) void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches)
{ {
_assert_((offset + size) <= m_size); _assert_((offset + size) <= m_size);
_assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
if (invalidate_caches) if (invalidate_caches)
InvalidateCPUCache(offset, size); InvalidateCPUCache(offset, size);
@ -138,7 +138,7 @@ void StagingBuffer::Write(VkDeviceSize offset, const void* data, size_t size,
bool invalidate_caches) bool invalidate_caches)
{ {
_assert_((offset + size) <= m_size); _assert_((offset + size) <= m_size);
_assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
memcpy(m_map_pointer + (offset - m_map_offset), data, size); memcpy(m_map_pointer + (offset - m_map_offset), data, size);
if (invalidate_caches) if (invalidate_caches)

View File

@ -9,6 +9,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "Common/Align.h"
#include "Common/Assert.h" #include "Common/Assert.h"
#include "Common/CommonFuncs.h" #include "Common/CommonFuncs.h"
#include "Common/Logging/Log.h" #include "Common/Logging/Log.h"
@ -238,9 +239,10 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
// Allocate texture object // Allocate texture object
VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(config.format);
std::unique_ptr<Texture2D> texture = Texture2D::Create( std::unique_ptr<Texture2D> texture = Texture2D::Create(
config.width, config.height, config.levels, config.layers, TEXTURECACHE_TEXTURE_FORMAT, config.width, config.height, config.levels, config.layers, vk_format, VK_SAMPLE_COUNT_1_BIT,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage);
if (!texture) if (!texture)
return nullptr; return nullptr;
@ -366,87 +368,68 @@ void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_l
m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
// Does this texture data fit within the streaming buffer? // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows
u32 upload_width = width; // that lie outside of the texture's dimensions.
u32 upload_pitch = upload_width * sizeof(u32);
u32 upload_size = upload_pitch * height;
u32 upload_alignment = static_cast<u32>(g_vulkan_context->GetBufferImageGranularity()); u32 upload_alignment = static_cast<u32>(g_vulkan_context->GetBufferImageGranularity());
u32 source_pitch = row_length * 4; u32 block_size = Util::GetBlockSize(m_texture->GetFormat());
if ((upload_size + upload_alignment) <= STAGING_TEXTURE_UPLOAD_THRESHOLD && u32 num_rows = Common::AlignUp(height, block_size) / block_size;
(upload_size + upload_alignment) <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) size_t source_pitch = Util::GetPitchForTexture(m_texture->GetFormat(), row_length);
{ size_t upload_size = source_pitch * num_rows;
// Assume tightly packed rows, with no padding as the buffer source. std::unique_ptr<StagingBuffer> temp_buffer;
StreamBuffer* upload_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get(); VkBuffer upload_buffer;
VkDeviceSize upload_buffer_offset;
// Allocate memory from the streaming buffer for the texture data. // Does this texture data fit within the streaming buffer?
if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD &&
upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE)
{
StreamBuffer* stream_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get();
if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
{ {
// Execute the command buffer first. // Execute the command buffer first.
WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer");
Util::ExecuteCurrentCommandsAndRestoreState(false); Util::ExecuteCurrentCommandsAndRestoreState(false);
// Try allocating again. This may cause a fence wait. // Try allocating again. This may cause a fence wait.
if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
PanicAlert("Failed to allocate space in texture upload buffer"); PanicAlert("Failed to allocate space in texture upload buffer");
} }
// Grab buffer pointers // Copy to the streaming buffer.
VkBuffer image_upload_buffer = upload_buffer->GetBuffer(); upload_buffer = stream_buffer->GetBuffer();
VkDeviceSize image_upload_buffer_offset = upload_buffer->GetCurrentOffset(); upload_buffer_offset = stream_buffer->GetCurrentOffset();
u8* image_upload_buffer_pointer = upload_buffer->GetCurrentHostPointer(); std::memcpy(stream_buffer->GetCurrentHostPointer(), buffer, upload_size);
stream_buffer->CommitMemory(upload_size);
// Copy to the buffer using the stride from the subresource layout
const u8* source_ptr = buffer;
if (upload_pitch != source_pitch)
{
VkDeviceSize copy_pitch = std::min(source_pitch, upload_pitch);
for (unsigned int row = 0; row < height; row++)
{
memcpy(image_upload_buffer_pointer + row * upload_pitch, source_ptr + row * source_pitch,
copy_pitch);
}
} }
else else
{ {
// Can copy the whole thing in one block, the pitch matches // Create a temporary staging buffer that is destroyed after the image is copied.
memcpy(image_upload_buffer_pointer, source_ptr, upload_size); temp_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_UPLOAD, upload_size,
} VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
if (!temp_buffer || !temp_buffer->Map())
// Flush buffer memory if necessary
upload_buffer->CommitMemory(upload_size);
// Copy from the streaming buffer to the actual image.
VkBufferImageCopy image_copy = {
image_upload_buffer_offset, // VkDeviceSize bufferOffset
0, // uint32_t bufferRowLength
0, // uint32_t bufferImageHeight
{VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource
{0, 0, 0}, // VkOffset3D imageOffset
{width, height, 1} // VkExtent3D imageExtent
};
vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), image_upload_buffer,
m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&image_copy);
}
else
{
// Slow path. The data for the image is too large to fit in the streaming buffer, so we need
// to allocate a temporary texture to store the data in, then copy to the real texture.
std::unique_ptr<StagingTexture2D> staging_texture = StagingTexture2D::Create(
STAGING_BUFFER_TYPE_UPLOAD, width, height, TEXTURECACHE_TEXTURE_FORMAT);
if (!staging_texture || !staging_texture->Map())
{ {
PanicAlert("Failed to allocate staging texture for large texture upload."); PanicAlert("Failed to allocate staging texture for large texture upload.");
return; return;
} }
// Copy data to staging texture first, then to the "real" texture. upload_buffer = temp_buffer->GetBuffer();
staging_texture->WriteTexels(0, 0, width, height, buffer, source_pitch); upload_buffer_offset = 0;
staging_texture->CopyToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), temp_buffer->Write(0, buffer, upload_size, true);
m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width, temp_buffer->Unmap();
height, level, 0);
} }
// Copy from the streaming buffer to the actual image.
VkBufferImageCopy image_copy = {
upload_buffer_offset, // VkDeviceSize bufferOffset
row_length, // uint32_t bufferRowLength
0, // uint32_t bufferImageHeight
{VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource
{0, 0, 0}, // VkOffset3D imageOffset
{width, height, 1} // VkExtent3D imageExtent
};
vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer,
m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&image_copy);
} }
void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect, void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect,
@ -544,6 +527,11 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
{ {
_assert_(level < config.levels); _assert_(level < config.levels);
// We can't dump compressed textures currently (it would mean drawing them to a RGBA8
// framebuffer, and saving that). TextureCache does not call Save for custom textures
// anyway, so this is fine for now.
_assert_(config.format == HostTextureFormat::RGBA8);
// Determine dimensions of image we want to save. // Determine dimensions of image we want to save.
u32 level_width = std::max(1u, config.width >> level); u32 level_width = std::max(1u, config.width >> level);
u32 level_height = std::max(1u, config.height >> level); u32 level_height = std::max(1u, config.height >> level);
@ -582,7 +570,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
// It's okay to throw this texture away immediately, since we're done with it, and // It's okay to throw this texture away immediately, since we're done with it, and
// we blocked until the copy completed on the GPU anyway. // we blocked until the copy completed on the GPU anyway.
bool result = TextureToPng(reinterpret_cast<u8*>(staging_texture->GetMapPointer()), bool result = TextureToPng(reinterpret_cast<u8*>(staging_texture->GetMapPointer()),
staging_texture->GetRowStride(), filename, level_width, level_height); static_cast<u32>(staging_texture->GetRowStride()), filename,
level_width, level_height);
staging_texture->Unmap(); staging_texture->Unmap();
return result; return result;

View File

@ -53,6 +53,20 @@ bool IsDepthFormat(VkFormat format)
} }
} }
// Returns true if the given Vulkan format is one of the block-compressed
// (S3TC/BCn) formats used for native compressed textures; false for all
// uncompressed formats.
bool IsCompressedFormat(VkFormat format)
{
  return format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK || format == VK_FORMAT_BC2_UNORM_BLOCK ||
         format == VK_FORMAT_BC3_UNORM_BLOCK;
}
VkFormat GetLinearFormat(VkFormat format) VkFormat GetLinearFormat(VkFormat format)
{ {
switch (format) switch (format)
@ -74,6 +88,25 @@ VkFormat GetLinearFormat(VkFormat format)
} }
} }
// Maps a host texture format to the Vulkan image format used to back it.
// DXT1/3/5 map to the corresponding BC1/BC2/BC3 block formats; RGBA8 and any
// unrecognized format fall back to VK_FORMAT_R8G8B8A8_UNORM.
VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format)
{
  if (format == HostTextureFormat::DXT1)
    return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
  if (format == HostTextureFormat::DXT3)
    return VK_FORMAT_BC2_UNORM_BLOCK;
  if (format == HostTextureFormat::DXT5)
    return VK_FORMAT_BC3_UNORM_BLOCK;

  // HostTextureFormat::RGBA8, and the default for anything unknown.
  return VK_FORMAT_R8G8B8A8_UNORM;
}
u32 GetTexelSize(VkFormat format) u32 GetTexelSize(VkFormat format)
{ {
// Only contains pixel formats we use. // Only contains pixel formats we use.
@ -91,12 +124,59 @@ u32 GetTexelSize(VkFormat format)
case VK_FORMAT_B8G8R8A8_UNORM: case VK_FORMAT_B8G8R8A8_UNORM:
return 4; return 4;
case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
return 8;
case VK_FORMAT_BC2_UNORM_BLOCK:
case VK_FORMAT_BC3_UNORM_BLOCK:
return 16;
default: default:
PanicAlert("Unhandled pixel format"); PanicAlert("Unhandled pixel format");
return 1; return 1;
} }
} }
// Returns the texel dimension of a compression block for the given format:
// 4 for the BC1/BC2/BC3 block-compressed formats (4x4 texel blocks), and 1
// for uncompressed formats, where each "block" is a single texel.
u32 GetBlockSize(VkFormat format)
{
  const bool is_bc_format = format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK ||
                            format == VK_FORMAT_BC2_UNORM_BLOCK ||
                            format == VK_FORMAT_BC3_UNORM_BLOCK;
  return is_bc_format ? 4 : 1;
}
// Returns the byte pitch of one row of texture data with the given width in
// texels (row_length). For block-compressed formats a "row" is a row of 4x4
// blocks, so the texel width is converted to a block count first.
//
// Fix: the block count must round UP, not truncate — a row of e.g. 6 texels
// still occupies 2 blocks of data. Truncating under-computes the pitch (and
// therefore the upload size) for widths that are not a multiple of 4. This
// matches the AlignUp() used on the height when computing the number of block
// rows, and the standard DDS pitch formula max(1, (w+3)/4) * block_bytes.
size_t GetPitchForTexture(VkFormat format, u32 row_length)
{
  switch (format)
  {
  case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:  // 8 bytes per 4x4 block
    return static_cast<size_t>(std::max(1u, (row_length + 3) / 4)) * 8;

  case VK_FORMAT_BC2_UNORM_BLOCK:  // 16 bytes per 4x4 block
  case VK_FORMAT_BC3_UNORM_BLOCK:
    return static_cast<size_t>(std::max(1u, (row_length + 3) / 4)) * 16;

  case VK_FORMAT_R8G8B8A8_UNORM:
  case VK_FORMAT_B8G8R8A8_UNORM:
  case VK_FORMAT_R32_SFLOAT:
  case VK_FORMAT_D32_SFLOAT:
    return static_cast<size_t>(row_length) * 4;  // 4 bytes per texel

  default:
    PanicAlert("Unhandled pixel format");
    return row_length;
  }
}
VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height) VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height)
{ {
VkRect2D out; VkRect2D out;

View File

@ -25,8 +25,13 @@ size_t AlignBufferOffset(size_t offset, size_t alignment);
u32 MakeRGBA8Color(float r, float g, float b, float a); u32 MakeRGBA8Color(float r, float g, float b, float a);
bool IsDepthFormat(VkFormat format); bool IsDepthFormat(VkFormat format);
bool IsCompressedFormat(VkFormat format);
VkFormat GetLinearFormat(VkFormat format); VkFormat GetLinearFormat(VkFormat format);
VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format);
u32 GetTexelSize(VkFormat format); u32 GetTexelSize(VkFormat format);
u32 GetBlockSize(VkFormat format);
size_t GetPitchForTexture(VkFormat format, u32 row_length);
// Clamps a VkRect2D to the specified dimensions. // Clamps a VkRect2D to the specified dimensions.
VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height); VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height);

View File

@ -284,6 +284,9 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
config->backend_info.bSupportsDepthClamp = config->backend_info.bSupportsDepthClamp =
(features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE); (features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE);
// textureCompressionBC implies BC1 through BC7, which is a superset of DXT1/3/5, which we need.
config->backend_info.bSupportsST3CTextures = features.textureCompressionBC == VK_TRUE;
// Our usage of primitive restart appears to be broken on AMD's binary drivers. // Our usage of primitive restart appears to be broken on AMD's binary drivers.
// Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4. // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4.
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART)) if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART))
@ -460,6 +463,7 @@ bool VulkanContext::SelectDeviceFeatures()
m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise; m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise;
m_device_features.shaderClipDistance = available_features.shaderClipDistance; m_device_features.shaderClipDistance = available_features.shaderClipDistance;
m_device_features.depthClamp = available_features.depthClamp; m_device_features.depthClamp = available_features.depthClamp;
m_device_features.textureCompressionBC = available_features.textureCompressionBC;
return true; return true;
} }