diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h
index 7b6b6d3583..9d1ab3fecd 100644
--- a/Source/Core/VideoBackends/Vulkan/Constants.h
+++ b/Source/Core/VideoBackends/Vulkan/Constants.h
@@ -109,7 +109,7 @@ constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
 // streaming buffer and be blocking frequently. Games are unlikely to have textures this
 // large anyway, so it's only really an issue for HD texture packs, and memory is not
 // a limiting factor in these scenarios anyway.
-constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4;
+constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8;
 
 // Streaming uniform buffer size
 constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp
index 6bd8170fc9..6924f41eca 100644
--- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp
+++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp
@@ -127,7 +127,7 @@ void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size)
 void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches)
 {
   _assert_((offset + size) <= m_size);
-  _assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset)));
+  _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
   if (invalidate_caches)
     InvalidateCPUCache(offset, size);
 
@@ -138,7 +138,7 @@ void StagingBuffer::Write(VkDeviceSize offset, const void* data, size_t size,
                           bool invalidate_caches)
 {
   _assert_((offset + size) <= m_size);
-  _assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset)));
+  _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset)));
 
   memcpy(m_map_pointer + (offset - m_map_offset), data, size);
   if (invalidate_caches)
diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
index 62c0228010..fad7e789cd 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
@@ -9,6 +9,7 @@
 #include <string>
 #include <vector>
 
+#include "Common/Align.h"
 #include "Common/Assert.h"
 #include "Common/CommonFuncs.h"
 #include "Common/Logging/Log.h"
@@ -238,9 +239,10 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry
     usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 
   // Allocate texture object
+  VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(config.format);
   std::unique_ptr<Texture2D> texture = Texture2D::Create(
-      config.width, config.height, config.levels, config.layers, TEXTURECACHE_TEXTURE_FORMAT,
-      VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage);
+      config.width, config.height, config.levels, config.layers, vk_format, VK_SAMPLE_COUNT_1_BIT,
+      VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage);
 
   if (!texture)
     return nullptr;
@@ -366,87 +368,68 @@ void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_l
   m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 
-  // Does this texture data fit within the streaming buffer?
-  u32 upload_width = width;
-  u32 upload_pitch = upload_width * sizeof(u32);
-  u32 upload_size = upload_pitch * height;
+  // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows
+  // that lie outside of the texture's dimensions.
   u32 upload_alignment = static_cast<u32>(g_vulkan_context->GetBufferImageGranularity());
-  u32 source_pitch = row_length * 4;
-  if ((upload_size + upload_alignment) <= STAGING_TEXTURE_UPLOAD_THRESHOLD &&
-      (upload_size + upload_alignment) <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE)
-  {
-    // Assume tightly packed rows, with no padding as the buffer source.
-    StreamBuffer* upload_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get();
+  u32 block_size = Util::GetBlockSize(m_texture->GetFormat());
+  u32 num_rows = Common::AlignUp(height, block_size) / block_size;
+  size_t source_pitch = Util::GetPitchForTexture(m_texture->GetFormat(), row_length);
+  size_t upload_size = source_pitch * num_rows;
+  std::unique_ptr<StagingBuffer> temp_buffer;
+  VkBuffer upload_buffer;
+  VkDeviceSize upload_buffer_offset;
 
-    // Allocate memory from the streaming buffer for the texture data.
-    if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity()))
+  // Does this texture data fit within the streaming buffer?
+  if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD &&
+      upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE)
+  {
+    StreamBuffer* stream_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get();
+    if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
     {
       // Execute the command buffer first.
       WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer");
       Util::ExecuteCurrentCommandsAndRestoreState(false);
 
       // Try allocating again. This may cause a fence wait.
-      if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity()))
+      if (!stream_buffer->ReserveMemory(upload_size, upload_alignment))
         PanicAlert("Failed to allocate space in texture upload buffer");
     }
 
-    // Grab buffer pointers
-    VkBuffer image_upload_buffer = upload_buffer->GetBuffer();
-    VkDeviceSize image_upload_buffer_offset = upload_buffer->GetCurrentOffset();
-    u8* image_upload_buffer_pointer = upload_buffer->GetCurrentHostPointer();
-
-    // Copy to the buffer using the stride from the subresource layout
-    const u8* source_ptr = buffer;
-    if (upload_pitch != source_pitch)
-    {
-      VkDeviceSize copy_pitch = std::min(source_pitch, upload_pitch);
-      for (unsigned int row = 0; row < height; row++)
-      {
-        memcpy(image_upload_buffer_pointer + row * upload_pitch, source_ptr + row * source_pitch,
-               copy_pitch);
-      }
-    }
-    else
-    {
-      // Can copy the whole thing in one block, the pitch matches
-      memcpy(image_upload_buffer_pointer, source_ptr, upload_size);
-    }
-
-    // Flush buffer memory if necessary
-    upload_buffer->CommitMemory(upload_size);
-
-    // Copy from the streaming buffer to the actual image.
-    VkBufferImageCopy image_copy = {
-        image_upload_buffer_offset,                // VkDeviceSize                bufferOffset
-        0,                                         // uint32_t                    bufferRowLength
-        0,                                         // uint32_t                    bufferImageHeight
-        {VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1},  // VkImageSubresourceLayers    imageSubresource
-        {0, 0, 0},                                 // VkOffset3D                  imageOffset
-        {width, height, 1}                         // VkExtent3D                  imageExtent
-    };
-    vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), image_upload_buffer,
-                           m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
-                           &image_copy);
+    // Copy to the streaming buffer.
+    upload_buffer = stream_buffer->GetBuffer();
+    upload_buffer_offset = stream_buffer->GetCurrentOffset();
+    std::memcpy(stream_buffer->GetCurrentHostPointer(), buffer, upload_size);
+    stream_buffer->CommitMemory(upload_size);
   }
   else
   {
-    // Slow path. The data for the image is too large to fit in the streaming buffer, so we need
-    // to allocate a temporary texture to store the data in, then copy to the real texture.
-    std::unique_ptr<StagingTexture2D> staging_texture = StagingTexture2D::Create(
-        STAGING_BUFFER_TYPE_UPLOAD, width, height, TEXTURECACHE_TEXTURE_FORMAT);
-
-    if (!staging_texture || !staging_texture->Map())
+    // Create a temporary staging buffer that is destroyed after the image is copied.
+    temp_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_UPLOAD, upload_size,
+                                        VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
+    if (!temp_buffer || !temp_buffer->Map())
     {
       PanicAlert("Failed to allocate staging texture for large texture upload.");
       return;
     }
 
-    // Copy data to staging texture first, then to the "real" texture.
-    staging_texture->WriteTexels(0, 0, width, height, buffer, source_pitch);
-    staging_texture->CopyToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
-                                 m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width,
-                                 height, level, 0);
+    upload_buffer = temp_buffer->GetBuffer();
+    upload_buffer_offset = 0;
+    temp_buffer->Write(0, buffer, upload_size, true);
+    temp_buffer->Unmap();
   }
+
+  // Copy from whichever upload buffer was used above to the actual image.
+  VkBufferImageCopy image_copy = {
+      upload_buffer_offset,                      // VkDeviceSize                bufferOffset
+      row_length,                                // uint32_t                    bufferRowLength
+      0,                                         // uint32_t                    bufferImageHeight
+      {VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1},  // VkImageSubresourceLayers    imageSubresource
+      {0, 0, 0},                                 // VkOffset3D                  imageOffset
+      {width, height, 1}                         // VkExtent3D                  imageExtent
+  };
+  vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer,
+                         m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+                         &image_copy);
 }
 
 void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect,
@@ -544,6 +527,11 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
 {
   _assert_(level < config.levels);
 
+  // We can't dump compressed textures currently (it would mean drawing them to a RGBA8
+  // framebuffer, and saving that). TextureCache does not call Save for custom textures
+  // anyway, so this is fine for now.
+  _assert_(config.format == HostTextureFormat::RGBA8);
+
   // Determine dimensions of image we want to save.
   u32 level_width = std::max(1u, config.width >> level);
   u32 level_height = std::max(1u, config.height >> level);
@@ -582,7 +570,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l
   // It's okay to throw this texture away immediately, since we're done with it, and
   // we blocked until the copy completed on the GPU anyway.
   bool result = TextureToPng(reinterpret_cast<u8*>(staging_texture->GetMapPointer()),
-                             staging_texture->GetRowStride(), filename, level_width, level_height);
+                             static_cast<u32>(staging_texture->GetRowStride()), filename,
+                             level_width, level_height);
 
   staging_texture->Unmap();
   return result;
diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp
index 5de96e23cb..a060cf3485 100644
--- a/Source/Core/VideoBackends/Vulkan/Util.cpp
+++ b/Source/Core/VideoBackends/Vulkan/Util.cpp
@@ -53,6 +53,20 @@ bool IsDepthFormat(VkFormat format)
   }
 }
 
+// Returns true only for the BC1, BC2 and BC3 block formats.
+bool IsCompressedFormat(VkFormat format)
+{
+  const bool is_bc1 = format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
+  const bool is_bc2 = format == VK_FORMAT_BC2_UNORM_BLOCK;
+  const bool is_bc3 = format == VK_FORMAT_BC3_UNORM_BLOCK;
+
+  // Every other format is reported as not compressed.
+  if (is_bc1 || is_bc2 || is_bc3)
+    return true;
+
+  return false;
+}
+
 VkFormat GetLinearFormat(VkFormat format)
 {
   switch (format)
@@ -74,6 +88,25 @@ VkFormat GetLinearFormat(VkFormat format)
   }
 }
 
+// Translates a HostTextureFormat value into the VkFormat it is paired with.
+VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format)
+{
+  // DXT1/DXT3/DXT5 are paired with the BC1/BC2/BC3 block formats respectively.
+  if (format == HostTextureFormat::DXT1)
+    return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
+
+  if (format == HostTextureFormat::DXT3)
+    return VK_FORMAT_BC2_UNORM_BLOCK;
+
+  if (format == HostTextureFormat::DXT5)
+    return VK_FORMAT_BC3_UNORM_BLOCK;
+
+  // HostTextureFormat::RGBA8, like every value not matched above, falls through to
+  // the plain RGBA8 format.
+  const VkFormat fallback_format = VK_FORMAT_R8G8B8A8_UNORM;
+  return fallback_format;
+}
+
 u32 GetTexelSize(VkFormat format)
 {
   // Only contains pixel formats we use.
@@ -91,12 +124,59 @@ u32 GetTexelSize(VkFormat format)
   case VK_FORMAT_B8G8R8A8_UNORM:
     return 4;
 
+  case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+    return 8;
+
+  case VK_FORMAT_BC2_UNORM_BLOCK:
+  case VK_FORMAT_BC3_UNORM_BLOCK:
+    return 16;
+
   default:
     PanicAlert("Unhandled pixel format");
     return 1;
   }
 }
 
+// Returns 4 for the BC1/BC2/BC3 block formats, and 1 for any other format.
+u32 GetBlockSize(VkFormat format)
+{
+  const bool is_block_format = format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK ||
+                               format == VK_FORMAT_BC2_UNORM_BLOCK ||
+                               format == VK_FORMAT_BC3_UNORM_BLOCK;
+
+  if (is_block_format)
+    return 4;
+
+  // Everything that is not one of the block formats above.
+  return 1;
+}
+
+// Returns the pitch in bytes of a row that is row_length texels wide (BC: blocks of 4 texels).
+size_t GetPitchForTexture(VkFormat format, u32 row_length)
+{
+  // Round the block count up (overflow-safe) so a trailing partial block is not dropped.
+  const u32 blocks_per_row = std::max(1u, row_length / 4 + ((row_length % 4) != 0 ? 1u : 0u));
+  switch (format)
+  {
+  case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+    return static_cast<size_t>(blocks_per_row) * 8;
+
+  case VK_FORMAT_BC2_UNORM_BLOCK:
+  case VK_FORMAT_BC3_UNORM_BLOCK:
+    return static_cast<size_t>(blocks_per_row) * 16;
+
+  case VK_FORMAT_R8G8B8A8_UNORM:
+  case VK_FORMAT_B8G8R8A8_UNORM:
+  case VK_FORMAT_R32_SFLOAT:
+  case VK_FORMAT_D32_SFLOAT:
+    return static_cast<size_t>(row_length) * 4;
+
+  default:
+    PanicAlert("Unhandled pixel format");
+    return row_length;
+  }
+}
+
 VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height)
 {
   VkRect2D out;
diff --git a/Source/Core/VideoBackends/Vulkan/Util.h b/Source/Core/VideoBackends/Vulkan/Util.h
index d61c699bfe..f4596b89f7 100644
--- a/Source/Core/VideoBackends/Vulkan/Util.h
+++ b/Source/Core/VideoBackends/Vulkan/Util.h
@@ -25,8 +25,13 @@ size_t AlignBufferOffset(size_t offset, size_t alignment);
 u32 MakeRGBA8Color(float r, float g, float b, float a);
 
 bool IsDepthFormat(VkFormat format);
+bool IsCompressedFormat(VkFormat format);
 VkFormat GetLinearFormat(VkFormat format);
+VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format);
 u32 GetTexelSize(VkFormat format);
+u32 GetBlockSize(VkFormat format);
+
+size_t GetPitchForTexture(VkFormat format, u32 row_length);
 
 // Clamps a VkRect2D to the specified dimensions.
 VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height);
diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
index e51dad8091..8978cd2df6 100644
--- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
@@ -284,6 +284,9 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD
   config->backend_info.bSupportsDepthClamp =
       (features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE);
 
+  // textureCompressionBC implies BC1 through BC7, which is a superset of DXT1/3/5, which we need.
+  config->backend_info.bSupportsST3CTextures = features.textureCompressionBC == VK_TRUE;
+
   // Our usage of primitive restart appears to be broken on AMD's binary drivers.
   // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4.
   if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART))
@@ -460,6 +463,7 @@ bool VulkanContext::SelectDeviceFeatures()
   m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise;
   m_device_features.shaderClipDistance = available_features.shaderClipDistance;
   m_device_features.depthClamp = available_features.depthClamp;
+  m_device_features.textureCompressionBC = available_features.textureCompressionBC;
   return true;
 }