GPUDevice: Support compressed textures

2024-11-24 18:10:59 +10:00 · 2024-11-24 18:10:59 +10:00 · 7eb1d4e092
parent 24dfd30839
commit 7eb1d4e092
18 changed files with 841 additions and 196 deletions
--- a/src/util/d3d11_device.cpp
+++ b/src/util/d3d11_device.cpp
@ -200,6 +200,13 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
      (SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) &&
       data.ROVsSupported);
  }
+
+  m_features.dxt_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
+     (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) &&
+      SupportsTextureFormat(GPUTexture::Format::BC3)));
+  m_features.bptc_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7));
 }

 D3D11SwapChain::D3D11SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle,
--- a/src/util/d3d11_texture.cpp
+++ b/src/util/d3d11_texture.cpp
@ -147,23 +147,24 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
  if (HasFlag(Flags::AllowMap))
  {
    void* map;
-    u32 map_stride;
-    if (!Map(&map, &map_stride, x, y, width, height, layer, level))
+    u32 map_pitch;
+    if (!Map(&map, &map_pitch, x, y, width, height, layer, level))
      return false;

-    StringUtil::StrideMemCpy(map, map_stride, data, pitch, GetPixelSize() * width, height);
+    CopyTextureDataForUpload(width, height, m_format, map, map_pitch, data, pitch);
    Unmap();
    return true;
  }

-  const CD3D11_BOX box(static_cast<LONG>(x), static_cast<LONG>(y), 0, static_cast<LONG>(x + width),
-                       static_cast<LONG>(y + height), 1);
+  const u32 bs = GetBlockSize();
+  const D3D11_BOX box = {Common::AlignDownPow2(x, bs),       Common::AlignDownPow2(y, bs),        0U,
+                         Common::AlignUpPow2(x + width, bs), Common::AlignUpPow2(y + height, bs), 1U};
  const u32 srnum = D3D11CalcSubresource(level, layer, m_levels);

  ID3D11DeviceContext1* context = D3D11Device::GetD3DContext();
  CommitClear(context);

-  GPUDevice::GetStatistics().buffer_streamed += height * pitch;
+  GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, pitch);
  GPUDevice::GetStatistics().num_uploads++;

  context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0);
@ -194,10 +195,18 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
    return false;
  }

-  GPUDevice::GetStatistics().buffer_streamed += height * sr.RowPitch;
+  GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, sr.RowPitch);
  GPUDevice::GetStatistics().num_uploads++;

-  *map = static_cast<u8*>(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize());
+  if (IsCompressedFormat(m_format))
+  {
+    *map = static_cast<u8*>(sr.pData) + ((y / GetBlockSize()) * sr.RowPitch) +
+           ((x / GetBlockSize()) * GetPixelSize());
+  }
+  else
+  {
+    *map = static_cast<u8*>(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize());
+  }
  *map_stride = sr.RowPitch;
  m_mapped_subresource = srnum;
  m_state = GPUTexture::State::Dirty;
@ -294,7 +303,7 @@ std::unique_ptr<D3D11Texture> D3D11Texture::Create(ID3D11Device* device, u32 wid

  if (initial_data)
  {
-    GPUDevice::GetStatistics().buffer_streamed += height * initial_data_stride;
+    GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, initial_data_stride);
    GPUDevice::GetStatistics().num_uploads++;
  }

--- a/src/util/d3d12_device.cpp
+++ b/src/util/d3d12_device.cpp
@ -1366,6 +1366,13 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
      SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
      options.ROVsSupported;
  }
+
+  m_features.dxt_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
+     (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) &&
+      SupportsTextureFormat(GPUTexture::Format::BC3)));
+  m_features.bptc_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7));
 }

 void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
--- a/src/util/d3d12_texture.cpp
+++ b/src/util/d3d12_texture.cpp
@ -340,24 +340,24 @@ ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate()
  return dev.GetInitCommandList();
 }

-void D3D12Texture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch,
-                                            u32 upload_pitch) const
-{
-  StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height);
-}
-
 ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
-                                                          u32 height) const
+                                                          u32 height, u32 buffer_size) const
 {
-  const u32 size = upload_pitch * height;
  ComPtr<ID3D12Resource> resource;
  ComPtr<D3D12MA::Allocation> allocation;

  const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD,
                                                    D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
-  const D3D12_RESOURCE_DESC resource_desc = {
-    D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
-    D3D12_RESOURCE_FLAG_NONE};
+  const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
+                                             0,
+                                             buffer_size,
+                                             1,
+                                             1,
+                                             1,
+                                             DXGI_FORMAT_UNKNOWN,
+                                             {1, 0},
+                                             D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
+                                             D3D12_RESOURCE_FLAG_NONE};
  HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
    &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(),
    IID_PPV_ARGS(resource.GetAddressOf()));
@ -375,9 +375,9 @@ ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32
    return nullptr;
  }

-  CopyTextureDataForUpload(map_ptr, data, width, height, pitch, upload_pitch);
+  CopyTextureDataForUpload(width, height, m_format, map_ptr, upload_pitch, data, pitch);

-  const D3D12_RANGE write_range = {0, size};
+  const D3D12_RANGE write_range = {0, buffer_size};
  resource->Unmap(0, &write_range);

  // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
@ -395,8 +395,8 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
  D3D12Device& dev = D3D12Device::GetInstance();
  D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer();

-  const u32 upload_pitch = Common::AlignUpPow2<u32>(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-  const u32 required_size = height * upload_pitch;
+  const u32 upload_pitch = Common::AlignUpPow2<u32>(CalcUploadPitch(width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+  const u32 required_size = CalcUploadSize(height, upload_pitch);

  D3D12_TEXTURE_COPY_LOCATION srcloc;
  srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
@ -410,7 +410,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
  // Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
  if (required_size > (sbuffer.GetSize() / 2))
  {
-    srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height);
+    srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size);
    if (!srcloc.pResource)
      return false;

@ -431,7 +431,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,

    srcloc.pResource = sbuffer.GetBuffer();
    srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset();
-    CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch);
+    CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch);
    sbuffer.CommitMemory(required_size);
  }

@ -482,8 +482,8 @@ bool D3D12Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
    CommitClear(GetCommandBufferForUpdate());

  // see note in Update() for the reason why.
-  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-  const u32 req_size = height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+  const u32 req_size = CalcUploadSize(m_height, aligned_pitch);
  D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer();
  if (req_size >= (buffer.GetSize() / 2))
    return false;
@ -512,8 +512,8 @@ void D3D12Texture::Unmap()
 {
  D3D12Device& dev = D3D12Device::GetInstance();
  D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer();
-  const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-  const u32 req_size = m_map_height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+  const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
  const u32 offset = sb.GetCurrentOffset();
  sb.CommitMemory(req_size);

--- a/src/util/d3d12_texture.h
+++ b/src/util/d3d12_texture.h
@ -80,8 +80,7 @@ private:

  ID3D12GraphicsCommandList4* GetCommandBufferForUpdate();
  ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
-                                              u32 height) const;
-  void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const;
+                                              u32 height, u32 buffer_size) const;
  void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist);

  ComPtr<ID3D12Resource> m_resource;
--- a/src/util/d3d_common.cpp
+++ b/src/util/d3d_common.cpp
@ -650,6 +650,10 @@ static constexpr std::array<D3DCommon::DXGIFormatMapping, static_cast<int>(GPUTe
  {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,      DXGI_FORMAT_R16G16B16A16_FLOAT,       DXGI_FORMAT_UNKNOWN               }, // RGBA16F
  {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT,      DXGI_FORMAT_R32G32B32A32_FLOAT,       DXGI_FORMAT_UNKNOWN               }, // RGBA32F
  {DXGI_FORMAT_R10G10B10A2_UNORM,  DXGI_FORMAT_R10G10B10A2_UNORM,       DXGI_FORMAT_R10G10B10A2_UNORM,        DXGI_FORMAT_UNKNOWN               }, // RGB10A2
+  {DXGI_FORMAT_BC1_UNORM,          DXGI_FORMAT_BC1_UNORM,               DXGI_FORMAT_UNKNOWN,                  DXGI_FORMAT_UNKNOWN               }, // BC1
+  {DXGI_FORMAT_BC2_UNORM,          DXGI_FORMAT_BC2_UNORM,               DXGI_FORMAT_UNKNOWN,                  DXGI_FORMAT_UNKNOWN               }, // BC2
+  {DXGI_FORMAT_BC3_UNORM,          DXGI_FORMAT_BC3_UNORM,               DXGI_FORMAT_UNKNOWN,                  DXGI_FORMAT_UNKNOWN               }, // BC3
+  {DXGI_FORMAT_BC7_UNORM,          DXGI_FORMAT_BC7_UNORM,               DXGI_FORMAT_UNKNOWN,                  DXGI_FORMAT_UNKNOWN               }, // BC7
    // clang-format on
  }};

--- a/src/util/gpu_device.cpp
+++ b/src/util/gpu_device.cpp
@ -1057,8 +1057,22 @@ std::unique_ptr<GPUTexture> GPUDevice::FetchAndUploadTextureImage(const Image& i
 {
  const Image* image_to_upload = &image;
  GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat());
+  bool gpu_format_supported;
+
+  // avoid device query for compressed formats that we've already pretested
+  if (gpu_format >= GPUTexture::Format::BC1 && gpu_format <= GPUTexture::Format::BC3)
+    gpu_format_supported = m_features.dxt_textures;
+  else if (gpu_format == GPUTexture::Format::BC7)
+    gpu_format_supported = m_features.bptc_textures;
+  else if (gpu_format == GPUTexture::Format::RGBA8) // always supported
+    gpu_format_supported = true;
+  else if (gpu_format != GPUTexture::Format::Unknown)
+    gpu_format_supported = SupportsTextureFormat(gpu_format);
+  else
+    gpu_format_supported = false;
+
  std::optional<Image> converted_image;
-  if (!SupportsTextureFormat(gpu_format))
+  if (!gpu_format_supported)
  {
    converted_image = image.ConvertToRGBA8(error);
    if (!converted_image.has_value())
--- a/src/util/gpu_device.h
+++ b/src/util/gpu_device.h
@ -515,6 +515,7 @@ public:
    FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
    FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
    FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
+    FEATURE_MASK_COMPRESSED_TEXTURES = (1 << 9),
  };

  enum class DrawBarrier : u32
@ -553,6 +554,8 @@ public:
    bool pipeline_cache : 1;
    bool prefer_unused_textures : 1;
    bool raster_order_views : 1;
+    bool dxt_textures : 1;
+    bool bptc_textures : 1;
  };

  struct Statistics
--- a/src/util/gpu_texture.cpp
+++ b/src/util/gpu_texture.cpp
@ -25,7 +25,7 @@ GPUTexture::~GPUTexture()

 const char* GPUTexture::GetFormatName(Format format)
 {
-  static constexpr const char* format_names[static_cast<u8>(Format::MaxCount)] = {
+  static constexpr const std::array<const char*, static_cast<size_t>(Format::MaxCount)> format_names = {{
    "Unknown", // Unknown
    "RGBA8",   // RGBA8
    "BGRA8",   // BGRA8
@ -51,43 +51,35 @@ const char* GPUTexture::GetFormatName(Format format)
    "RGBA16F", // RGBA16F
    "RGBA32F", // RGBA32F
    "RGB10A2", // RGB10A2
-  };
+    "BC1",     // BC1
+    "BC2",     // BC2
+    "BC3",     // BC3
+    "BC7",     // BC7
+  }};

  return format_names[static_cast<u8>(format)];
 }

-u32 GPUTexture::GetCompressedBytesPerBlock() const
+u32 GPUTexture::GetBlockSize() const
 {
-  return GetCompressedBytesPerBlock(m_format);
+  return GetBlockSize(m_format);
 }

-u32 GPUTexture::GetCompressedBytesPerBlock(Format format)
+u32 GPUTexture::GetBlockSize(Format format)
 {
-  // TODO: Implement me
-  return GetPixelSize(format);
-}
-
-u32 GPUTexture::GetCompressedBlockSize() const
-{
-  return GetCompressedBlockSize(m_format);
-}
-
-u32 GPUTexture::GetCompressedBlockSize(Format format)
-{
-  // TODO: Implement me
-  /*if (format >= Format::BC1 && format <= Format::BC7)
+  if (format >= Format::BC1 && format <= Format::BC7)
    return 4;
-  else*/
-  return 1;
+  else
+    return 1;
 }

 u32 GPUTexture::CalcUploadPitch(Format format, u32 width)
 {
-  /*
+  // convert to blocks
  if (format >= Format::BC1 && format <= Format::BC7)
    width = Common::AlignUpPow2(width, 4) / 4;
-  */
-  return width * GetCompressedBytesPerBlock(format);
+
+  return width * GetPixelSize(format);
 }

 u32 GPUTexture::CalcUploadPitch(u32 width) const
@ -102,9 +94,11 @@ u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const

 u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch)
 {
-  const u32 block_size = GetCompressedBlockSize(format);
-  const u32 bytes_per_block = GetCompressedBytesPerBlock(format);
-  return ((pitch + (bytes_per_block - 1)) / bytes_per_block) * block_size;
+  const u32 pixel_size = GetPixelSize(format);
+  if (IsCompressedFormat(format))
+    return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * 4;
+  else
+    return pitch / pixel_size;
 }

 u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const
@ -114,36 +108,64 @@ u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const

 u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch)
 {
-  const u32 block_size = GetCompressedBlockSize(format);
+  const u32 block_size = GetBlockSize(format);
  return pitch * ((static_cast<u32>(height) + (block_size - 1)) / block_size);
 }

+// BC1 starts the trailing run of block-compressed entries in the Format enum, so a single ordering
+// comparison against BC1 is enough to classify a format.
+bool GPUTexture::IsCompressedFormat(Format format)
+{
+  const bool is_block_compressed = !(format < Format::BC1);
+  return is_block_compressed;
+}
+
+// Instance convenience overload: classifies this texture's own format via the static overload.
+bool GPUTexture::IsCompressedFormat() const
+{
+  const Format own_format = m_format;
+  return IsCompressedFormat(own_format);
+}
+
 u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height)
 {
  const u32 max_dim = Common::PreviousPow2(std::max(width, height));
  return (std::countr_zero(max_dim) + 1);
 }

+// Copies image rows from src to dst, honouring each side's pitch. Linear formats copy one row per
+// pixel row; compressed formats copy one row per row of 4x4 blocks.
+void GPUTexture::CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch,
+                                          const void* src, u32 src_pitch)
+{
+  // For compressed formats GetPixelSize() is the byte size of one block, not of one pixel.
+  const u32 unit_bytes = GetPixelSize(format);
+  if (!IsCompressedFormat(format))
+  {
+    StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, width * unit_bytes, height);
+    return;
+  }
+
+  // Round both dimensions up to whole blocks before converting to block counts.
+  const u32 row_blocks = Common::AlignUpPow2(width, 4) / 4;
+  const u32 row_count = Common::AlignUpPow2(height, 4) / 4;
+  StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, unit_bytes * row_blocks, row_count);
+}
+
 GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format)
 {
-  static constexpr const std::array<Format, static_cast<size_t>(ImageFormat::MaxCount)> mapping = {{
-    Format::Unknown, // None
-    Format::RGBA8,   // RGBA8
-    Format::BGRA8,   // BGRA8
-    Format::RGB565,  // RGB565
-    Format::Unknown, // RGBA5551
-    Format::Unknown, // BC1
-    Format::Unknown, // BC2
-    Format::Unknown, // BC3
-    Format::Unknown, // BC7
-  }};
+  static constexpr const std::array mapping = {
+    Format::Unknown,  // None
+    Format::RGBA8,    // RGBA8
+    Format::BGRA8,    // BGRA8
+    Format::RGB565,   // RGB565
+    Format::RGBA5551, // RGBA5551
+    Format::Unknown,  // BGR8
+    Format::BC1,      // BC1
+    Format::BC2,      // BC2
+    Format::BC3,      // BC3
+    Format::BC7,      // BC7
+  };
+  static_assert(mapping.size() == static_cast<size_t>(ImageFormat::MaxCount));

  return mapping[static_cast<size_t>(format)];
 }

 ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format)
 {
-  static constexpr const std::array<ImageFormat, static_cast<size_t>(Format::MaxCount)> mapping = {{
+  static constexpr const std::array mapping = {
    ImageFormat::None,     // Unknown
    ImageFormat::RGBA8,    // RGBA8
    ImageFormat::BGRA8,    // BGRA8
@ -169,7 +191,12 @@ ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format)
    ImageFormat::None,     // RGBA16F
    ImageFormat::None,     // RGBA32F
    ImageFormat::None,     // RGB10A2
-  }};
+    ImageFormat::BC1,      // BC1
+    ImageFormat::BC2,      // BC2
+    ImageFormat::BC3,      // BC3
+    ImageFormat::BC7,      // BC7
+  };
+  static_assert(mapping.size() == static_cast<size_t>(Format::MaxCount));

  return mapping[static_cast<size_t>(format)];
 }
@ -226,6 +253,10 @@ u32 GPUTexture::GetPixelSize(GPUTexture::Format format)
    8,  // RGBA16F
    16, // RGBA32F
    4,  // RGB10A2
+    8,  // BC1 - 16 pixels in 64 bits
+    16, // BC2 - 16 pixels in 128 bits
+    16, // BC3 - 16 pixels in 128 bits
+    16, // BC7 - 16 pixels in 128 bits
  }};

  return sizes[static_cast<size_t>(format)];
@ -241,12 +272,6 @@ bool GPUTexture::IsDepthStencilFormat(Format format)
  return (format == Format::D24S8 || format == Format::D32FS8);
 }

-bool GPUTexture::IsCompressedFormat(Format format)
-{
-  // TODO: Implement me
-  return false;
-}
-
 bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
                                Flags flags, Error* error)
 {
@ -318,6 +343,12 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u
    return false;
  }

+  if (IsCompressedFormat(format) && (type != Type::Texture || ((flags & Flags::AllowBindAsImage) != Flags::None)))
+  {
+    Error::SetStringView(error, "Compressed formats are only supported for textures.");
+    return false;
+  }
+
  return true;
 }

--- a/src/util/gpu_texture.h
+++ b/src/util/gpu_texture.h
@ -61,7 +61,11 @@ public:
    RGBA16F,
    RGBA32F,
    RGB10A2,
-    MaxCount
+    BC1, ///< BC1, aka DXT1 compressed texture
+    BC2, ///< BC2, aka DXT2/3 compressed texture
+    BC3, ///< BC3, aka DXT4/5 compressed texture
+    BC7, ///< BC7, aka BPTC compressed texture
+    MaxCount,
  };

  enum class State : u8
@ -95,12 +99,13 @@ public:
  static bool IsDepthFormat(Format format);
  static bool IsDepthStencilFormat(Format format);
  static bool IsCompressedFormat(Format format);
-  static u32 GetCompressedBytesPerBlock(Format format);
-  static u32 GetCompressedBlockSize(Format format);
+  static u32 GetBlockSize(Format format);
  static u32 CalcUploadPitch(Format format, u32 width);
  static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch);
  static u32 CalcUploadSize(Format format, u32 height, u32 pitch);
  static u32 GetFullMipmapCount(u32 width, u32 height);
+  static void CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, const void* src,
+                                       u32 src_pitch);

  static Format GetTextureFormatForImageFormat(ImageFormat format);
  static ImageFormat GetImageFormatForTextureFormat(Format format);
@ -160,8 +165,8 @@ public:

  size_t GetVRAMUsage() const;

-  u32 GetCompressedBytesPerBlock() const;
-  u32 GetCompressedBlockSize() const;
+  bool IsCompressedFormat() const;
+  u32 GetBlockSize() const;
  u32 CalcUploadPitch(u32 width) const;
  u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
  u32 CalcUploadSize(u32 height, u32 pitch) const;
--- a/src/util/image.cpp
+++ b/src/util/image.cpp
@ -46,6 +46,10 @@ static bool WebPBufferSaver(const Image& image, DynamicHeapArray<u8>* data, u8 q
 static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error);
 static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error);

+static bool DDSBufferLoader(Image* image, std::span<const u8> data, Error* error);
+static bool DDSFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error);
+
+namespace {
 struct FormatHandler
 {
  const char* extension;
@ -54,12 +58,14 @@ struct FormatHandler
  bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*);
  bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*);
 };
+} // namespace

 static constexpr FormatHandler s_format_handlers[] = {
  {"png", PNGBufferLoader, PNGBufferSaver, PNGFileLoader, PNGFileSaver},
  {"jpg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver},
  {"jpeg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver},
  {"webp", WebPBufferLoader, WebPBufferSaver, WebPFileLoader, WebPFileSaver},
+  {"dds", DDSBufferLoader, nullptr, DDSFileLoader, nullptr},
 };

 static const FormatHandler* GetFormatHandler(std::string_view extension)
@ -155,17 +161,19 @@ Image& Image::operator=(Image&& move)

 const char* Image::GetFormatName(ImageFormat format)
 {
-  static constexpr std::array<const char*, static_cast<size_t>(ImageFormat::MaxCount)> names = {
+  static constexpr std::array names = {
    "None",    // None
    "RGBA8",   // RGBA8
    "BGRA8",   // BGRA8
    "RGB565",  // RGB565
    "RGB5551", // RGBA5551
+    "BGR8",    // BGR8
    "BC1",     // BC1
    "BC2",     // BC2
    "BC3",     // BC3
    "BC7",     // BC7
  };
+  static_assert(names.size() == static_cast<size_t>(ImageFormat::MaxCount));

  return names[static_cast<size_t>(format)];
 }
@ -178,6 +186,7 @@ u32 Image::GetPixelSize(ImageFormat format)
    4,  // BGRA8
    2,  // RGB565
    2,  // RGBA5551
+    3,  // BGR8
    8,  // BC1 - 16 pixels in 64 bits
    16, // BC2 - 16 pixels in 128 bits
    16, // BC3 - 16 pixels in 128 bits
@ -563,6 +572,27 @@ std::optional<Image> Image::ConvertToRGBA8(Error* error) const
        }
      }
    }
+    break;
+
+    // 24-bit BGR source: expand each 3-byte pixel into a 4-byte RGBA8 pixel with opaque alpha.
+    case ImageFormat::BGR8:
+    {
+      ret = Image(m_width, m_height, ImageFormat::RGBA8);
+      for (u32 y = 0; y < m_height; y++)
+      {
+        const u8* pixels_in = GetRowPixels(y);
+        u8* pixels_out = ret->GetRowPixels(y);
+
+        for (u32 x = 0; x < m_width; x++)
+        {
+          // Source bytes are B, G, R. The packed value places R in the low byte, then G, then B,
+          // and sets the alpha channel to full intensity.
+          const u32 rgba = (ZeroExtend32(pixels_in[2]) | (ZeroExtend32(pixels_in[1]) << 8) |
+                            (ZeroExtend32(pixels_in[0]) << 16) | 0xFF000000u);
+          std::memcpy(pixels_out, &rgba, sizeof(rgba));
+          pixels_in += 3;
+          pixels_out += sizeof(rgba);
+        }
+      }
+    }
    break;

      // TODO: Block format decompression
@ -1220,3 +1250,415 @@ bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp,

  return true;
 }
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// DDS Handler
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// From https://raw.githubusercontent.com/Microsoft/DirectXTex/master/DirectXTex/DDS.h
+//
+// This header defines constants and structures that are useful when parsing
+// DDS files.  DDS files were originally designed to use several structures
+// and constants that are native to DirectDraw and are defined in ddraw.h,
+// such as DDSURFACEDESC2 and DDSCAPS2.  This file defines similar
+// (compatible) constants and structures so that one can use DDS files
+// without needing to include ddraw.h.
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+// http://go.microsoft.com/fwlink/?LinkId=248926
+
+#pragma pack(push, 1)
+
+static constexpr uint32_t DDS_MAGIC = 0x20534444; // "DDS "
+
+// Pixel format descriptor embedded in DDS_HEADER as `ddspf`.
+// The loader either follows dwFourCC (when DDS_FOURCC is set in dwFlags) or compares the whole
+// descriptor against one of the DDSPF_* presets.
+struct DDS_PIXELFORMAT
+{
+  uint32_t dwSize;
+  // DDS_* pixel format flags (DDS_FOURCC, DDS_RGB, DDS_RGBA, ...).
+  uint32_t dwFlags;
+  // Four-character code built with MAKEFOURCC(), e.g. 'DXT1' or 'DX10'.
+  uint32_t dwFourCC;
+  // Bits per pixel for uncompressed layouts (32 or 24 in the DDSPF_* presets).
+  uint32_t dwRGBBitCount;
+  // Per-channel bit masks for uncompressed layouts.
+  uint32_t dwRBitMask;
+  uint32_t dwGBitMask;
+  uint32_t dwBBitMask;
+  uint32_t dwABitMask;
+};
+
+#define DDS_FOURCC 0x00000004     // DDPF_FOURCC
+#define DDS_RGB 0x00000040        // DDPF_RGB
+#define DDS_RGBA 0x00000041       // DDPF_RGB | DDPF_ALPHAPIXELS
+#define DDS_LUMINANCE 0x00020000  // DDPF_LUMINANCE
+#define DDS_LUMINANCEA 0x00020001 // DDPF_LUMINANCE | DDPF_ALPHAPIXELS
+#define DDS_ALPHA 0x00000002      // DDPF_ALPHA
+#define DDS_PAL8 0x00000020       // DDPF_PALETTEINDEXED8
+#define DDS_PAL8A 0x00000021      // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS
+#define DDS_BUMPDUDV 0x00080000   // DDPF_BUMPDUDV
+
+#ifndef MAKEFOURCC
+#define MAKEFOURCC(ch0, ch1, ch2, ch3)                                                                                 \
+  ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) |                     \
+   ((uint32_t)(uint8_t)(ch3) << 24))
+#endif /* defined(MAKEFOURCC) */
+
+#define DDS_HEADER_FLAGS_TEXTURE 0x00001007    // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT
+#define DDS_HEADER_FLAGS_MIPMAP 0x00020000     // DDSD_MIPMAPCOUNT
+#define DDS_HEADER_FLAGS_VOLUME 0x00800000     // DDSD_DEPTH
+#define DDS_HEADER_FLAGS_PITCH 0x00000008      // DDSD_PITCH
+#define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE
+#define DDS_MAX_TEXTURE_SIZE 32768
+
+// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION
+// Values stored in DDS_HEADER_DXT10::resourceDimension. The DX10 header check in ParseDDSHeader
+// rejects anything other than DDS_DIMENSION_TEXTURE2D.
+enum DDS_RESOURCE_DIMENSION
+{
+  DDS_DIMENSION_TEXTURE1D = 2,
+  DDS_DIMENSION_TEXTURE2D = 3,
+  DDS_DIMENSION_TEXTURE3D = 4,
+};
+
+// Main DDS file header, read immediately after the 4-byte DDS_MAGIC value.
+// Declared inside the pack(1) region; a static_assert below pins its size to 124 bytes.
+struct DDS_HEADER
+{
+  // Size reported by the file; the parser rejects values smaller than sizeof(DDS_HEADER).
+  uint32_t dwSize;
+  // DDS_HEADER_FLAGS_* bits (the parser tests the VOLUME and MIPMAP bits).
+  uint32_t dwFlags;
+  uint32_t dwHeight;
+  uint32_t dwWidth;
+  uint32_t dwPitchOrLinearSize;
+  uint32_t dwDepth; // only if DDS_HEADER_FLAGS_VOLUME is set in dwFlags
+  // Only consulted when DDS_HEADER_FLAGS_MIPMAP is set in dwFlags.
+  uint32_t dwMipMapCount;
+  uint32_t dwReserved1[11];
+  DDS_PIXELFORMAT ddspf;
+  uint32_t dwCaps;
+  uint32_t dwCaps2;
+  uint32_t dwCaps3;
+  uint32_t dwCaps4;
+  uint32_t dwReserved2;
+};
+
+// Extension header that follows DDS_HEADER when ddspf.dwFourCC is 'DX10'.
+// A static_assert below pins its size to 20 bytes.
+struct DDS_HEADER_DXT10
+{
+  // DXGI_FORMAT value as a raw integer; the parser matches 71, 74, 77 and 98 to BC1, BC2, BC3 and BC7.
+  uint32_t dxgiFormat;
+  // One of DDS_RESOURCE_DIMENSION; the parser requires DDS_DIMENSION_TEXTURE2D.
+  uint32_t resourceDimension;
+  uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG
+  // The parser requires exactly 1 (array textures are rejected).
+  uint32_t arraySize;
+  uint32_t miscFlags2; // see DDS_MISC_FLAGS2
+};
+
+#pragma pack(pop)
+
+static_assert(sizeof(DDS_HEADER) == 124, "DDS Header size mismatch");
+static_assert(sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch");
+
+constexpr DDS_PIXELFORMAT DDSPF_A8R8G8B8 = {
+  sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000};
+constexpr DDS_PIXELFORMAT DDSPF_X8R8G8B8 = {
+  sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000};
+constexpr DDS_PIXELFORMAT DDSPF_A8B8G8R8 = {
+  sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000};
+constexpr DDS_PIXELFORMAT DDSPF_X8B8G8R8 = {
+  sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000};
+constexpr DDS_PIXELFORMAT DDSPF_R8G8B8 = {
+  sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000};
+
+// End of Microsoft code from DDS.h.
+
+// Returns true when every field of the two DDS pixel format descriptors is identical.
+// Used to recognise uncompressed layouts by comparing against the DDSPF_* presets.
+static bool DDSPixelFormatMatches(const DDS_PIXELFORMAT& pf1, const DDS_PIXELFORMAT& pf2)
+{
+  // Each field appears exactly once; the tuples of references compare element-wise.
+  return std::tie(pf1.dwSize, pf1.dwFlags, pf1.dwFourCC, pf1.dwRGBBitCount, pf1.dwRBitMask, pf1.dwGBitMask,
+                  pf1.dwBBitMask, pf1.dwABitMask) ==
+         std::tie(pf2.dwSize, pf2.dwFlags, pf2.dwFourCC, pf2.dwRGBBitCount, pf2.dwRBitMask, pf2.dwGBitMask,
+                  pf2.dwBBitMask, pf2.dwABitMask);
+}
+
+// Result of parsing a DDS header; filled in by ParseDDSHeader().
+struct DDSLoadInfo
+{
+  // Defaults to 1; the parser sets 4 for the BC1/BC2/BC3/BC7 formats.
+  u32 block_size = 1;
+  // Defaults to 4; the parser sets 8 for BC1 and 16 for BC2/BC3/BC7.
+  u32 bytes_per_block = 4;
+  // Copied from the header's dwWidth/dwHeight after they pass the size validation.
+  u32 width = 0;
+  u32 height = 0;
+  // Taken from the header when the mipmap flag is set (derived from the largest dimension if the
+  // stored count is zero); otherwise 1.
+  u32 mip_count = 0;
+  ImageFormat format = ImageFormat::RGBA8;
+  // NOTE(review): presumably the file offset, byte size and row pitch of the top-level image;
+  // the code that fills these is not visible in this hunk - confirm.
+  s64 base_image_offset = 0;
+  u32 base_image_size = 0;
+  u32 base_image_pitch = 0;
+  // NOTE(review): likely requests forcing alpha for formats without an alpha mask - confirm against the loader.
+  bool clear_alpha = false;
+};
+
+template<typename ReadFunction>
+static bool ParseDDSHeader(const ReadFunction& RF, DDSLoadInfo* info, Error* error)
+{
+  u32 magic;
+  if (!RF(&magic, sizeof(magic), error) || magic != DDS_MAGIC)
+  {
+    Error::AddPrefix(error, "Failed to read magic: ");
+    return false;
+  }
+
+  DDS_HEADER header;
+  u32 header_size = sizeof(header);
+  if (!RF(&header, header_size, error) || header.dwSize < header_size)
+  {
+    Error::AddPrefix(error, "Failed to read header: ");
+    return false;
+  }
+
+  // We should check for DDS_HEADER_FLAGS_TEXTURE here, but some tools don't seem
+  // to set it (e.g. compressonator). But we can still validate the size.
+  if (header.dwWidth == 0 || header.dwWidth >= DDS_MAX_TEXTURE_SIZE || header.dwHeight == 0 ||
+      header.dwHeight >= DDS_MAX_TEXTURE_SIZE)
+  {
+    Error::SetStringFmt(error, "Size is invalid: {}x{}", header.dwWidth, header.dwHeight);
+    return false;
+  }
+
+  // Image should be 2D.
+  if (header.dwFlags & DDS_HEADER_FLAGS_VOLUME)
+  {
+    Error::SetStringView(error, "Volume textures are not supported.");
+    return false;
+  }
+
+  // Width/height were range-checked above; DDS_HEADER_FLAGS_TEXTURE itself is deliberately not enforced.
+  info->width = header.dwWidth;
+  info->height = header.dwHeight;
+
+  // Check for mip levels.
+  if (header.dwFlags & DDS_HEADER_FLAGS_MIPMAP)
+  {
+    info->mip_count = header.dwMipMapCount;
+    if (header.dwMipMapCount != 0)
+    {
+      info->mip_count = header.dwMipMapCount;
+    }
+    else
+    {
+      const u32 max_dim = Common::PreviousPow2(std::max(header.dwWidth, header.dwHeight));
+      info->mip_count = (std::countr_zero(max_dim) + 1);
+    }
+  }
+  else
+  {
+    info->mip_count = 1;
+  }
+
+  // Handle fourcc formats vs uncompressed formats.
+  const bool has_fourcc = (header.ddspf.dwFlags & DDS_FOURCC) != 0;
+  if (has_fourcc)
+  {
+    // Handle DX10 extension header.
+    u32 dxt10_format = 0;
+    if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', '1', '0'))
+    {
+      DDS_HEADER_DXT10 dxt10_header;
+      if (!RF(&dxt10_header, sizeof(dxt10_header), error))
+      {
+        Error::AddPrefix(error, "Failed to read DXT10 header: ");
+        return false;
+      }
+
+      // Can't handle array textures here. Doesn't make sense to use them, anyway.
+      if (dxt10_header.resourceDimension != DDS_DIMENSION_TEXTURE2D || dxt10_header.arraySize != 1)
+      {
+        Error::SetStringView(error, "Only 2D textures are supported.");
+        return false;
+      }
+
+      header_size += sizeof(dxt10_header);
+      dxt10_format = dxt10_header.dxgiFormat;
+    }
+
+    if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '1') || dxt10_format == 71)
+    {
+      info->format = ImageFormat::BC1;
+      info->block_size = 4;
+      info->bytes_per_block = 8;
+    }
+    else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '2') ||
+             header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '3') || dxt10_format == 74)
+    {
+      info->format = ImageFormat::BC2;
+      info->block_size = 4;
+      info->bytes_per_block = 16;
+    }
+    else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '4') ||
+             header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '5') || dxt10_format == 77)
+    {
+      info->format = ImageFormat::BC3;
+      info->block_size = 4;
+      info->bytes_per_block = 16;
+    }
+    else if (dxt10_format == 98)
+    {
+      info->format = ImageFormat::BC7;
+      info->block_size = 4;
+      info->bytes_per_block = 16;
+    }
+    else
+    {
+      Error::SetStringFmt(error, "Unknown format with FOURCC 0x{:08X} / DXT10 format {}", header.ddspf.dwFourCC,
+                          dxt10_format);
+      return false;
+    }
+  }
+  else
+  {
+    if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8R8G8B8))
+    {
+      info->format = ImageFormat::BGRA8;
+    }
+    else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8R8G8B8))
+    {
+      info->format = ImageFormat::BGRA8;
+      info->clear_alpha = true;
+    }
+    else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8B8G8R8))
+    {
+      info->format = ImageFormat::RGBA8;
+      info->clear_alpha = true;
+    }
+    else if (DDSPixelFormatMatches(header.ddspf, DDSPF_R8G8B8))
+    {
+      info->format = ImageFormat::BGR8;
+      info->clear_alpha = true;
+    }
+    else if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8B8G8R8))
+    {
+      info->format = ImageFormat::RGBA8;
+    }
+    else
+    {
+      Error::SetStringFmt(error, "Unhandled format with FOURCC 0x{:08X}", header.ddspf.dwFourCC);
+      return false;
+    }
+
+    // All these formats are RGBA, just with byte swapping.
+    info->block_size = 1;
+    info->bytes_per_block = header.ddspf.dwRGBBitCount / 8;
+  }
+
+  // Mip levels smaller than the block size are padded to multiples of the block size.
+  const u32 blocks_wide = Common::AlignUpPow2(info->width, info->block_size) / info->block_size;
+  const u32 blocks_high = Common::AlignUpPow2(info->height, info->block_size) / info->block_size;
+
+  // Pitch can be specified in the header, otherwise we can derive it from the dimensions. For
+  // compressed formats, both DDS_HEADER_FLAGS_LINEARSIZE and DDS_HEADER_FLAGS_PITCH should be
+  // set. See https://msdn.microsoft.com/en-us/library/windows/desktop/bb943982(v=vs.85).aspx
+  if (header.dwFlags & DDS_HEADER_FLAGS_PITCH && header.dwFlags & DDS_HEADER_FLAGS_LINEARSIZE)
+  {
+    // Convert pitch (in bytes) to texels/row length.
+    if (header.dwPitchOrLinearSize < info->bytes_per_block)
+    {
+      // Likely a corrupted or invalid file.
+      Error::SetStringFmt(error, "Invalid pitch: {}", header.dwPitchOrLinearSize);
+      return false;
+    }
+
+    info->base_image_pitch = header.dwPitchOrLinearSize;
+    info->base_image_size = info->base_image_pitch * blocks_high;
+  }
+  else
+  {
+    // Assume no padding between rows of blocks.
+    info->base_image_pitch = blocks_wide * info->bytes_per_block;
+    info->base_image_size = info->base_image_pitch * blocks_high;
+  }
+
+  info->base_image_offset = sizeof(magic) + header_size;
+
+#if 0
+  // D3D11 cannot handle block compressed textures where the first mip level is not a multiple of the block size.
+  if (mip_level == 0 && info.block_size > 1 && ((width % info.block_size) != 0 || (height % info.block_size) != 0))
+  {
+    Error::SetStringFmt(error,
+                        "Invalid dimensions for DDS texture. For compressed textures of this format, "
+                        "the width/height of the first mip level must be a multiple of {}.",
+                        info.block_size);
+    return false;
+  }
+#endif
+
+  return true;
+}
+
+/// Loads the base mip level of a DDS file into `image`.
+///
+/// The header is read sequentially from the current position of `fp`; the base image is then
+/// located with an absolute seek to the offset computed by ParseDDSHeader().
+///
+/// @param image Destination image; resized to the width/height/format described by the header.
+/// @param path  Not used by this loader.
+/// @param fp    Open file to read from.
+/// @param error Receives a description of the failure when false is returned.
+/// @return true if the header was valid and the base image was read completely.
+bool DDSFileLoader(Image* image, std::string_view path, std::FILE* fp, Error* error)
+{
+  const auto header_reader = [fp](void* buffer, size_t size, Error* error) {
+    if (std::fread(buffer, size, 1, fp) == 1)
+      return true;
+
+    Error::SetErrno(error, "fread() failed: ", errno);
+    return false;
+  };
+
+  DDSLoadInfo info;
+  if (!ParseDDSHeader(header_reader, &info, error))
+    return false;
+
+  // always load the base image
+  if (!FileSystem::FSeek64(fp, info.base_image_offset, SEEK_SET, error))
+    return false;
+
+  image->Resize(info.width, info.height, info.format, false);
+  const u32 rows = image->GetBlockYCount();
+  const u32 image_pitch = image->GetPitch();
+  if (image_pitch != info.base_image_pitch)
+  {
+    // The file pitch comes from the header and may be larger than a destination row. Never read
+    // more than the destination row can hold (assumes each image row holds GetPitch() bytes --
+    // TODO confirm against Image), and step over the remaining padding in the file instead.
+    const u32 row_bytes = (info.base_image_pitch < image_pitch) ? info.base_image_pitch : image_pitch;
+    const s64 row_padding = static_cast<s64>(info.base_image_pitch) - static_cast<s64>(row_bytes);
+    for (u32 y = 0; y < rows; y++)
+    {
+      if (std::fread(image->GetRowPixels(y), row_bytes, 1, fp) != 1)
+      {
+        Error::SetErrno(error, "fread() failed: ", errno);
+        return false;
+      }
+
+      // Nothing is read after the last row, so its trailing padding does not need to be skipped.
+      if (row_padding > 0 && (y + 1) < rows && !FileSystem::FSeek64(fp, row_padding, SEEK_CUR, error))
+        return false;
+    }
+  }
+  else
+  {
+    // Pitches match, so the whole image can be read in a single call.
+    if (std::fread(image->GetPixels(), static_cast<size_t>(image_pitch) * rows, 1, fp) != 1)
+    {
+      Error::SetErrno(error, "fread() failed: ", errno);
+      return false;
+    }
+  }
+
+  // Formats without a real alpha channel get their alpha forced to opaque.
+  if (info.clear_alpha)
+    image->SetAllPixelsOpaque();
+
+  return true;
+}
+
+/// Loads the base mip level of a DDS image held entirely in memory.
+///
+/// @param image Destination image; receives the pixels via Image::SetPixels().
+/// @param data  Bytes of the DDS data, starting at the magic.
+/// @param error Receives a description of the failure when false is returned.
+/// @return true if the header was valid and the buffer contains the whole base image.
+bool DDSBufferLoader(Image* image, std::span<const u8> data, Error* error)
+{
+  // Sequential reader over `data`; read_offset advances past each successfully read chunk.
+  size_t read_offset = 0;
+  const auto span_reader = [&data, &read_offset](void* dst, size_t length, Error* err) {
+    if ((read_offset + length) <= data.size())
+    {
+      std::memcpy(dst, &data[read_offset], length);
+      read_offset += length;
+      return true;
+    }
+
+    Error::SetStringView(err, "Buffer does not contain sufficient data.");
+    return false;
+  };
+
+  DDSLoadInfo info;
+  const bool header_ok = ParseDDSHeader(span_reader, &info, error);
+  if (!header_ok)
+    return false;
+
+  // The base image must lie completely inside the buffer before it is handed to the image.
+  const u64 base_image_end = static_cast<u64>(info.base_image_offset) + info.base_image_size;
+  if (base_image_end > data.size())
+  {
+    Error::SetStringFmt(error, "Buffer does not contain complete base image.");
+    return false;
+  }
+
+  const u8* const base_image = &data[static_cast<size_t>(info.base_image_offset)];
+  image->SetPixels(info.width, info.height, info.format, base_image, info.base_image_pitch);
+
+  // Formats without a real alpha channel get their alpha forced to opaque.
+  if (info.clear_alpha)
+    image->SetAllPixelsOpaque();
+
+  return true;
+}
--- a/src/util/image.h
+++ b/src/util/image.h
@ -21,6 +21,7 @@ enum class ImageFormat : u8
  BGRA8,
  RGB565,
  RGBA5551,
+  BGR8,
  BC1,
  BC2,
  BC3,
--- a/src/util/metal_device.mm
+++ b/src/util/metal_device.mm
@ -71,6 +71,11 @@ static constexpr std::array<MTLPixelFormat, static_cast<u32>(GPUTexture::Format:
  MTLPixelFormatRGBA16Float,           // RGBA16F
  MTLPixelFormatRGBA32Float,           // RGBA32F
  MTLPixelFormatBGR10A2Unorm,          // RGB10A2
+  MTLPixelFormatBC1_RGBA,              // BC1
+  MTLPixelFormatBC2_RGBA,              // BC2
+  MTLPixelFormatBC3_RGBA,              // BC3
+  MTLPixelFormatBC7_RGBAUnorm,         // BC7
+
 };

 static void LogNSError(NSError* error, std::string_view message)
@ -385,6 +390,10 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
  m_features.pipeline_cache = true;
  m_features.prefer_unused_textures = true;

+  // Same feature bit for both.
+  m_features.dxt_textures = m_features.bptc_textures =
+    !(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && m_device.supportsBCTextureCompression;
+
  // Disable pipeline cache on Intel, apparently it's buggy.
  if ([[m_device name] containsString:@"Intel"])
  {
@ -995,8 +1004,8 @@ MetalTexture::~MetalTexture()
 bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
                          u32 level /*= 0*/)
 {
-  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
-  const u32 req_size = height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 req_size = CalcUploadSize(height, aligned_pitch);

  GPUDevice::GetStatistics().buffer_streamed += req_size;
  GPUDevice::GetStatistics().num_uploads++;
@ -1013,7 +1022,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
    actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options];
    actual_offset = 0;
    actual_pitch = pitch;
-    if (actual_buffer == nil)
+    if (actual_buffer == nil) [[unlikely]]
    {
      Panic("Failed to allocate temporary buffer.");
      return false;
@ -1026,7 +1035,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
    if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
    {
      dev.SubmitCommandBuffer();
-      if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
+      if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) [[unlikely]]
      {
        Panic("Failed to reserve texture upload space.");
        return false;
@ -1034,7 +1043,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
    }

    actual_offset = sb.GetCurrentOffset();
-    StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
+    CopyTextureDataForUpload(width, height, m_format, sb.GetCurrentHostPointer(), aligned_pitch, data, pitch);
    sb.CommitMemory(req_size);
    actual_buffer = sb.GetBuffer();
    actual_pitch = aligned_pitch;
@ -1065,8 +1074,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
  if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
    return false;

-  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
-  const u32 req_size = height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 req_size = CalcUploadSize(height, aligned_pitch);

  MetalDevice& dev = MetalDevice::GetInstance();
  if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
@ -1097,8 +1106,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32

 void MetalTexture::Unmap()
 {
-  const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
-  const u32 req_size = m_map_height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);

  GPUDevice::GetStatistics().buffer_streamed += req_size;
  GPUDevice::GetStatistics().num_uploads++;
@ -1488,6 +1497,11 @@ bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
    if (![m_device supportsFamily:MTLGPUFamilyApple2])
      return false;
  }
+  else if (format >= GPUTexture::Format::BC1 && format <= GPUTexture::Format::BC7)
+  {
+    if (!m_device.supportsBCTextureCompression)
+      return false;
+  }

  return (s_pixel_format_mapping[static_cast<u8>(format)] != MTLPixelFormatInvalid);
 }
--- a/src/util/opengl_device.cpp
+++ b/src/util/opengl_device.cpp
@ -506,6 +506,12 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)

  m_features.shader_cache = false;

+  m_features.dxt_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && GLAD_GL_EXT_texture_compression_s3tc);
+  m_features.bptc_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
+     (GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc));
+
  m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary;
  if (m_features.pipeline_cache)
  {
--- a/src/util/opengl_texture.cpp
+++ b/src/util/opengl_texture.cpp
@ -33,61 +33,69 @@ const std::tuple<GLenum, GLenum, GLenum>& OpenGLTexture::GetPixelFormatMapping(G
 {
  static constexpr std::array<std::tuple<GLenum, GLenum, GLenum>, static_cast<u32>(GPUTexture::Format::MaxCount)>
    mapping = {{
-      {},                                                       // Unknown
-      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                    // RGBA8
-      {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                    // BGRA8
-      {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},             // RGB565
-      {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},     // RGBA5551
-      {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                        // R8
-      {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT},     // D16
-      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8
-      {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},    // D32F
-      {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT},       // D32FS8
-      {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                      // R16
-      {GL_R16I, GL_RED_INTEGER, GL_SHORT},                      // R16I
-      {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},            // R16U
-      {GL_R16F, GL_RED, GL_HALF_FLOAT},                         // R16F
-      {GL_R32I, GL_RED_INTEGER, GL_INT},                        // R32I
-      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},              // R32U
-      {GL_R32F, GL_RED, GL_FLOAT},                              // R32F
-      {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                // RG8
-      {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT},                     // RG16
-      {GL_RG16F, GL_RG, GL_HALF_FLOAT},                         // RG16F
-      {GL_RG32F, GL_RG, GL_FLOAT},                              // RG32F
-      {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE},                   // RGBA16
-      {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                     // RGBA16F
-      {GL_RGBA32F, GL_RGBA, GL_FLOAT},                          // RGBA32F
-      {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV},   // RGB10A2
+      {},                                                                                       // Unknown
+      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                                                    // RGBA8
+      {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                                                    // BGRA8
+      {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                                             // RGB565
+      {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},                                     // RGBA5551
+      {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                                        // R8
+      {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT},                                     // D16
+      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT},                                 // D24S8
+      {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},                                    // D32F
+      {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT},                                       // D32FS8
+      {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                                                      // R16
+      {GL_R16I, GL_RED_INTEGER, GL_SHORT},                                                      // R16I
+      {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                                            // R16U
+      {GL_R16F, GL_RED, GL_HALF_FLOAT},                                                         // R16F
+      {GL_R32I, GL_RED_INTEGER, GL_INT},                                                        // R32I
+      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                                              // R32U
+      {GL_R32F, GL_RED, GL_FLOAT},                                                              // R32F
+      {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                                                // RG8
+      {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT},                                                     // RG16
+      {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                                         // RG16F
+      {GL_RG32F, GL_RG, GL_FLOAT},                                                              // RG32F
+      {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE},                                                   // RGBA16
+      {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                                                     // RGBA16F
+      {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                                          // RGBA32F
+      {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV},                                   // RGB10A2
+      {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE},   // BC1
+      {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE},   // BC2
+      {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE},   // BC3
+      {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7
    }};

  // GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol.
  static constexpr std::array<std::tuple<GLenum, GLenum, GLenum>, static_cast<u32>(GPUTexture::Format::MaxCount)>
    mapping_gles = {{
-      {},                                                       // Unknown
-      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                    // RGBA8
-      {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                    // BGRA8
-      {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},             // RGB565
-      {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},     // RGBA5551
-      {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                        // R8
-      {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT},     // D16
-      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8
-      {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},    // D32F
-      {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT},       // D32FS8
-      {GL_R16F, GL_RED, GL_HALF_FLOAT},                         // R16
-      {GL_R16I, GL_RED_INTEGER, GL_SHORT},                      // R16I
-      {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},            // R16U
-      {GL_R16F, GL_RED, GL_HALF_FLOAT},                         // R16F
-      {GL_R32I, GL_RED_INTEGER, GL_INT},                        // R32I
-      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},              // R32U
-      {GL_R32F, GL_RED, GL_FLOAT},                              // R32F
-      {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                        // RG8
-      {GL_RG16F, GL_RG, GL_HALF_FLOAT},                         // RG16
-      {GL_RG16F, GL_RG, GL_HALF_FLOAT},                         // RG16F
-      {GL_RG32F, GL_RG, GL_FLOAT},                              // RG32F
-      {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                     // RGBA16
-      {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                     // RGBA16F
-      {GL_RGBA32F, GL_RGBA, GL_FLOAT},                          // RGBA32F
-      {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV},   // RGB10A2
+      {},                                                                                       // Unknown
+      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                                                    // RGBA8
+      {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE},                                                    // BGRA8
+      {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                                             // RGB565
+      {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},                                     // RGBA5551
+      {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                                        // R8
+      {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT},                                     // D16
+      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT},                                 // D24S8
+      {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},                                    // D32F
+      {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT},                                       // D32FS8
+      {GL_R16F, GL_RED, GL_HALF_FLOAT},                                                         // R16
+      {GL_R16I, GL_RED_INTEGER, GL_SHORT},                                                      // R16I
+      {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                                            // R16U
+      {GL_R16F, GL_RED, GL_HALF_FLOAT},                                                         // R16F
+      {GL_R32I, GL_RED_INTEGER, GL_INT},                                                        // R32I
+      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                                              // R32U
+      {GL_R32F, GL_RED, GL_FLOAT},                                                              // R32F
+      {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                                        // RG8
+      {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                                         // RG16
+      {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                                         // RG16F
+      {GL_RG32F, GL_RG, GL_FLOAT},                                                              // RG32F
+      {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                                                     // RGBA16
+      {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                                                     // RGBA16F
+      {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                                          // RGBA32F
+      {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV},                                   // RGB10A2
+      {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE},   // BC1
+      {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE},   // BC2
+      {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE},   // BC3
+      {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7
    }};

  return gles ? mapping_gles[static_cast<u32>(format)] : mapping[static_cast<u32>(format)];
@ -169,6 +177,7 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
  else
  {
    const bool use_texture_storage = UseTextureStorage(false);
+    const bool is_compressed = IsCompressedFormat(format);
    if (use_texture_storage)
    {
      if (layers > 1)
@ -183,10 +192,10 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
      const u32 alignment = GetUploadAlignment(data_pitch);
      if (data)
      {
-        GPUDevice::GetStatistics().buffer_streamed += data_pitch * height;
+        GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, data_pitch);
        GPUDevice::GetStatistics().num_uploads++;

-        glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size);
+        glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(format, data_pitch));
        if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
          glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
      }
@ -198,18 +207,55 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
      {
        if (use_texture_storage)
        {
-          if (layers > 1)
-            glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr);
+          if (is_compressed)
+          {
+            const u32 size = CalcUploadSize(format, current_height, data_pitch);
+            if (layers > 1)
+            {
+              glCompressedTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, size,
+                                        data_ptr);
+            }
+            else
+            {
+              glCompressedTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, size, data_ptr);
+            }
+          }
          else
-            glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr);
+          {
+            if (layers > 1)
+              glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr);
+            else
+              glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr);
+          }
        }
        else
        {
-          if (layers > 1)
-            glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type,
-                         data_ptr);
+          if (is_compressed)
+          {
+            const u32 size = CalcUploadSize(format, current_height, data_pitch);
+            if (layers > 1)
+            {
+              glCompressedTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, size,
+                                     data_ptr);
+            }
+            else
+            {
+              glCompressedTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, size, data_ptr);
+            }
+          }
          else
-            glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, data_ptr);
+          {
+            if (layers > 1)
+            {
+              glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type,
+                           data_ptr);
+            }
+            else
+            {
+              glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type,
+                           data_ptr);
+            }
+          }
        }

        if (data_ptr)
@ -257,14 +303,11 @@ void OpenGLTexture::CommitClear()
 bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
                           u32 level /*= 0*/)
 {
-  // TODO: perf counters
-
  // Worth using the PBO? Driver probably knows better...
  const GLenum target = GetGLTarget();
  const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES());
-  const u32 pixel_size = GetPixelSize();
-  const u32 preferred_pitch = Common::AlignUpPow2(static_cast<u32>(width) * pixel_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
-  const u32 map_size = preferred_pitch * static_cast<u32>(height);
+  const u32 preferred_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 map_size = CalcUploadSize(height, pitch);
  OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer();

  CommitClear();
@ -283,8 +326,22 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
    if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
      glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);

-    glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size);
-    glTexSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch));
+    if (IsCompressedFormat())
+    {
+      const u32 size = CalcUploadSize(height, pitch);
+      if (IsTextureArray())
+        glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, data);
+      else
+        glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, data);
+    }
+    else
+    {
+      if (IsTextureArray())
+        glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, data);
+      else
+        glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, data);
+    }
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

    if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
@ -293,13 +350,39 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
  else
  {
    const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
-    StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * pixel_size, height);
+    CopyTextureDataForUpload(width, height, m_format, map.pointer, preferred_pitch, data, pitch);
    sb->Unmap(map_size);
    sb->Bind();

-    glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / pixel_size);
-    glTexSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type,
-                    reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(preferred_pitch));
+    if (IsCompressedFormat())
+    {
+      const u32 size = CalcUploadSize(height, pitch);
+      if (IsTextureArray())
+      {
+        glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size,
+                                  reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
+      }
+      else
+      {
+        glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size,
+                                  reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
+      }
+    }
+    else
+    {
+      if (IsTextureArray())
+      {
+        glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type,
+                        reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
+      }
+      else
+      {
+        glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type,
+                        reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
+      }
+    }
+
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

    sb->Unbind();
@ -315,8 +398,8 @@ bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3
  if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
    return false;

-  const u32 pitch = Common::AlignUpPow2(static_cast<u32>(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
-  const u32 upload_size = pitch * static_cast<u32>(height);
+  const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 upload_size = CalcUploadSize(height, pitch);
  OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer();
  if (!sb || upload_size > sb->GetSize())
    return false;
@ -339,8 +422,8 @@ void OpenGLTexture::Unmap()
 {
  CommitClear();

-  const u32 pitch = Common::AlignUpPow2(static_cast<u32>(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
-  const u32 upload_size = pitch * static_cast<u32>(m_map_height);
+  const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
+  const u32 upload_size = CalcUploadSize(m_map_height, pitch);

  GPUDevice::GetStatistics().buffer_streamed += upload_size;
  GPUDevice::GetStatistics().num_uploads++;
@ -354,18 +437,35 @@ void OpenGLTexture::Unmap()
  const GLenum target = GetGLTarget();
  glBindTexture(target, m_id);

-  glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize());
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch));

  const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES());
-  if (IsTextureArray())
+  if (IsCompressedFormat())
  {
-    glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format,
-                    gl_type, reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
+    const u32 size = CalcUploadSize(m_map_height, pitch);
+    if (IsTextureArray())
+    {
+      glCompressedTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1,
+                                gl_format, size, reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
+    }
+    else
+    {
+      glCompressedTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, size,
+                                reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
+    }
  }
  else
  {
-    glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type,
-                    reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
+    if (IsTextureArray())
+    {
+      glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format,
+                      gl_type, reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
+    }
+    else
+    {
+      glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type,
+                      reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
+    }
  }

  glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
--- a/src/util/vulkan_device.cpp
+++ b/src/util/vulkan_device.cpp
@ -96,6 +96,10 @@ const std::array<VkFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> Vulka
  VK_FORMAT_R16G16B16A16_SFLOAT,      // RGBA16F
  VK_FORMAT_R32G32B32A32_SFLOAT,      // RGBA32F
  VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2
+  VK_FORMAT_BC1_RGBA_UNORM_BLOCK,     // BC1
+  VK_FORMAT_BC2_UNORM_BLOCK,          // BC2
+  VK_FORMAT_BC3_UNORM_BLOCK,          // BC3
+  VK_FORMAT_BC7_UNORM_BLOCK,          // BC7
 };

 // Handles are always 64-bit, even on 32-bit platforms.
@ -640,6 +644,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
  enabled_features.sampleRateShading = available_features.sampleRateShading;
  enabled_features.geometryShader = available_features.geometryShader;
  enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
+  enabled_features.textureCompressionBC = available_features.textureCompressionBC;
  device_info.pEnabledFeatures = &enabled_features;

  VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
@ -2456,6 +2461,10 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
  m_features.raster_order_views =
    (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics &&
     m_optional_extensions.vk_ext_fragment_shader_interlock);
+
+  // Same feature bit for both.
+  m_features.dxt_textures = m_features.bptc_textures =
+    (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && vk_features.textureCompressionBC);
 }

 void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
--- a/src/util/vulkan_texture.cpp
+++ b/src/util/vulkan_texture.cpp
@ -230,20 +230,13 @@ VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate()
  return dev.GetCurrentInitCommandBuffer();
 }

-void VulkanTexture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch,
-                                             u32 upload_pitch) const
-{
-  StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height);
-}
-
 VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
-                                                    u32 height) const
+                                                    u32 height, u32 buffer_size) const
 {
-  const u32 size = upload_pitch * height;
  const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                  nullptr,
                                  0,
-                                  static_cast<VkDeviceSize>(size),
+                                  static_cast<VkDeviceSize>(buffer_size),
                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                                  VK_SHARING_MODE_EXCLUSIVE,
                                  0,
@ -270,8 +263,8 @@ VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch,
  VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation);

  // And write the data.
-  CopyTextureDataForUpload(ai.pMappedData, data, width, height, pitch, upload_pitch);
-  vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, size);
+  CopyTextureDataForUpload(width, height, m_format, ai.pMappedData, upload_pitch, data, pitch);
+  vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, buffer_size);
  return buffer;
 }

@ -282,7 +275,7 @@ void VulkanTexture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 w
  if (old_layout != Layout::TransferDst)
    TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst);

-  const u32 row_length = pitch / GetPixelSize();
+  const u32 row_length = CalcUploadRowLengthFromPitch(pitch);

  const VkBufferImageCopy bic = {static_cast<VkDeviceSize>(buffer_offset),
                                 row_length,
@ -302,8 +295,9 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
  DebugAssert(layer < m_layers && level < m_levels);
  DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level));

-  const u32 upload_pitch = Common::AlignUpPow2(pitch, VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment());
-  const u32 required_size = height * upload_pitch;
+  const u32 upload_pitch =
+    Common::AlignUpPow2(CalcUploadPitch(width), VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment());
+  const u32 required_size = CalcUploadSize(height, upload_pitch);
  VulkanDevice& dev = VulkanDevice::GetInstance();
  VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer();

@ -314,7 +308,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
  if (required_size > (sbuffer.GetCurrentSize() / 2))
  {
    buffer_offset = 0;
-    buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height);
+    buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size);
    if (buffer == VK_NULL_HANDLE)
      return false;
  }
@ -332,7 +326,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data

    buffer = sbuffer.GetBuffer();
    buffer_offset = sbuffer.GetCurrentOffset();
-    CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch);
+    CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch);
    sbuffer.CommitMemory(required_size);
  }

@ -372,8 +366,8 @@ bool VulkanTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3
    CommitClear(GetCommandBufferForUpdate());

  // see note in Update() for the reason why.
-  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment());
-  const u32 req_size = height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), dev.GetBufferCopyRowPitchAlignment());
+  const u32 req_size = CalcUploadSize(height, aligned_pitch);
  VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer();
  if (req_size >= (buffer.GetCurrentSize() / 2))
    return false;
@ -402,8 +396,8 @@ void VulkanTexture::Unmap()
 {
  VulkanDevice& dev = VulkanDevice::GetInstance();
  VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer();
-  const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment());
-  const u32 req_size = m_map_height * aligned_pitch;
+  const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), dev.GetBufferCopyRowPitchAlignment());
+  const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
  const u32 offset = sb.GetCurrentOffset();
  sb.CommitMemory(req_size);

--- a/src/util/vulkan_texture.h
+++ b/src/util/vulkan_texture.h
@ -85,8 +85,8 @@ private:
                VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format);

  VkCommandBuffer GetCommandBufferForUpdate();
-  void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const;
-  VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height) const;
+  VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height,
+                                       u32 buffer_size) const;
  void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch,
                        VkBuffer buffer, u32 buffer_offset);