GPUDevice: Support compressed textures

This commit is contained in:
Stenzek 2024-11-24 18:10:59 +10:00
parent 24dfd30839
commit 7eb1d4e092
No known key found for this signature in database
18 changed files with 841 additions and 196 deletions

View File

@ -200,6 +200,13 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
(SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) && (SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) &&
data.ROVsSupported); data.ROVsSupported);
} }
m_features.dxt_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
(SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) &&
SupportsTextureFormat(GPUTexture::Format::BC3)));
m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7));
} }
D3D11SwapChain::D3D11SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle, D3D11SwapChain::D3D11SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle,

View File

@ -147,23 +147,24 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
if (HasFlag(Flags::AllowMap)) if (HasFlag(Flags::AllowMap))
{ {
void* map; void* map;
u32 map_stride; u32 map_pitch;
if (!Map(&map, &map_stride, x, y, width, height, layer, level)) if (!Map(&map, &map_pitch, x, y, width, height, layer, level))
return false; return false;
StringUtil::StrideMemCpy(map, map_stride, data, pitch, GetPixelSize() * width, height); CopyTextureDataForUpload(width, height, m_format, map, map_pitch, data, pitch);
Unmap(); Unmap();
return true; return true;
} }
const CD3D11_BOX box(static_cast<LONG>(x), static_cast<LONG>(y), 0, static_cast<LONG>(x + width), const u32 bs = GetBlockSize();
static_cast<LONG>(y + height), 1); const D3D11_BOX box = {Common::AlignDownPow2(x, bs), Common::AlignDownPow2(y, bs), 0U,
Common::AlignUpPow2(x + width, bs), Common::AlignUpPow2(y + height, bs), 1U};
const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); const u32 srnum = D3D11CalcSubresource(level, layer, m_levels);
ID3D11DeviceContext1* context = D3D11Device::GetD3DContext(); ID3D11DeviceContext1* context = D3D11Device::GetD3DContext();
CommitClear(context); CommitClear(context);
GPUDevice::GetStatistics().buffer_streamed += height * pitch; GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, pitch);
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0);
@ -194,10 +195,18 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
return false; return false;
} }
GPUDevice::GetStatistics().buffer_streamed += height * sr.RowPitch; GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, sr.RowPitch);
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
if (IsCompressedFormat(m_format))
{
*map = static_cast<u8*>(sr.pData) + ((y / GetBlockSize()) * sr.RowPitch) +
((x / GetBlockSize()) * GetPixelSize());
}
else
{
*map = static_cast<u8*>(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); *map = static_cast<u8*>(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize());
}
*map_stride = sr.RowPitch; *map_stride = sr.RowPitch;
m_mapped_subresource = srnum; m_mapped_subresource = srnum;
m_state = GPUTexture::State::Dirty; m_state = GPUTexture::State::Dirty;
@ -294,7 +303,7 @@ std::unique_ptr<D3D11Texture> D3D11Texture::Create(ID3D11Device* device, u32 wid
if (initial_data) if (initial_data)
{ {
GPUDevice::GetStatistics().buffer_streamed += height * initial_data_stride; GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, initial_data_stride);
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
} }

View File

@ -1366,6 +1366,13 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) && SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
options.ROVsSupported; options.ROVsSupported;
} }
m_features.dxt_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
(SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) &&
SupportsTextureFormat(GPUTexture::Format::BC3)));
m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7));
} }
void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,

View File

@ -340,23 +340,23 @@ ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate()
return dev.GetInitCommandList(); return dev.GetInitCommandList();
} }
void D3D12Texture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch,
u32 upload_pitch) const
{
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height);
}
ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
u32 height) const u32 height, u32 buffer_size) const
{ {
const u32 size = upload_pitch * height;
ComPtr<ID3D12Resource> resource; ComPtr<ID3D12Resource> resource;
ComPtr<D3D12MA::Allocation> allocation; ComPtr<D3D12MA::Allocation> allocation;
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD, const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD,
D3D12_HEAP_FLAG_NONE, nullptr, nullptr}; D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
const D3D12_RESOURCE_DESC resource_desc = { const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, 0,
buffer_size,
1,
1,
1,
DXGI_FORMAT_UNKNOWN,
{1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
D3D12_RESOURCE_FLAG_NONE}; D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(), &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(),
@ -375,9 +375,9 @@ ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32
return nullptr; return nullptr;
} }
CopyTextureDataForUpload(map_ptr, data, width, height, pitch, upload_pitch); CopyTextureDataForUpload(width, height, m_format, map_ptr, upload_pitch, data, pitch);
const D3D12_RANGE write_range = {0, size}; const D3D12_RANGE write_range = {0, buffer_size};
resource->Unmap(0, &write_range); resource->Unmap(0, &write_range);
// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy. // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
@ -395,8 +395,8 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
D3D12Device& dev = D3D12Device::GetInstance(); D3D12Device& dev = D3D12Device::GetInstance();
D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer();
const u32 upload_pitch = Common::AlignUpPow2<u32>(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); const u32 upload_pitch = Common::AlignUpPow2<u32>(CalcUploadPitch(width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 required_size = height * upload_pitch; const u32 required_size = CalcUploadSize(height, upload_pitch);
D3D12_TEXTURE_COPY_LOCATION srcloc; D3D12_TEXTURE_COPY_LOCATION srcloc;
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
@ -410,7 +410,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions. // Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
if (required_size > (sbuffer.GetSize() / 2)) if (required_size > (sbuffer.GetSize() / 2))
{ {
srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size);
if (!srcloc.pResource) if (!srcloc.pResource)
return false; return false;
@ -431,7 +431,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
srcloc.pResource = sbuffer.GetBuffer(); srcloc.pResource = sbuffer.GetBuffer();
srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset(); srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset();
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch);
sbuffer.CommitMemory(required_size); sbuffer.CommitMemory(required_size);
} }
@ -482,8 +482,8 @@ bool D3D12Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
CommitClear(GetCommandBufferForUpdate()); CommitClear(GetCommandBufferForUpdate());
// see note in Update() for the reason why. // see note in Update() for the reason why.
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 req_size = height * aligned_pitch; const u32 req_size = CalcUploadSize(m_height, aligned_pitch);
D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer(); D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer();
if (req_size >= (buffer.GetSize() / 2)) if (req_size >= (buffer.GetSize() / 2))
return false; return false;
@ -512,8 +512,8 @@ void D3D12Texture::Unmap()
{ {
D3D12Device& dev = D3D12Device::GetInstance(); D3D12Device& dev = D3D12Device::GetInstance();
D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer(); D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer();
const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 req_size = m_map_height * aligned_pitch; const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
const u32 offset = sb.GetCurrentOffset(); const u32 offset = sb.GetCurrentOffset();
sb.CommitMemory(req_size); sb.CommitMemory(req_size);

View File

@ -80,8 +80,7 @@ private:
ID3D12GraphicsCommandList4* GetCommandBufferForUpdate(); ID3D12GraphicsCommandList4* GetCommandBufferForUpdate();
ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
u32 height) const; u32 height, u32 buffer_size) const;
void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const;
void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist); void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist);
ComPtr<ID3D12Resource> m_resource; ComPtr<ID3D12Resource> m_resource;

View File

@ -650,6 +650,10 @@ static constexpr std::array<D3DCommon::DXGIFormatMapping, static_cast<int>(GPUTe
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2 {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC1
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC2
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC3
{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC7
// clang-format on // clang-format on
}}; }};

View File

@ -1057,8 +1057,22 @@ std::unique_ptr<GPUTexture> GPUDevice::FetchAndUploadTextureImage(const Image& i
{ {
const Image* image_to_upload = &image; const Image* image_to_upload = &image;
GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat()); GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat());
bool gpu_format_supported;
// avoid device query for compressed formats that we've already pretested
if (gpu_format >= GPUTexture::Format::BC1 && gpu_format <= GPUTexture::Format::BC3)
gpu_format_supported = m_features.dxt_textures;
else if (gpu_format == GPUTexture::Format::BC7)
gpu_format_supported = m_features.bptc_textures;
else if (gpu_format == GPUTexture::Format::RGBA8) // always supported
gpu_format_supported = true;
else if (gpu_format != GPUTexture::Format::Unknown)
gpu_format_supported = SupportsTextureFormat(gpu_format);
else
gpu_format_supported = false;
std::optional<Image> converted_image; std::optional<Image> converted_image;
if (!SupportsTextureFormat(gpu_format)) if (!gpu_format_supported)
{ {
converted_image = image.ConvertToRGBA8(error); converted_image = image.ConvertToRGBA8(error);
if (!converted_image.has_value()) if (!converted_image.has_value())

View File

@ -515,6 +515,7 @@ public:
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6), FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
FEATURE_MASK_MEMORY_IMPORT = (1 << 7), FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8), FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
FEATURE_MASK_COMPRESSED_TEXTURES = (1 << 9),
}; };
enum class DrawBarrier : u32 enum class DrawBarrier : u32
@ -553,6 +554,8 @@ public:
bool pipeline_cache : 1; bool pipeline_cache : 1;
bool prefer_unused_textures : 1; bool prefer_unused_textures : 1;
bool raster_order_views : 1; bool raster_order_views : 1;
bool dxt_textures : 1;
bool bptc_textures : 1;
}; };
struct Statistics struct Statistics

View File

@ -25,7 +25,7 @@ GPUTexture::~GPUTexture()
const char* GPUTexture::GetFormatName(Format format) const char* GPUTexture::GetFormatName(Format format)
{ {
static constexpr const char* format_names[static_cast<u8>(Format::MaxCount)] = { static constexpr const std::array<const char*, static_cast<size_t>(Format::MaxCount)> format_names = {{
"Unknown", // Unknown "Unknown", // Unknown
"RGBA8", // RGBA8 "RGBA8", // RGBA8
"BGRA8", // BGRA8 "BGRA8", // BGRA8
@ -51,43 +51,35 @@ const char* GPUTexture::GetFormatName(Format format)
"RGBA16F", // RGBA16F "RGBA16F", // RGBA16F
"RGBA32F", // RGBA32F "RGBA32F", // RGBA32F
"RGB10A2", // RGB10A2 "RGB10A2", // RGB10A2
}; "BC1", // BC1
"BC2", // BC2
"BC3", // BC3
"BC7", // BC7
}};
return format_names[static_cast<u8>(format)]; return format_names[static_cast<u8>(format)];
} }
u32 GPUTexture::GetCompressedBytesPerBlock() const u32 GPUTexture::GetBlockSize() const
{ {
return GetCompressedBytesPerBlock(m_format); return GetBlockSize(m_format);
} }
u32 GPUTexture::GetCompressedBytesPerBlock(Format format) u32 GPUTexture::GetBlockSize(Format format)
{ {
// TODO: Implement me if (format >= Format::BC1 && format <= Format::BC7)
return GetPixelSize(format);
}
u32 GPUTexture::GetCompressedBlockSize() const
{
return GetCompressedBlockSize(m_format);
}
u32 GPUTexture::GetCompressedBlockSize(Format format)
{
// TODO: Implement me
/*if (format >= Format::BC1 && format <= Format::BC7)
return 4; return 4;
else*/ else
return 1; return 1;
} }
u32 GPUTexture::CalcUploadPitch(Format format, u32 width) u32 GPUTexture::CalcUploadPitch(Format format, u32 width)
{ {
/* // convert to blocks
if (format >= Format::BC1 && format <= Format::BC7) if (format >= Format::BC1 && format <= Format::BC7)
width = Common::AlignUpPow2(width, 4) / 4; width = Common::AlignUpPow2(width, 4) / 4;
*/
return width * GetCompressedBytesPerBlock(format); return width * GetPixelSize(format);
} }
u32 GPUTexture::CalcUploadPitch(u32 width) const u32 GPUTexture::CalcUploadPitch(u32 width) const
@ -102,9 +94,11 @@ u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const
u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch) u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch)
{ {
const u32 block_size = GetCompressedBlockSize(format); const u32 pixel_size = GetPixelSize(format);
const u32 bytes_per_block = GetCompressedBytesPerBlock(format); if (IsCompressedFormat(format))
return ((pitch + (bytes_per_block - 1)) / bytes_per_block) * block_size; return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * 4;
else
return pitch / pixel_size;
} }
u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const
@ -114,36 +108,64 @@ u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const
u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch) u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch)
{ {
const u32 block_size = GetCompressedBlockSize(format); const u32 block_size = GetBlockSize(format);
return pitch * ((static_cast<u32>(height) + (block_size - 1)) / block_size); return pitch * ((static_cast<u32>(height) + (block_size - 1)) / block_size);
} }
// Returns true when `format` is one of the block-compressed formats (BC1, BC2, BC3 or BC7).
// The range is bounded on both ends, matching the BC1..BC7 checks used by GetBlockSize() and
// CalcUploadPitch(), so the Format::MaxCount sentinel (which follows BC7 in the enum) is never
// classified as a compressed format.
bool GPUTexture::IsCompressedFormat(Format format)
{
  return (format >= Format::BC1 && format <= Format::BC7);
}
// Instance overload: reports whether this texture's own format (m_format) is a compressed format
// by forwarding to the static IsCompressedFormat(Format).
bool GPUTexture::IsCompressedFormat() const
{
return IsCompressedFormat(m_format);
}
u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height) u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height)
{ {
const u32 max_dim = Common::PreviousPow2(std::max(width, height)); const u32 max_dim = Common::PreviousPow2(std::max(width, height));
return (std::countr_zero(max_dim) + 1); return (std::countr_zero(max_dim) + 1);
} }
// Copies a width x height texel region from `src` (row stride `src_pitch`) to `dst` (row stride
// `dst_pitch`) via StringUtil::StrideMemCpy.
//
// For compressed formats the dimensions are first converted to 4x4 blocks (rounded up), and
// GetPixelSize() supplies the number of bytes per block rather than per pixel. For all other
// formats the copy is `height` rows of `width * GetPixelSize(format)` bytes.
void GPUTexture::CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch,
                                          const void* src, u32 src_pitch)
{
  const bool compressed = IsCompressedFormat(format);

  // Number of copy units per row / number of rows: blocks when compressed, texels otherwise.
  const u32 units_per_row = compressed ? (Common::AlignUpPow2(width, 4) / 4) : width;
  const u32 row_count = compressed ? (Common::AlignUpPow2(height, 4) / 4) : height;

  StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, units_per_row * GetPixelSize(format), row_count);
}
GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format) GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format)
{ {
static constexpr const std::array<Format, static_cast<size_t>(ImageFormat::MaxCount)> mapping = {{ static constexpr const std::array mapping = {
Format::Unknown, // None Format::Unknown, // None
Format::RGBA8, // RGBA8 Format::RGBA8, // RGBA8
Format::BGRA8, // BGRA8 Format::BGRA8, // BGRA8
Format::RGB565, // RGB565 Format::RGB565, // RGB565
Format::Unknown, // RGBA5551 Format::RGBA5551, // RGBA5551
Format::Unknown, // BC1 Format::Unknown, // BGR8
Format::Unknown, // BC2 Format::BC1, // BC1
Format::Unknown, // BC3 Format::BC2, // BC2
Format::Unknown, // BC7 Format::BC3, // BC3
}}; Format::BC7, // BC7
};
static_assert(mapping.size() == static_cast<size_t>(ImageFormat::MaxCount));
return mapping[static_cast<size_t>(format)]; return mapping[static_cast<size_t>(format)];
} }
ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format) ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format)
{ {
static constexpr const std::array<ImageFormat, static_cast<size_t>(Format::MaxCount)> mapping = {{ static constexpr const std::array mapping = {
ImageFormat::None, // Unknown ImageFormat::None, // Unknown
ImageFormat::RGBA8, // RGBA8 ImageFormat::RGBA8, // RGBA8
ImageFormat::BGRA8, // BGRA8 ImageFormat::BGRA8, // BGRA8
@ -169,7 +191,12 @@ ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format)
ImageFormat::None, // RGBA16F ImageFormat::None, // RGBA16F
ImageFormat::None, // RGBA32F ImageFormat::None, // RGBA32F
ImageFormat::None, // RGB10A2 ImageFormat::None, // RGB10A2
}}; ImageFormat::BC1, // BC1
ImageFormat::BC2, // BC2
ImageFormat::BC3, // BC3
ImageFormat::BC7, // BC7
};
static_assert(mapping.size() == static_cast<size_t>(Format::MaxCount));
return mapping[static_cast<size_t>(format)]; return mapping[static_cast<size_t>(format)];
} }
@ -226,6 +253,10 @@ u32 GPUTexture::GetPixelSize(GPUTexture::Format format)
8, // RGBA16F 8, // RGBA16F
16, // RGBA32F 16, // RGBA32F
4, // RGB10A2 4, // RGB10A2
8, // BC1 - 16 pixels in 64 bits
16, // BC2 - 16 pixels in 128 bits
16, // BC3 - 16 pixels in 128 bits
16, // BC7 - 16 pixels in 128 bits
}}; }};
return sizes[static_cast<size_t>(format)]; return sizes[static_cast<size_t>(format)];
@ -241,12 +272,6 @@ bool GPUTexture::IsDepthStencilFormat(Format format)
return (format == Format::D24S8 || format == Format::D32FS8); return (format == Format::D24S8 || format == Format::D32FS8);
} }
bool GPUTexture::IsCompressedFormat(Format format)
{
// TODO: Implement me
return false;
}
bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
Flags flags, Error* error) Flags flags, Error* error)
{ {
@ -318,6 +343,12 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u
return false; return false;
} }
if (IsCompressedFormat(format) && (type != Type::Texture || ((flags & Flags::AllowBindAsImage) != Flags::None)))
{
Error::SetStringView(error, "Compressed formats are only supported for textures.");
return false;
}
return true; return true;
} }

View File

@ -61,7 +61,11 @@ public:
RGBA16F, RGBA16F,
RGBA32F, RGBA32F,
RGB10A2, RGB10A2,
MaxCount BC1, ///< BC1, aka DXT1 compressed texture
BC2, ///< BC2, aka DXT2/3 compressed texture
BC3, ///< BC3, aka DXT4/5 compressed texture
BC7, ///< BC7, aka BPTC compressed texture
MaxCount,
}; };
enum class State : u8 enum class State : u8
@ -95,12 +99,13 @@ public:
static bool IsDepthFormat(Format format); static bool IsDepthFormat(Format format);
static bool IsDepthStencilFormat(Format format); static bool IsDepthStencilFormat(Format format);
static bool IsCompressedFormat(Format format); static bool IsCompressedFormat(Format format);
static u32 GetCompressedBytesPerBlock(Format format); static u32 GetBlockSize(Format format);
static u32 GetCompressedBlockSize(Format format);
static u32 CalcUploadPitch(Format format, u32 width); static u32 CalcUploadPitch(Format format, u32 width);
static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch); static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch);
static u32 CalcUploadSize(Format format, u32 height, u32 pitch); static u32 CalcUploadSize(Format format, u32 height, u32 pitch);
static u32 GetFullMipmapCount(u32 width, u32 height); static u32 GetFullMipmapCount(u32 width, u32 height);
static void CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, const void* src,
u32 src_pitch);
static Format GetTextureFormatForImageFormat(ImageFormat format); static Format GetTextureFormatForImageFormat(ImageFormat format);
static ImageFormat GetImageFormatForTextureFormat(Format format); static ImageFormat GetImageFormatForTextureFormat(Format format);
@ -160,8 +165,8 @@ public:
size_t GetVRAMUsage() const; size_t GetVRAMUsage() const;
u32 GetCompressedBytesPerBlock() const; bool IsCompressedFormat() const;
u32 GetCompressedBlockSize() const; u32 GetBlockSize() const;
u32 CalcUploadPitch(u32 width) const; u32 CalcUploadPitch(u32 width) const;
u32 CalcUploadRowLengthFromPitch(u32 pitch) const; u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
u32 CalcUploadSize(u32 height, u32 pitch) const; u32 CalcUploadSize(u32 height, u32 pitch) const;

View File

@ -46,6 +46,10 @@ static bool WebPBufferSaver(const Image& image, DynamicHeapArray<u8>* data, u8 q
static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error);
static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error);
static bool DDSBufferLoader(Image* image, std::span<const u8> data, Error* error);
static bool DDSFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error);
namespace {
struct FormatHandler struct FormatHandler
{ {
const char* extension; const char* extension;
@ -54,12 +58,14 @@ struct FormatHandler
bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*); bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*);
bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*); bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*);
}; };
} // namespace
static constexpr FormatHandler s_format_handlers[] = { static constexpr FormatHandler s_format_handlers[] = {
{"png", PNGBufferLoader, PNGBufferSaver, PNGFileLoader, PNGFileSaver}, {"png", PNGBufferLoader, PNGBufferSaver, PNGFileLoader, PNGFileSaver},
{"jpg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver}, {"jpg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver},
{"jpeg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver}, {"jpeg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver},
{"webp", WebPBufferLoader, WebPBufferSaver, WebPFileLoader, WebPFileSaver}, {"webp", WebPBufferLoader, WebPBufferSaver, WebPFileLoader, WebPFileSaver},
{"dds", DDSBufferLoader, nullptr, DDSFileLoader, nullptr},
}; };
static const FormatHandler* GetFormatHandler(std::string_view extension) static const FormatHandler* GetFormatHandler(std::string_view extension)
@ -155,17 +161,19 @@ Image& Image::operator=(Image&& move)
const char* Image::GetFormatName(ImageFormat format) const char* Image::GetFormatName(ImageFormat format)
{ {
static constexpr std::array<const char*, static_cast<size_t>(ImageFormat::MaxCount)> names = { static constexpr std::array names = {
"None", // None "None", // None
"RGBA8", // RGBA8 "RGBA8", // RGBA8
"BGRA8", // BGRA8 "BGRA8", // BGRA8
"RGB565", // RGB565 "RGB565", // RGB565
"RGB5551", // RGBA5551 "RGB5551", // RGBA5551
"BGR8", // BGR8
"BC1", // BC1 "BC1", // BC1
"BC2", // BC2 "BC2", // BC2
"BC3", // BC3 "BC3", // BC3
"BC7", // BC7 "BC7", // BC7
}; };
static_assert(names.size() == static_cast<size_t>(ImageFormat::MaxCount));
return names[static_cast<size_t>(format)]; return names[static_cast<size_t>(format)];
} }
@ -178,6 +186,7 @@ u32 Image::GetPixelSize(ImageFormat format)
4, // BGRA8 4, // BGRA8
2, // RGB565 2, // RGB565
2, // RGBA5551 2, // RGBA5551
3, // BGR8
8, // BC1 - 16 pixels in 64 bits 8, // BC1 - 16 pixels in 64 bits
16, // BC2 - 16 pixels in 128 bits 16, // BC2 - 16 pixels in 128 bits
16, // BC3 - 16 pixels in 128 bits 16, // BC3 - 16 pixels in 128 bits
@ -563,6 +572,27 @@ std::optional<Image> Image::ConvertToRGBA8(Error* error) const
} }
} }
} }
break;
case ImageFormat::BGR8:
{
  // Expand packed 3-byte pixels to 4-byte RGBA8, forcing the alpha channel to fully opaque.
  ret = Image(m_width, m_height, ImageFormat::RGBA8);
  for (u32 y = 0; y < m_height; y++)
  {
    const u8* pixels_in = GetRowPixels(y);
    u8* pixels_out = ret->GetRowPixels(y);

    for (u32 x = 0; x < m_width; x++)
    {
      // Source bytes are B, G, R (per the format name; NOTE(review): confirm against the loader).
      // R goes in the low byte so the memcpy below stores R, G, B, A in memory on little-endian
      // hosts. All three source bytes are consumed; previously byte 1 (green) was never read and
      // byte 2 was used twice.
      const u32 rgba = (ZeroExtend32(pixels_in[2]) | (ZeroExtend32(pixels_in[1]) << 8) |
                        (ZeroExtend32(pixels_in[0]) << 16) | 0xFF000000u);
      std::memcpy(pixels_out, &rgba, sizeof(rgba));
      pixels_in += 3;
      pixels_out += sizeof(rgba);
    }
  }
}
break; break;
// TODO: Block format decompression // TODO: Block format decompression
@ -1220,3 +1250,415 @@ bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp,
return true; return true;
} }
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// DDS Handler
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// From https://raw.githubusercontent.com/Microsoft/DirectXTex/master/DirectXTex/DDS.h
//
// This header defines constants and structures that are useful when parsing
// DDS files. DDS files were originally designed to use several structures
// and constants that are native to DirectDraw and are defined in ddraw.h,
// such as DDSURFACEDESC2 and DDSCAPS2. This file defines similar
// (compatible) constants and structures so that one can use DDS files
// without needing to include ddraw.h.
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkId=248926
#pragma pack(push, 1)
static constexpr uint32_t DDS_MAGIC = 0x20534444; // "DDS "
// Pixel-format descriptor embedded in DDS_HEADER (member `ddspf`).
// Declared inside #pragma pack(push, 1), so the eight 32-bit fields are laid out back to back;
// do not reorder or resize them. The DDSPF_* constants below show typical contents.
struct DDS_PIXELFORMAT
{
uint32_t dwSize; // the DDSPF_* constants set this to sizeof(DDS_PIXELFORMAT)
uint32_t dwFlags; // DDS_FOURCC / DDS_RGB / DDS_RGBA / ... (see the DDPF_* values below)
uint32_t dwFourCC; // four-character code; 0 in the uncompressed DDSPF_* constants
uint32_t dwRGBBitCount; // bits per pixel for RGB formats (32 or 24 in the DDSPF_* constants)
uint32_t dwRBitMask; // bit mask selecting the red channel
uint32_t dwGBitMask; // bit mask selecting the green channel
uint32_t dwBBitMask; // bit mask selecting the blue channel
uint32_t dwABitMask; // bit mask selecting the alpha channel (0 when there is no alpha)
};
// Values for DDS_PIXELFORMAT::dwFlags. The trailing comment on each line names the DirectDraw
// DDPF_* flag(s) the value corresponds to.
#define DDS_FOURCC 0x00000004 // DDPF_FOURCC
#define DDS_RGB 0x00000040 // DDPF_RGB
#define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS
#define DDS_LUMINANCE 0x00020000 // DDPF_LUMINANCE
#define DDS_LUMINANCEA 0x00020001 // DDPF_LUMINANCE | DDPF_ALPHAPIXELS
#define DDS_ALPHA 0x00000002 // DDPF_ALPHA
#define DDS_PAL8 0x00000020 // DDPF_PALETTEINDEXED8
#define DDS_PAL8A 0x00000021 // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS
#define DDS_BUMPDUDV 0x00080000 // DDPF_BUMPDUDV
// Packs four 8-bit characters into a 32-bit code, with ch0 in the lowest byte and ch3 in the
// highest. Only defined here when no MAKEFOURCC macro is already in scope.
#ifndef MAKEFOURCC
#define MAKEFOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | \
((uint32_t)(uint8_t)(ch3) << 24))
#endif /* defined(MAKEFOURCC) */
// Values for DDS_HEADER::dwFlags. The trailing comment on each line names the DirectDraw DDSD_*
// flag(s) the value corresponds to.
#define DDS_HEADER_FLAGS_TEXTURE 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT
#define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT
#define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH
#define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH
#define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE
// NOTE(review): presumably the largest width/height accepted by the loader - confirm at the use site.
#define DDS_MAX_TEXTURE_SIZE 32768
// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION
// Resource dimensionality values; the numeric values are fixed by the file format and must not
// be renumbered. Presumably compared against DDS_HEADER_DXT10::resourceDimension - confirm at
// the use site.
enum DDS_RESOURCE_DIMENSION
{
DDS_DIMENSION_TEXTURE1D = 2,
DDS_DIMENSION_TEXTURE2D = 3,
DDS_DIMENSION_TEXTURE3D = 4,
};
// Main DDS file header. Declared inside #pragma pack(push, 1) and checked by a static_assert to
// be exactly 124 bytes, so fields must not be added, removed, reordered or resized.
struct DDS_HEADER
{
uint32_t dwSize;
uint32_t dwFlags; // combination of DDS_HEADER_FLAGS_* values
uint32_t dwHeight;
uint32_t dwWidth;
uint32_t dwPitchOrLinearSize; // NOTE(review): presumably selected by DDS_HEADER_FLAGS_PITCH / _LINEARSIZE - confirm
uint32_t dwDepth; // only if DDS_HEADER_FLAGS_VOLUME is set in dwFlags
uint32_t dwMipMapCount; // NOTE(review): presumably only meaningful with DDS_HEADER_FLAGS_MIPMAP - confirm
uint32_t dwReserved1[11];
DDS_PIXELFORMAT ddspf; // embedded pixel-format descriptor
uint32_t dwCaps;
uint32_t dwCaps2;
uint32_t dwCaps3;
uint32_t dwCaps4;
uint32_t dwReserved2;
};
// Extension header that follows DDS_HEADER when ddspf.dwFourCC is 'DX10'.
struct DDS_HEADER_DXT10
{
uint32_t dxgiFormat; // numeric format id; the parser maps 71/74/77/98 to BC1/BC2/BC3/BC7
uint32_t resourceDimension; // a DDS_RESOURCE_DIMENSION value; must be DDS_DIMENSION_TEXTURE2D
uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG
uint32_t arraySize; // must be 1; array textures are rejected by the parser
uint32_t miscFlags2; // see DDS_MISC_FLAGS2
};
#pragma pack(pop)
// Guard the on-disk sizes, since both headers are read straight into these structs.
static_assert(sizeof(DDS_HEADER) == 124, "DDS Header size mismatch");
static_assert(sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch");
// Reference descriptors for the uncompressed layouts ParseDDSHeader() recognises. Field order:
// {dwSize, dwFlags, dwFourCC, dwRGBBitCount, dwRBitMask, dwGBitMask, dwBBitMask, dwABitMask}.
// 32-bit with alpha, red in bits 16-23: loaded as ImageFormat::BGRA8.
constexpr DDS_PIXELFORMAT DDSPF_A8R8G8B8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000};
// 32-bit without alpha, red in bits 16-23: loaded as ImageFormat::BGRA8 with alpha forced opaque.
constexpr DDS_PIXELFORMAT DDSPF_X8R8G8B8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000};
// 32-bit with alpha, red in bits 0-7: loaded as ImageFormat::RGBA8.
constexpr DDS_PIXELFORMAT DDSPF_A8B8G8R8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000};
// 32-bit without alpha, red in bits 0-7: loaded as ImageFormat::RGBA8 with alpha forced opaque.
constexpr DDS_PIXELFORMAT DDSPF_X8B8G8R8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000};
// 24-bit, red in bits 16-23: loaded as ImageFormat::BGR8 with alpha forced opaque.
constexpr DDS_PIXELFORMAT DDSPF_R8G8B8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000};
// End of Microsoft code from DDS.h.
// Returns true when every field of the two DDS pixel format descriptors is equal.
// Used to match a file's pixel format against the predefined DDSPF_* descriptors.
static bool DDSPixelFormatMatches(const DDS_PIXELFORMAT& pf1, const DDS_PIXELFORMAT& pf2)
{
  // One comparison per field, in declaration order.
  return pf1.dwSize == pf2.dwSize && pf1.dwFlags == pf2.dwFlags && pf1.dwFourCC == pf2.dwFourCC &&
         pf1.dwRGBBitCount == pf2.dwRGBBitCount && pf1.dwRBitMask == pf2.dwRBitMask &&
         pf1.dwGBitMask == pf2.dwGBitMask && pf1.dwBBitMask == pf2.dwBBitMask &&
         pf1.dwABitMask == pf2.dwABitMask;
}
// Result of parsing a DDS header: everything the loaders need to locate and interpret the
// base (top-level) image.
struct DDSLoadInfo
{
  // Width/height of one block in texels: 4 for the BC formats, 1 for uncompressed formats.
  u32 block_size{1};
  // Bytes occupied by one block (or by one pixel when block_size is 1).
  u32 bytes_per_block{4};
  // Dimensions of the base image, copied from the header.
  u32 width{0};
  u32 height{0};
  // Number of mip levels described by the header (1 when the mipmap flag is absent).
  u32 mip_count{0};
  // Pixel/block format the data should be interpreted as.
  ImageFormat format{ImageFormat::RGBA8};
  // Offset of the base image from the start of the file/buffer (magic + headers).
  s64 base_image_offset{0};
  // Total bytes of the base image: base_image_pitch times the number of block rows.
  u32 base_image_size{0};
  // Bytes between consecutive rows of blocks in the source data.
  u32 base_image_pitch{0};
  // Set for formats without an alpha channel; the loaders then force all pixels opaque.
  bool clear_alpha{false};
};
template<typename ReadFunction>
static bool ParseDDSHeader(const ReadFunction& RF, DDSLoadInfo* info, Error* error)
{
u32 magic;
if (!RF(&magic, sizeof(magic), error) || magic != DDS_MAGIC)
{
Error::AddPrefix(error, "Failed to read magic: ");
return false;
}
DDS_HEADER header;
u32 header_size = sizeof(header);
if (!RF(&header, header_size, error) || header.dwSize < header_size)
{
Error::AddPrefix(error, "Failed to read header: ");
return false;
}
// We should check for DDS_HEADER_FLAGS_TEXTURE here, but some tools don't seem
// to set it (e.g. compressonator). But we can still validate the size.
if (header.dwWidth == 0 || header.dwWidth >= DDS_MAX_TEXTURE_SIZE || header.dwHeight == 0 ||
header.dwHeight >= DDS_MAX_TEXTURE_SIZE)
{
Error::SetStringFmt(error, "Size is invalid: {}x{}", header.dwWidth, header.dwHeight);
return false;
}
// Image should be 2D.
if (header.dwFlags & DDS_HEADER_FLAGS_VOLUME)
{
Error::SetStringView(error, "Volume textures are not supported.");
return false;
}
// Presence of width/height fields is already tested by DDS_HEADER_FLAGS_TEXTURE.
info->width = header.dwWidth;
info->height = header.dwHeight;
// Check for mip levels.
if (header.dwFlags & DDS_HEADER_FLAGS_MIPMAP)
{
info->mip_count = header.dwMipMapCount;
if (header.dwMipMapCount != 0)
{
info->mip_count = header.dwMipMapCount;
}
else
{
const u32 max_dim = Common::PreviousPow2(std::max(header.dwWidth, header.dwHeight));
info->mip_count = (std::countr_zero(max_dim) + 1);
}
}
else
{
info->mip_count = 1;
}
// Handle fourcc formats vs uncompressed formats.
const bool has_fourcc = (header.ddspf.dwFlags & DDS_FOURCC) != 0;
if (has_fourcc)
{
// Handle DX10 extension header.
u32 dxt10_format = 0;
if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', '1', '0'))
{
DDS_HEADER_DXT10 dxt10_header;
if (!RF(&dxt10_header, sizeof(dxt10_header), error))
{
Error::AddPrefix(error, "Failed to read DXT10 header: ");
return false;
}
// Can't handle array textures here. Doesn't make sense to use them, anyway.
if (dxt10_header.resourceDimension != DDS_DIMENSION_TEXTURE2D || dxt10_header.arraySize != 1)
{
Error::SetStringView(error, "Only 2D textures are supported.");
return false;
}
header_size += sizeof(dxt10_header);
dxt10_format = dxt10_header.dxgiFormat;
}
if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '1') || dxt10_format == 71)
{
info->format = ImageFormat::BC1;
info->block_size = 4;
info->bytes_per_block = 8;
}
else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '2') ||
header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '3') || dxt10_format == 74)
{
info->format = ImageFormat::BC2;
info->block_size = 4;
info->bytes_per_block = 16;
}
else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '4') ||
header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '5') || dxt10_format == 77)
{
info->format = ImageFormat::BC3;
info->block_size = 4;
info->bytes_per_block = 16;
}
else if (dxt10_format == 98)
{
info->format = ImageFormat::BC7;
info->block_size = 4;
info->bytes_per_block = 16;
}
else
{
Error::SetStringFmt(error, "Unknown format with FOURCC 0x{:08X} / DXT10 format {}", header.ddspf.dwFourCC,
dxt10_format);
return false;
}
}
else
{
if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8R8G8B8))
{
info->format = ImageFormat::BGRA8;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8R8G8B8))
{
info->format = ImageFormat::BGRA8;
info->clear_alpha = true;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8B8G8R8))
{
info->format = ImageFormat::RGBA8;
info->clear_alpha = true;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_R8G8B8))
{
info->format = ImageFormat::BGR8;
info->clear_alpha = true;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8B8G8R8))
{
info->format = ImageFormat::RGBA8;
}
else
{
Error::SetStringFmt(error, "Unhandled format with FOURCC 0x{:08X}", header.ddspf.dwFourCC);
return false;
}
// All these formats are RGBA, just with byte swapping.
info->block_size = 1;
info->bytes_per_block = header.ddspf.dwRGBBitCount / 8;
}
// Mip levels smaller than the block size are padded to multiples of the block size.
const u32 blocks_wide = Common::AlignUpPow2(info->width, info->block_size) / info->block_size;
const u32 blocks_high = Common::AlignUpPow2(info->height, info->block_size) / info->block_size;
// Pitch can be specified in the header, otherwise we can derive it from the dimensions. For
// compressed formats, both DDS_HEADER_FLAGS_LINEARSIZE and DDS_HEADER_FLAGS_PITCH should be
// set. See https://msdn.microsoft.com/en-us/library/windows/desktop/bb943982(v=vs.85).aspx
if (header.dwFlags & DDS_HEADER_FLAGS_PITCH && header.dwFlags & DDS_HEADER_FLAGS_LINEARSIZE)
{
// Convert pitch (in bytes) to texels/row length.
if (header.dwPitchOrLinearSize < info->bytes_per_block)
{
// Likely a corrupted or invalid file.
Error::SetStringFmt(error, "Invalid pitch: {}", header.dwPitchOrLinearSize);
return false;
}
info->base_image_pitch = header.dwPitchOrLinearSize;
info->base_image_size = info->base_image_pitch * blocks_high;
}
else
{
// Assume no padding between rows of blocks.
info->base_image_pitch = blocks_wide * info->bytes_per_block;
info->base_image_size = info->base_image_pitch * blocks_high;
}
info->base_image_offset = sizeof(magic) + header_size;
#if 0
// D3D11 cannot handle block compressed textures where the first mip level is not a multiple of the block size.
if (mip_level == 0 && info.block_size > 1 && ((width % info.block_size) != 0 || (height % info.block_size) != 0))
{
Error::SetStringFmt(error,
"Invalid dimensions for DDS texture. For compressed textures of this format, "
"the width/height of the first mip level must be a multiple of {}.",
info.block_size);
return false;
}
#endif
return true;
}
// Loads the base (top-level) image of a DDS file into `image`.
//
// image - destination; resized to the dimensions/format described by the file's header.
// path  - unused by this loader.
// fp    - open file positioned at the start of the DDS data.
// error - receives a description of the problem when false is returned.
//
// Returns true on success. Mip levels other than the base image are not read.
bool DDSFileLoader(Image* image, std::string_view path, std::FILE* fp, Error* error)
{
  const auto header_reader = [fp](void* buffer, size_t size, Error* error) {
    if (std::fread(buffer, size, 1, fp) == 1)
      return true;

    Error::SetErrno(error, "fread() failed: ", errno);
    return false;
  };

  DDSLoadInfo info;
  if (!ParseDDSHeader(header_reader, &info, error))
    return false;

  // always load the base image
  if (!FileSystem::FSeek64(fp, info.base_image_offset, SEEK_SET, error))
    return false;

  image->Resize(info.width, info.height, info.format, false);

  const u32 blocks = image->GetBlockYCount();
  const u32 image_pitch = static_cast<u32>(image->GetPitch());
  const u32 file_pitch = info.base_image_pitch;
  if (image_pitch != file_pitch)
  {
    // The file pitch comes from the header and may be larger than a destination row. Never
    // write more than one destination row per read; skip any per-row padding in the file.
    const u32 copy_size = (file_pitch < image_pitch) ? file_pitch : image_pitch;
    const u32 skip_size = file_pitch - copy_size;
    for (u32 y = 0; y < blocks; y++)
    {
      if (std::fread(image->GetRowPixels(y), copy_size, 1, fp) != 1)
      {
        Error::SetErrno(error, "fread() failed: ", errno);
        return false;
      }

      // No need to seek past the padding of the final row.
      if (skip_size > 0 && (y + 1) < blocks &&
          !FileSystem::FSeek64(fp, static_cast<s64>(skip_size), SEEK_CUR, error))
      {
        return false;
      }
    }
  }
  else
  {
    // Source and destination rows have the same stride, so read everything in one go.
    if (std::fread(image->GetPixels(), file_pitch * blocks, 1, fp) != 1)
    {
      Error::SetErrno(error, "fread() failed: ", errno);
      return false;
    }
  }

  if (info.clear_alpha)
    image->SetAllPixelsOpaque();

  return true;
}
// Loads the base (top-level) image of an in-memory DDS file into `image`.
//
// image - destination; receives the pixels of the base image.
// data  - complete contents of the DDS file.
// error - receives a description of the problem when false is returned.
//
// Returns true on success. Mip levels other than the base image are not read.
bool DDSBufferLoader(Image* image, std::span<const u8> data, Error* error)
{
  // Cursor into `data`, advanced by each successful header read.
  size_t read_offset = 0;
  const auto read_bytes = [&data, &read_offset](void* dst, size_t count, Error* err) -> bool {
    const size_t end_offset = read_offset + count;
    if (end_offset <= data.size())
    {
      std::memcpy(dst, &data[read_offset], count);
      read_offset = end_offset;
      return true;
    }

    Error::SetStringView(err, "Buffer does not contain sufficient data.");
    return false;
  };

  DDSLoadInfo info;
  const bool header_ok = ParseDDSHeader(read_bytes, &info, error);
  if (!header_ok)
    return false;

  // The whole base image has to be present in the buffer before it is handed to the image.
  const u64 image_end = static_cast<u64>(info.base_image_offset) + info.base_image_size;
  if (image_end > data.size())
  {
    Error::SetStringFmt(error, "Buffer does not contain complete base image.");
    return false;
  }

  const u8* const base_pixels = &data[static_cast<size_t>(info.base_image_offset)];
  image->SetPixels(info.width, info.height, info.format, base_pixels, info.base_image_pitch);

  if (info.clear_alpha)
    image->SetAllPixelsOpaque();

  return true;
}

View File

@ -21,6 +21,7 @@ enum class ImageFormat : u8
BGRA8, BGRA8,
RGB565, RGB565,
RGBA5551, RGBA5551,
BGR8,
BC1, BC1,
BC2, BC2,
BC3, BC3,

View File

@ -71,6 +71,11 @@ static constexpr std::array<MTLPixelFormat, static_cast<u32>(GPUTexture::Format:
MTLPixelFormatRGBA16Float, // RGBA16F MTLPixelFormatRGBA16Float, // RGBA16F
MTLPixelFormatRGBA32Float, // RGBA32F MTLPixelFormatRGBA32Float, // RGBA32F
MTLPixelFormatBGR10A2Unorm, // RGB10A2 MTLPixelFormatBGR10A2Unorm, // RGB10A2
MTLPixelFormatBC1_RGBA, // BC1
MTLPixelFormatBC2_RGBA, // BC2
MTLPixelFormatBC3_RGBA, // BC3
MTLPixelFormatBC7_RGBAUnorm, // BC7
}; };
static void LogNSError(NSError* error, std::string_view message) static void LogNSError(NSError* error, std::string_view message)
@ -385,6 +390,10 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
m_features.pipeline_cache = true; m_features.pipeline_cache = true;
m_features.prefer_unused_textures = true; m_features.prefer_unused_textures = true;
// Same feature bit for both.
m_features.dxt_textures = m_features.bptc_textures =
!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && m_device.supportsBCTextureCompression;
// Disable pipeline cache on Intel, apparently it's buggy. // Disable pipeline cache on Intel, apparently it's buggy.
if ([[m_device name] containsString:@"Intel"]) if ([[m_device name] containsString:@"Intel"])
{ {
@ -995,8 +1004,8 @@ MetalTexture::~MetalTexture()
bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
u32 level /*= 0*/) u32 level /*= 0*/)
{ {
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = height * aligned_pitch; const u32 req_size = CalcUploadSize(height, aligned_pitch);
GPUDevice::GetStatistics().buffer_streamed += req_size; GPUDevice::GetStatistics().buffer_streamed += req_size;
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
@ -1013,7 +1022,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options]; actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options];
actual_offset = 0; actual_offset = 0;
actual_pitch = pitch; actual_pitch = pitch;
if (actual_buffer == nil) if (actual_buffer == nil) [[unlikely]]
{ {
Panic("Failed to allocate temporary buffer."); Panic("Failed to allocate temporary buffer.");
return false; return false;
@ -1026,7 +1035,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
{ {
dev.SubmitCommandBuffer(); dev.SubmitCommandBuffer();
if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) [[unlikely]]
{ {
Panic("Failed to reserve texture upload space."); Panic("Failed to reserve texture upload space.");
return false; return false;
@ -1034,7 +1043,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
} }
actual_offset = sb.GetCurrentOffset(); actual_offset = sb.GetCurrentOffset();
StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); CopyTextureDataForUpload(width, height, m_format, sb.GetCurrentHostPointer(), aligned_pitch, data, pitch);
sb.CommitMemory(req_size); sb.CommitMemory(req_size);
actual_buffer = sb.GetBuffer(); actual_buffer = sb.GetBuffer();
actual_pitch = aligned_pitch; actual_pitch = aligned_pitch;
@ -1065,8 +1074,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
return false; return false;
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = height * aligned_pitch; const u32 req_size = CalcUploadSize(height, aligned_pitch);
MetalDevice& dev = MetalDevice::GetInstance(); MetalDevice& dev = MetalDevice::GetInstance();
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
@ -1097,8 +1106,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
void MetalTexture::Unmap() void MetalTexture::Unmap()
{ {
const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = m_map_height * aligned_pitch; const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
GPUDevice::GetStatistics().buffer_streamed += req_size; GPUDevice::GetStatistics().buffer_streamed += req_size;
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
@ -1488,6 +1497,11 @@ bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
if (![m_device supportsFamily:MTLGPUFamilyApple2]) if (![m_device supportsFamily:MTLGPUFamilyApple2])
return false; return false;
} }
else if (format >= GPUTexture::Format::BC1 && format <= GPUTexture::Format::BC7)
{
if (!m_device.supportsBCTextureCompression)
return false;
}
return (s_pixel_format_mapping[static_cast<u8>(format)] != MTLPixelFormatInvalid); return (s_pixel_format_mapping[static_cast<u8>(format)] != MTLPixelFormatInvalid);
} }

View File

@ -506,6 +506,12 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
m_features.shader_cache = false; m_features.shader_cache = false;
m_features.dxt_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && GLAD_GL_EXT_texture_compression_s3tc);
m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
(GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc));
m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary; m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary;
if (m_features.pipeline_cache) if (m_features.pipeline_cache)
{ {

View File

@ -58,6 +58,10 @@ const std::tuple<GLenum, GLenum, GLenum>& OpenGLTexture::GetPixelFormatMapping(G
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
{GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7
}}; }};
// GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol. // GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol.
@ -88,6 +92,10 @@ const std::tuple<GLenum, GLenum, GLenum>& OpenGLTexture::GetPixelFormatMapping(G
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
{GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7
}}; }};
return gles ? mapping_gles[static_cast<u32>(format)] : mapping[static_cast<u32>(format)]; return gles ? mapping_gles[static_cast<u32>(format)] : mapping[static_cast<u32>(format)];
@ -169,6 +177,7 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
else else
{ {
const bool use_texture_storage = UseTextureStorage(false); const bool use_texture_storage = UseTextureStorage(false);
const bool is_compressed = IsCompressedFormat(format);
if (use_texture_storage) if (use_texture_storage)
{ {
if (layers > 1) if (layers > 1)
@ -183,10 +192,10 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
const u32 alignment = GetUploadAlignment(data_pitch); const u32 alignment = GetUploadAlignment(data_pitch);
if (data) if (data)
{ {
GPUDevice::GetStatistics().buffer_streamed += data_pitch * height; GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, data_pitch);
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size); glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(format, data_pitch));
if (alignment != DEFAULT_UPLOAD_ALIGNMENT) if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
} }
@ -197,19 +206,56 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
for (u32 i = 0; i < levels; i++) for (u32 i = 0; i < levels; i++)
{ {
if (use_texture_storage) if (use_texture_storage)
{
if (is_compressed)
{
const u32 size = CalcUploadSize(format, current_height, data_pitch);
if (layers > 1)
{
glCompressedTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, size,
data_ptr);
}
else
{
glCompressedTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, size, data_ptr);
}
}
else
{ {
if (layers > 1) if (layers > 1)
glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr); glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr);
else else
glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr); glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr);
} }
}
else
{
if (is_compressed)
{
const u32 size = CalcUploadSize(format, current_height, data_pitch);
if (layers > 1)
{
glCompressedTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, size,
data_ptr);
}
else
{
glCompressedTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, size, data_ptr);
}
}
else else
{ {
if (layers > 1) if (layers > 1)
{
glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type, glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type,
data_ptr); data_ptr);
}
else else
glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, data_ptr); {
glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type,
data_ptr);
}
}
} }
if (data_ptr) if (data_ptr)
@ -257,14 +303,11 @@ void OpenGLTexture::CommitClear()
bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
u32 level /*= 0*/) u32 level /*= 0*/)
{ {
// TODO: perf counters
// Worth using the PBO? Driver probably knows better... // Worth using the PBO? Driver probably knows better...
const GLenum target = GetGLTarget(); const GLenum target = GetGLTarget();
const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES());
const u32 pixel_size = GetPixelSize(); const u32 preferred_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 preferred_pitch = Common::AlignUpPow2(static_cast<u32>(width) * pixel_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 map_size = CalcUploadSize(height, pitch);
const u32 map_size = preferred_pitch * static_cast<u32>(height);
OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer();
CommitClear(); CommitClear();
@ -283,8 +326,22 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
if (alignment != DEFAULT_UPLOAD_ALIGNMENT) if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size); glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch));
glTexSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data); if (IsCompressedFormat())
{
const u32 size = CalcUploadSize(height, pitch);
if (IsTextureArray())
glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, data);
else
glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, data);
}
else
{
if (IsTextureArray())
glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, data);
else
glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, data);
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
if (alignment != DEFAULT_UPLOAD_ALIGNMENT) if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
@ -293,13 +350,39 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
else else
{ {
const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * pixel_size, height); CopyTextureDataForUpload(width, height, m_format, map.pointer, preferred_pitch, data, pitch);
sb->Unmap(map_size); sb->Unmap(map_size);
sb->Bind(); sb->Bind();
glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / pixel_size); glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(preferred_pitch));
glTexSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type, if (IsCompressedFormat())
{
const u32 size = CalcUploadSize(height, pitch);
if (IsTextureArray())
{
glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset))); reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
else
{
glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
}
else
{
if (IsTextureArray())
{
glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
else
{
glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
sb->Unbind(); sb->Unbind();
@ -315,8 +398,8 @@ bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3
if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
return false; return false;
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = pitch * static_cast<u32>(height); const u32 upload_size = CalcUploadSize(height, pitch);
OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer();
if (!sb || upload_size > sb->GetSize()) if (!sb || upload_size > sb->GetSize())
return false; return false;
@ -339,8 +422,8 @@ void OpenGLTexture::Unmap()
{ {
CommitClear(); CommitClear();
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = pitch * static_cast<u32>(m_map_height); const u32 upload_size = CalcUploadSize(m_map_height, pitch);
GPUDevice::GetStatistics().buffer_streamed += upload_size; GPUDevice::GetStatistics().buffer_streamed += upload_size;
GPUDevice::GetStatistics().num_uploads++; GPUDevice::GetStatistics().num_uploads++;
@ -354,9 +437,25 @@ void OpenGLTexture::Unmap()
const GLenum target = GetGLTarget(); const GLenum target = GetGLTarget();
glBindTexture(target, m_id); glBindTexture(target, m_id);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize()); glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch));
const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES());
if (IsCompressedFormat())
{
const u32 size = CalcUploadSize(m_map_height, pitch);
if (IsTextureArray())
{
glCompressedTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1,
gl_format, size, reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
}
else
{
glCompressedTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, size,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
}
}
else
{
if (IsTextureArray()) if (IsTextureArray())
{ {
glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format,
@ -367,6 +466,7 @@ void OpenGLTexture::Unmap()
glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset))); reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
} }
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

View File

@ -96,6 +96,10 @@ const std::array<VkFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> Vulka
VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F
VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F
VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2 VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2
VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // BC1
VK_FORMAT_BC2_UNORM_BLOCK, // BC2
VK_FORMAT_BC3_UNORM_BLOCK, // BC3
VK_FORMAT_BC7_UNORM_BLOCK, // BC7
}; };
// Handles are always 64-bit, even on 32-bit platforms. // Handles are always 64-bit, even on 32-bit platforms.
@ -640,6 +644,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
enabled_features.sampleRateShading = available_features.sampleRateShading; enabled_features.sampleRateShading = available_features.sampleRateShading;
enabled_features.geometryShader = available_features.geometryShader; enabled_features.geometryShader = available_features.geometryShader;
enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics; enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
enabled_features.textureCompressionBC = available_features.textureCompressionBC;
device_info.pEnabledFeatures = &enabled_features; device_info.pEnabledFeatures = &enabled_features;
VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
@ -2456,6 +2461,10 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
m_features.raster_order_views = m_features.raster_order_views =
(!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics && (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics &&
m_optional_extensions.vk_ext_fragment_shader_interlock); m_optional_extensions.vk_ext_fragment_shader_interlock);
// Same feature bit for both.
m_features.dxt_textures = m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && vk_features.textureCompressionBC);
} }
void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,

View File

@ -230,20 +230,13 @@ VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate()
return dev.GetCurrentInitCommandBuffer(); return dev.GetCurrentInitCommandBuffer();
} }
void VulkanTexture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch,
u32 upload_pitch) const
{
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height);
}
VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
u32 height) const u32 height, u32 buffer_size) const
{ {
const u32 size = upload_pitch * height;
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
nullptr, nullptr,
0, 0,
static_cast<VkDeviceSize>(size), static_cast<VkDeviceSize>(buffer_size),
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_SHARING_MODE_EXCLUSIVE, VK_SHARING_MODE_EXCLUSIVE,
0, 0,
@ -270,8 +263,8 @@ VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch,
VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation); VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation);
// And write the data. // And write the data.
CopyTextureDataForUpload(ai.pMappedData, data, width, height, pitch, upload_pitch); CopyTextureDataForUpload(width, height, m_format, ai.pMappedData, upload_pitch, data, pitch);
vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, size); vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, buffer_size);
return buffer; return buffer;
} }
@ -282,7 +275,7 @@ void VulkanTexture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 w
if (old_layout != Layout::TransferDst) if (old_layout != Layout::TransferDst)
TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst); TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst);
const u32 row_length = pitch / GetPixelSize(); const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
const VkBufferImageCopy bic = {static_cast<VkDeviceSize>(buffer_offset), const VkBufferImageCopy bic = {static_cast<VkDeviceSize>(buffer_offset),
row_length, row_length,
@ -302,8 +295,9 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
DebugAssert(layer < m_layers && level < m_levels); DebugAssert(layer < m_layers && level < m_levels);
DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level)); DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level));
const u32 upload_pitch = Common::AlignUpPow2(pitch, VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment()); const u32 upload_pitch =
const u32 required_size = height * upload_pitch; Common::AlignUpPow2(CalcUploadPitch(width), VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment());
const u32 required_size = CalcUploadSize(height, upload_pitch);
VulkanDevice& dev = VulkanDevice::GetInstance(); VulkanDevice& dev = VulkanDevice::GetInstance();
VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer();
@ -314,7 +308,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
if (required_size > (sbuffer.GetCurrentSize() / 2)) if (required_size > (sbuffer.GetCurrentSize() / 2))
{ {
buffer_offset = 0; buffer_offset = 0;
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size);
if (buffer == VK_NULL_HANDLE) if (buffer == VK_NULL_HANDLE)
return false; return false;
} }
@ -332,7 +326,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
buffer = sbuffer.GetBuffer(); buffer = sbuffer.GetBuffer();
buffer_offset = sbuffer.GetCurrentOffset(); buffer_offset = sbuffer.GetCurrentOffset();
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch);
sbuffer.CommitMemory(required_size); sbuffer.CommitMemory(required_size);
} }
@ -372,8 +366,8 @@ bool VulkanTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3
CommitClear(GetCommandBufferForUpdate()); CommitClear(GetCommandBufferForUpdate());
// see note in Update() for the reason why. // see note in Update() for the reason why.
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), dev.GetBufferCopyRowPitchAlignment());
const u32 req_size = height * aligned_pitch; const u32 req_size = CalcUploadSize(height, aligned_pitch);
VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer(); VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer();
if (req_size >= (buffer.GetCurrentSize() / 2)) if (req_size >= (buffer.GetCurrentSize() / 2))
return false; return false;
@ -402,8 +396,8 @@ void VulkanTexture::Unmap()
{ {
VulkanDevice& dev = VulkanDevice::GetInstance(); VulkanDevice& dev = VulkanDevice::GetInstance();
VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer(); VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer();
const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), dev.GetBufferCopyRowPitchAlignment());
const u32 req_size = m_map_height * aligned_pitch; const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
const u32 offset = sb.GetCurrentOffset(); const u32 offset = sb.GetCurrentOffset();
sb.CommitMemory(req_size); sb.CommitMemory(req_size);

View File

@ -85,8 +85,8 @@ private:
VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format); VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format);
VkCommandBuffer GetCommandBufferForUpdate(); VkCommandBuffer GetCommandBufferForUpdate();
void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height,
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height) const; u32 buffer_size) const;
void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch, void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch,
VkBuffer buffer, u32 buffer_offset); VkBuffer buffer, u32 buffer_offset);