GPUDevice: Support compressed textures

This commit is contained in:
Stenzek 2024-11-24 18:10:59 +10:00
parent 24dfd30839
commit 7eb1d4e092
No known key found for this signature in database
18 changed files with 841 additions and 196 deletions

View File

@ -200,6 +200,13 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
(SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) &&
data.ROVsSupported);
}
m_features.dxt_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
(SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) &&
SupportsTextureFormat(GPUTexture::Format::BC3)));
m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7));
}
D3D11SwapChain::D3D11SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle,

View File

@ -147,23 +147,24 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
if (HasFlag(Flags::AllowMap))
{
void* map;
u32 map_stride;
if (!Map(&map, &map_stride, x, y, width, height, layer, level))
u32 map_pitch;
if (!Map(&map, &map_pitch, x, y, width, height, layer, level))
return false;
StringUtil::StrideMemCpy(map, map_stride, data, pitch, GetPixelSize() * width, height);
CopyTextureDataForUpload(width, height, m_format, map, map_pitch, data, pitch);
Unmap();
return true;
}
const CD3D11_BOX box(static_cast<LONG>(x), static_cast<LONG>(y), 0, static_cast<LONG>(x + width),
static_cast<LONG>(y + height), 1);
const u32 bs = GetBlockSize();
const D3D11_BOX box = {Common::AlignDownPow2(x, bs), Common::AlignDownPow2(y, bs), 0U,
Common::AlignUpPow2(x + width, bs), Common::AlignUpPow2(y + height, bs), 1U};
const u32 srnum = D3D11CalcSubresource(level, layer, m_levels);
ID3D11DeviceContext1* context = D3D11Device::GetD3DContext();
CommitClear(context);
GPUDevice::GetStatistics().buffer_streamed += height * pitch;
GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, pitch);
GPUDevice::GetStatistics().num_uploads++;
context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0);
@ -194,10 +195,18 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
return false;
}
GPUDevice::GetStatistics().buffer_streamed += height * sr.RowPitch;
GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, sr.RowPitch);
GPUDevice::GetStatistics().num_uploads++;
if (IsCompressedFormat(m_format))
{
*map = static_cast<u8*>(sr.pData) + ((y / GetBlockSize()) * sr.RowPitch) +
((x / GetBlockSize()) * GetPixelSize());
}
else
{
*map = static_cast<u8*>(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize());
}
*map_stride = sr.RowPitch;
m_mapped_subresource = srnum;
m_state = GPUTexture::State::Dirty;
@ -294,7 +303,7 @@ std::unique_ptr<D3D11Texture> D3D11Texture::Create(ID3D11Device* device, u32 wid
if (initial_data)
{
GPUDevice::GetStatistics().buffer_streamed += height * initial_data_stride;
GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, initial_data_stride);
GPUDevice::GetStatistics().num_uploads++;
}

View File

@ -1366,6 +1366,13 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
options.ROVsSupported;
}
m_features.dxt_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
(SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) &&
SupportsTextureFormat(GPUTexture::Format::BC3)));
m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7));
}
void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,

View File

@ -340,23 +340,23 @@ ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate()
return dev.GetInitCommandList();
}
void D3D12Texture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch,
u32 upload_pitch) const
{
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height);
}
ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
u32 height) const
u32 height, u32 buffer_size) const
{
const u32 size = upload_pitch * height;
ComPtr<ID3D12Resource> resource;
ComPtr<D3D12MA::Allocation> allocation;
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD,
D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
const D3D12_RESOURCE_DESC resource_desc = {
D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
0,
buffer_size,
1,
1,
1,
DXGI_FORMAT_UNKNOWN,
{1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(),
@ -375,9 +375,9 @@ ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32
return nullptr;
}
CopyTextureDataForUpload(map_ptr, data, width, height, pitch, upload_pitch);
CopyTextureDataForUpload(width, height, m_format, map_ptr, upload_pitch, data, pitch);
const D3D12_RANGE write_range = {0, size};
const D3D12_RANGE write_range = {0, buffer_size};
resource->Unmap(0, &write_range);
// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
@ -395,8 +395,8 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
D3D12Device& dev = D3D12Device::GetInstance();
D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer();
const u32 upload_pitch = Common::AlignUpPow2<u32>(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 required_size = height * upload_pitch;
const u32 upload_pitch = Common::AlignUpPow2<u32>(CalcUploadPitch(width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 required_size = CalcUploadSize(height, upload_pitch);
D3D12_TEXTURE_COPY_LOCATION srcloc;
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
@ -410,7 +410,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
if (required_size > (sbuffer.GetSize() / 2))
{
srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height);
srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size);
if (!srcloc.pResource)
return false;
@ -431,7 +431,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
srcloc.pResource = sbuffer.GetBuffer();
srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset();
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch);
CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch);
sbuffer.CommitMemory(required_size);
}
@ -482,8 +482,8 @@ bool D3D12Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
CommitClear(GetCommandBufferForUpdate());
// see note in Update() for the reason why.
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 req_size = height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 req_size = CalcUploadSize(m_height, aligned_pitch);
D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer();
if (req_size >= (buffer.GetSize() / 2))
return false;
@ -512,8 +512,8 @@ void D3D12Texture::Unmap()
{
D3D12Device& dev = D3D12Device::GetInstance();
D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer();
const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 req_size = m_map_height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
const u32 offset = sb.GetCurrentOffset();
sb.CommitMemory(req_size);

View File

@ -80,8 +80,7 @@ private:
ID3D12GraphicsCommandList4* GetCommandBufferForUpdate();
ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
u32 height) const;
void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const;
u32 height, u32 buffer_size) const;
void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist);
ComPtr<ID3D12Resource> m_resource;

View File

@ -650,6 +650,10 @@ static constexpr std::array<D3DCommon::DXGIFormatMapping, static_cast<int>(GPUTe
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC1
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC2
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC3
{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC7
// clang-format on
}};

View File

@ -1057,8 +1057,22 @@ std::unique_ptr<GPUTexture> GPUDevice::FetchAndUploadTextureImage(const Image& i
{
const Image* image_to_upload = &image;
GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat());
bool gpu_format_supported;
// avoid device query for compressed formats that we've already pretested
if (gpu_format >= GPUTexture::Format::BC1 && gpu_format <= GPUTexture::Format::BC3)
gpu_format_supported = m_features.dxt_textures;
else if (gpu_format == GPUTexture::Format::BC7)
gpu_format_supported = m_features.bptc_textures;
else if (gpu_format == GPUTexture::Format::RGBA8) // always supported
gpu_format_supported = true;
else if (gpu_format != GPUTexture::Format::Unknown)
gpu_format_supported = SupportsTextureFormat(gpu_format);
else
gpu_format_supported = false;
std::optional<Image> converted_image;
if (!SupportsTextureFormat(gpu_format))
if (!gpu_format_supported)
{
converted_image = image.ConvertToRGBA8(error);
if (!converted_image.has_value())

View File

@ -515,6 +515,7 @@ public:
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
FEATURE_MASK_COMPRESSED_TEXTURES = (1 << 9),
};
enum class DrawBarrier : u32
@ -553,6 +554,8 @@ public:
bool pipeline_cache : 1;
bool prefer_unused_textures : 1;
bool raster_order_views : 1;
bool dxt_textures : 1;
bool bptc_textures : 1;
};
struct Statistics

View File

@ -25,7 +25,7 @@ GPUTexture::~GPUTexture()
const char* GPUTexture::GetFormatName(Format format)
{
static constexpr const char* format_names[static_cast<u8>(Format::MaxCount)] = {
static constexpr const std::array<const char*, static_cast<size_t>(Format::MaxCount)> format_names = {{
"Unknown", // Unknown
"RGBA8", // RGBA8
"BGRA8", // BGRA8
@ -51,43 +51,35 @@ const char* GPUTexture::GetFormatName(Format format)
"RGBA16F", // RGBA16F
"RGBA32F", // RGBA32F
"RGB10A2", // RGB10A2
};
"BC1", // BC1
"BC2", // BC2
"BC3", // BC3
"BC7", // BC7
}};
return format_names[static_cast<u8>(format)];
}
u32 GPUTexture::GetCompressedBytesPerBlock() const
u32 GPUTexture::GetBlockSize() const
{
return GetCompressedBytesPerBlock(m_format);
return GetBlockSize(m_format);
}
u32 GPUTexture::GetCompressedBytesPerBlock(Format format)
u32 GPUTexture::GetBlockSize(Format format)
{
// TODO: Implement me
return GetPixelSize(format);
}
u32 GPUTexture::GetCompressedBlockSize() const
{
return GetCompressedBlockSize(m_format);
}
u32 GPUTexture::GetCompressedBlockSize(Format format)
{
// TODO: Implement me
/*if (format >= Format::BC1 && format <= Format::BC7)
if (format >= Format::BC1 && format <= Format::BC7)
return 4;
else*/
else
return 1;
}
u32 GPUTexture::CalcUploadPitch(Format format, u32 width)
{
/*
// convert to blocks
if (format >= Format::BC1 && format <= Format::BC7)
width = Common::AlignUpPow2(width, 4) / 4;
*/
return width * GetCompressedBytesPerBlock(format);
return width * GetPixelSize(format);
}
u32 GPUTexture::CalcUploadPitch(u32 width) const
@ -102,9 +94,11 @@ u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const
u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch)
{
const u32 block_size = GetCompressedBlockSize(format);
const u32 bytes_per_block = GetCompressedBytesPerBlock(format);
return ((pitch + (bytes_per_block - 1)) / bytes_per_block) * block_size;
const u32 pixel_size = GetPixelSize(format);
if (IsCompressedFormat(format))
return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * 4;
else
return pitch / pixel_size;
}
u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const
@ -114,36 +108,64 @@ u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const
u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch)
{
const u32 block_size = GetCompressedBlockSize(format);
const u32 block_size = GetBlockSize(format);
return pitch * ((static_cast<u32>(height) + (block_size - 1)) / block_size);
}
/// Returns true if `format` is one of the block-compressed (BCn) texture formats.
bool GPUTexture::IsCompressedFormat(Format format)
{
  // Every enumerator from BC1 onwards counts as block-compressed.
  const bool before_first_bc_format = (format < Format::BC1);
  return !before_first_bc_format;
}
bool GPUTexture::IsCompressedFormat() const
{
return IsCompressedFormat(m_format);
}
/// Computes the number of mip levels in a full chain for a `width` x `height` texture.
/// The count is derived from the larger of the two dimensions.
u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height)
{
  const u32 largest_dim = std::max(width, height);

  // NOTE(review): PreviousPow2 presumably rounds down to a power of two, making the
  // trailing-zero count below equal to floor(log2(largest_dim)) - confirm against Common.
  const u32 largest_pow2 = Common::PreviousPow2(largest_dim);
  const u32 log2_largest = static_cast<u32>(std::countr_zero(largest_pow2));

  // One level for the base image, plus one per halving.
  return log2_largest + 1;
}
/// Copies a `width` x `height` texel region from `src` to `dst`, row by row, honouring the
/// (possibly different) source and destination pitches.
///
/// For block-compressed formats the copy operates on rows of 4x4 blocks: the dimensions are
/// rounded up to a multiple of 4 and GetPixelSize() is used as the size of one block in bytes.
/// For all other formats it copies `height` rows of `width * GetPixelSize(format)` bytes.
void GPUTexture::CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch,
                                          const void* src, u32 src_pitch)
{
  u32 row_bytes;
  u32 row_count;

  if (!IsCompressedFormat(format))
  {
    row_bytes = width * GetPixelSize(format);
    row_count = height;
  }
  else
  {
    // Partial blocks at the right/bottom edges still occupy a whole block in memory.
    const u32 bytes_per_block = GetPixelSize(format);
    const u32 block_columns = Common::AlignUpPow2(width, 4) / 4;
    const u32 block_rows = Common::AlignUpPow2(height, 4) / 4;
    row_bytes = bytes_per_block * block_columns;
    row_count = block_rows;
  }

  StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, row_bytes, row_count);
}
GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format)
{
static constexpr const std::array<Format, static_cast<size_t>(ImageFormat::MaxCount)> mapping = {{
static constexpr const std::array mapping = {
Format::Unknown, // None
Format::RGBA8, // RGBA8
Format::BGRA8, // BGRA8
Format::RGB565, // RGB565
Format::Unknown, // RGBA5551
Format::Unknown, // BC1
Format::Unknown, // BC2
Format::Unknown, // BC3
Format::Unknown, // BC7
}};
Format::RGBA5551, // RGBA5551
Format::Unknown, // BGR8
Format::BC1, // BC1
Format::BC2, // BC2
Format::BC3, // BC3
Format::BC7, // BC7
};
static_assert(mapping.size() == static_cast<size_t>(ImageFormat::MaxCount));
return mapping[static_cast<size_t>(format)];
}
ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format)
{
static constexpr const std::array<ImageFormat, static_cast<size_t>(Format::MaxCount)> mapping = {{
static constexpr const std::array mapping = {
ImageFormat::None, // Unknown
ImageFormat::RGBA8, // RGBA8
ImageFormat::BGRA8, // BGRA8
@ -169,7 +191,12 @@ ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format)
ImageFormat::None, // RGBA16F
ImageFormat::None, // RGBA32F
ImageFormat::None, // RGB10A2
}};
ImageFormat::BC1, // BC1
ImageFormat::BC2, // BC2
ImageFormat::BC3, // BC3
ImageFormat::BC7, // BC7
};
static_assert(mapping.size() == static_cast<size_t>(Format::MaxCount));
return mapping[static_cast<size_t>(format)];
}
@ -226,6 +253,10 @@ u32 GPUTexture::GetPixelSize(GPUTexture::Format format)
8, // RGBA16F
16, // RGBA32F
4, // RGB10A2
8, // BC1 - 16 pixels in 64 bits
16, // BC2 - 16 pixels in 128 bits
16, // BC3 - 16 pixels in 128 bits
16, // BC7 - 16 pixels in 128 bits
}};
return sizes[static_cast<size_t>(format)];
@ -241,12 +272,6 @@ bool GPUTexture::IsDepthStencilFormat(Format format)
return (format == Format::D24S8 || format == Format::D32FS8);
}
bool GPUTexture::IsCompressedFormat(Format format)
{
// TODO: Implement me
return false;
}
bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format,
Flags flags, Error* error)
{
@ -318,6 +343,12 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u
return false;
}
if (IsCompressedFormat(format) && (type != Type::Texture || ((flags & Flags::AllowBindAsImage) != Flags::None)))
{
Error::SetStringView(error, "Compressed formats are only supported for textures.");
return false;
}
return true;
}

View File

@ -61,7 +61,11 @@ public:
RGBA16F,
RGBA32F,
RGB10A2,
MaxCount
BC1, ///< BC1, aka DXT1 compressed texture
BC2, ///< BC2, aka DXT2/3 compressed texture
BC3, ///< BC3, aka DXT4/5 compressed texture
BC7, ///< BC7, aka BPTC compressed texture
MaxCount,
};
enum class State : u8
@ -95,12 +99,13 @@ public:
static bool IsDepthFormat(Format format);
static bool IsDepthStencilFormat(Format format);
static bool IsCompressedFormat(Format format);
static u32 GetCompressedBytesPerBlock(Format format);
static u32 GetCompressedBlockSize(Format format);
static u32 GetBlockSize(Format format);
static u32 CalcUploadPitch(Format format, u32 width);
static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch);
static u32 CalcUploadSize(Format format, u32 height, u32 pitch);
static u32 GetFullMipmapCount(u32 width, u32 height);
static void CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, const void* src,
u32 src_pitch);
static Format GetTextureFormatForImageFormat(ImageFormat format);
static ImageFormat GetImageFormatForTextureFormat(Format format);
@ -160,8 +165,8 @@ public:
size_t GetVRAMUsage() const;
u32 GetCompressedBytesPerBlock() const;
u32 GetCompressedBlockSize() const;
bool IsCompressedFormat() const;
u32 GetBlockSize() const;
u32 CalcUploadPitch(u32 width) const;
u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
u32 CalcUploadSize(u32 height, u32 pitch) const;

View File

@ -46,6 +46,10 @@ static bool WebPBufferSaver(const Image& image, DynamicHeapArray<u8>* data, u8 q
static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error);
static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error);
static bool DDSBufferLoader(Image* image, std::span<const u8> data, Error* error);
static bool DDSFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error);
namespace {
struct FormatHandler
{
const char* extension;
@ -54,12 +58,14 @@ struct FormatHandler
bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*);
bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*);
};
} // namespace
// Table of supported image containers, one entry per file extension (without the dot).
// Judging by the handler names, the function slots are, in order: buffer loader, buffer saver,
// file loader, file saver. "jpg" and "jpeg" share the same handlers.
// DDS is load-only here: both of its saver slots are nullptr.
static constexpr FormatHandler s_format_handlers[] = {
{"png", PNGBufferLoader, PNGBufferSaver, PNGFileLoader, PNGFileSaver},
{"jpg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver},
{"jpeg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver},
{"webp", WebPBufferLoader, WebPBufferSaver, WebPFileLoader, WebPFileSaver},
{"dds", DDSBufferLoader, nullptr, DDSFileLoader, nullptr},
};
static const FormatHandler* GetFormatHandler(std::string_view extension)
@ -155,17 +161,19 @@ Image& Image::operator=(Image&& move)
const char* Image::GetFormatName(ImageFormat format)
{
static constexpr std::array<const char*, static_cast<size_t>(ImageFormat::MaxCount)> names = {
static constexpr std::array names = {
"None", // None
"RGBA8", // RGBA8
"BGRA8", // BGRA8
"RGB565", // RGB565
"RGB5551", // RGBA5551
"BGR8", // BGR8
"BC1", // BC1
"BC2", // BC2
"BC3", // BC3
"BC7", // BC7
};
static_assert(names.size() == static_cast<size_t>(ImageFormat::MaxCount));
return names[static_cast<size_t>(format)];
}
@ -178,6 +186,7 @@ u32 Image::GetPixelSize(ImageFormat format)
4, // BGRA8
2, // RGB565
2, // RGBA5551
3, // BGR8
8, // BC1 - 16 pixels in 64 bits
16, // BC2 - 16 pixels in 128 bits
16, // BC3 - 16 pixels in 128 bits
@ -563,6 +572,27 @@ std::optional<Image> Image::ConvertToRGBA8(Error* error) const
}
}
}
break;
case ImageFormat::BGR8:
{
  // 24-bit source with bytes stored as B, G, R (matches the DDSPF_R8G8B8 channel masks).
  // Expand to RGBA8, whose bytes are R, G, B, A (i.e. R in the low byte of a little-endian u32).
  ret = Image(m_width, m_height, ImageFormat::RGBA8);
  for (u32 y = 0; y < m_height; y++)
  {
    const u8* pixels_in = GetRowPixels(y);
    u8* pixels_out = ret->GetRowPixels(y);

    for (u32 x = 0; x < m_width; x++)
    {
      // Swap red and blue, keep green, and set the alpha channel to full intensity.
      const u32 blue = ZeroExtend32(pixels_in[0]);
      const u32 green = ZeroExtend32(pixels_in[1]);
      const u32 red = ZeroExtend32(pixels_in[2]);
      const u32 rgba = (red | (green << 8) | (blue << 16) | 0xFF000000u);
      std::memcpy(pixels_out, &rgba, sizeof(rgba));
      pixels_in += 3;
      pixels_out += sizeof(rgba);
    }
  }
}
break;
// TODO: Block format decompression
@ -1220,3 +1250,415 @@ bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp,
return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// DDS Handler
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// From https://raw.githubusercontent.com/Microsoft/DirectXTex/master/DirectXTex/DDS.h
//
// This header defines constants and structures that are useful when parsing
// DDS files. DDS files were originally designed to use several structures
// and constants that are native to DirectDraw and are defined in ddraw.h,
// such as DDSURFACEDESC2 and DDSCAPS2. This file defines similar
// (compatible) constants and structures so that one can use DDS files
// without needing to include ddraw.h.
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// http://go.microsoft.com/fwlink/?LinkId=248926
#pragma pack(push, 1)
static constexpr uint32_t DDS_MAGIC = 0x20534444; // "DDS "
// Pixel format description embedded in DDS_HEADER (layout taken from DirectXTex's DDS.h).
// Declared inside the surrounding #pragma pack(push, 1) region, so the on-disk layout is
// exactly eight consecutive 32-bit fields.
struct DDS_PIXELFORMAT
{
// Structure size; the DDSPF_* constants in this file set it to sizeof(DDS_PIXELFORMAT).
uint32_t dwSize;
// DDS_* pixel format flags (DDS_FOURCC, DDS_RGB, DDS_RGBA, ...).
uint32_t dwFlags;
// Four-character format code; the header parser only interprets it when DDS_FOURCC is set.
uint32_t dwFourCC;
// Bits per pixel for uncompressed formats.
uint32_t dwRGBBitCount;
// Per-channel bit masks for uncompressed formats.
uint32_t dwRBitMask;
uint32_t dwGBitMask;
uint32_t dwBBitMask;
uint32_t dwABitMask;
};
#define DDS_FOURCC 0x00000004 // DDPF_FOURCC
#define DDS_RGB 0x00000040 // DDPF_RGB
#define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS
#define DDS_LUMINANCE 0x00020000 // DDPF_LUMINANCE
#define DDS_LUMINANCEA 0x00020001 // DDPF_LUMINANCE | DDPF_ALPHAPIXELS
#define DDS_ALPHA 0x00000002 // DDPF_ALPHA
#define DDS_PAL8 0x00000020 // DDPF_PALETTEINDEXED8
#define DDS_PAL8A 0x00000021 // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS
#define DDS_BUMPDUDV 0x00080000 // DDPF_BUMPDUDV
#ifndef MAKEFOURCC
#define MAKEFOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | \
((uint32_t)(uint8_t)(ch3) << 24))
#endif /* defined(MAKEFOURCC) */
#define DDS_HEADER_FLAGS_TEXTURE 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT
#define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT
#define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH
#define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH
#define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE
#define DDS_MAX_TEXTURE_SIZE 32768
// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION
// Values stored in DDS_HEADER_DXT10::resourceDimension.
// The header parser in this file only accepts DDS_DIMENSION_TEXTURE2D.
enum DDS_RESOURCE_DIMENSION
{
DDS_DIMENSION_TEXTURE1D = 2,
DDS_DIMENSION_TEXTURE2D = 3,
DDS_DIMENSION_TEXTURE3D = 4,
};
// Main DDS file header, which follows the 4-byte DDS_MAGIC at the start of the file.
// Packed (see the surrounding #pragma pack); a static_assert below pins its size to 124 bytes.
struct DDS_HEADER
{
// Header size as stored in the file; the parser rejects values smaller than sizeof(DDS_HEADER).
uint32_t dwSize;
// Combination of DDS_HEADER_FLAGS_* bits.
uint32_t dwFlags;
// Dimensions of the base image; the parser requires both to be non-zero and below
// DDS_MAX_TEXTURE_SIZE.
uint32_t dwHeight;
uint32_t dwWidth;
// The parser treats this as the row pitch in bytes, and only when both
// DDS_HEADER_FLAGS_PITCH and DDS_HEADER_FLAGS_LINEARSIZE are set.
uint32_t dwPitchOrLinearSize;
uint32_t dwDepth; // only if DDS_HEADER_FLAGS_VOLUME is set in dwFlags
// Only consulted when DDS_HEADER_FLAGS_MIPMAP is set; zero then means "full mip chain".
uint32_t dwMipMapCount;
uint32_t dwReserved1[11];
DDS_PIXELFORMAT ddspf;
// Capability fields; not read by the loader code in this file.
uint32_t dwCaps;
uint32_t dwCaps2;
uint32_t dwCaps3;
uint32_t dwCaps4;
uint32_t dwReserved2;
};
// Extension header that follows DDS_HEADER when ddspf.dwFourCC is 'DX10'.
// Packed; a static_assert below pins its size to 20 bytes.
struct DDS_HEADER_DXT10
{
// Numeric DXGI_FORMAT value; the parser recognises 71, 74, 77 and 98 (BC1, BC2, BC3, BC7).
uint32_t dxgiFormat;
// One of DDS_RESOURCE_DIMENSION; the parser requires DDS_DIMENSION_TEXTURE2D.
uint32_t resourceDimension;
uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG
// Number of array elements; the parser requires exactly 1.
uint32_t arraySize;
uint32_t miscFlags2; // see DDS_MISC_FLAGS2
};
#pragma pack(pop)
static_assert(sizeof(DDS_HEADER) == 124, "DDS Header size mismatch");
static_assert(sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch");
constexpr DDS_PIXELFORMAT DDSPF_A8R8G8B8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000};
constexpr DDS_PIXELFORMAT DDSPF_X8R8G8B8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000};
constexpr DDS_PIXELFORMAT DDSPF_A8B8G8R8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000};
constexpr DDS_PIXELFORMAT DDSPF_X8B8G8R8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000};
constexpr DDS_PIXELFORMAT DDSPF_R8G8B8 = {
sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000};
// End of Microsoft code from DDS.h.
/// Returns true when every field of the two pixel format descriptors is equal.
/// Used to recognise the well-known uncompressed layouts (the DDSPF_* constants).
static bool DDSPixelFormatMatches(const DDS_PIXELFORMAT& pf1, const DDS_PIXELFORMAT& pf2)
{
  // Structure-level fields first.
  if (pf1.dwSize != pf2.dwSize || pf1.dwFlags != pf2.dwFlags || pf1.dwFourCC != pf2.dwFourCC)
    return false;

  if (pf1.dwRGBBitCount != pf2.dwRGBBitCount)
    return false;

  // Finally the per-channel masks.
  return (pf1.dwRBitMask == pf2.dwRBitMask && pf1.dwGBitMask == pf2.dwGBitMask &&
          pf1.dwBBitMask == pf2.dwBBitMask && pf1.dwABitMask == pf2.dwABitMask);
}
/// Result of parsing a DDS header: everything the loaders need to locate and interpret the
/// base (first) image in the file. The defaults describe an uncompressed 4-byte-per-pixel image.
struct DDSLoadInfo
{
  /// Width/height of one block in texels: 1 for uncompressed formats, 4 for the BCn formats.
  u32 block_size{1};

  /// Size of one block in bytes (for uncompressed formats, the size of one pixel).
  u32 bytes_per_block{4};

  /// Dimensions of the base image, in texels.
  u32 width{0};
  u32 height{0};

  /// Number of mip levels the header declares (always at least 1 after a successful parse).
  u32 mip_count{0};

  /// Image format the pixel data should be interpreted as.
  ImageFormat format{ImageFormat::RGBA8};

  /// Byte offset of the base image from the start of the file (magic plus headers).
  s64 base_image_offset{0};

  /// Size of the base image in bytes: the pitch multiplied by the number of block rows.
  u32 base_image_size{0};

  /// Distance in bytes between consecutive rows of blocks in the file.
  u32 base_image_pitch{0};

  /// Set for formats that carry no alpha channel; the loaders then force all pixels opaque.
  bool clear_alpha{false};
};
template<typename ReadFunction>
static bool ParseDDSHeader(const ReadFunction& RF, DDSLoadInfo* info, Error* error)
{
u32 magic;
if (!RF(&magic, sizeof(magic), error) || magic != DDS_MAGIC)
{
Error::AddPrefix(error, "Failed to read magic: ");
return false;
}
DDS_HEADER header;
u32 header_size = sizeof(header);
if (!RF(&header, header_size, error) || header.dwSize < header_size)
{
Error::AddPrefix(error, "Failed to read header: ");
return false;
}
// We should check for DDS_HEADER_FLAGS_TEXTURE here, but some tools don't seem
// to set it (e.g. compressonator). But we can still validate the size.
if (header.dwWidth == 0 || header.dwWidth >= DDS_MAX_TEXTURE_SIZE || header.dwHeight == 0 ||
header.dwHeight >= DDS_MAX_TEXTURE_SIZE)
{
Error::SetStringFmt(error, "Size is invalid: {}x{}", header.dwWidth, header.dwHeight);
return false;
}
// Image should be 2D.
if (header.dwFlags & DDS_HEADER_FLAGS_VOLUME)
{
Error::SetStringView(error, "Volume textures are not supported.");
return false;
}
// Presence of width/height fields is already tested by DDS_HEADER_FLAGS_TEXTURE.
info->width = header.dwWidth;
info->height = header.dwHeight;
// Check for mip levels.
if (header.dwFlags & DDS_HEADER_FLAGS_MIPMAP)
{
info->mip_count = header.dwMipMapCount;
if (header.dwMipMapCount != 0)
{
info->mip_count = header.dwMipMapCount;
}
else
{
const u32 max_dim = Common::PreviousPow2(std::max(header.dwWidth, header.dwHeight));
info->mip_count = (std::countr_zero(max_dim) + 1);
}
}
else
{
info->mip_count = 1;
}
// Handle fourcc formats vs uncompressed formats.
const bool has_fourcc = (header.ddspf.dwFlags & DDS_FOURCC) != 0;
if (has_fourcc)
{
// Handle DX10 extension header.
u32 dxt10_format = 0;
if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', '1', '0'))
{
DDS_HEADER_DXT10 dxt10_header;
if (!RF(&dxt10_header, sizeof(dxt10_header), error))
{
Error::AddPrefix(error, "Failed to read DXT10 header: ");
return false;
}
// Can't handle array textures here. Doesn't make sense to use them, anyway.
if (dxt10_header.resourceDimension != DDS_DIMENSION_TEXTURE2D || dxt10_header.arraySize != 1)
{
Error::SetStringView(error, "Only 2D textures are supported.");
return false;
}
header_size += sizeof(dxt10_header);
dxt10_format = dxt10_header.dxgiFormat;
}
if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '1') || dxt10_format == 71)
{
info->format = ImageFormat::BC1;
info->block_size = 4;
info->bytes_per_block = 8;
}
else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '2') ||
header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '3') || dxt10_format == 74)
{
info->format = ImageFormat::BC2;
info->block_size = 4;
info->bytes_per_block = 16;
}
else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '4') ||
header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '5') || dxt10_format == 77)
{
info->format = ImageFormat::BC3;
info->block_size = 4;
info->bytes_per_block = 16;
}
else if (dxt10_format == 98)
{
info->format = ImageFormat::BC7;
info->block_size = 4;
info->bytes_per_block = 16;
}
else
{
Error::SetStringFmt(error, "Unknown format with FOURCC 0x{:08X} / DXT10 format {}", header.ddspf.dwFourCC,
dxt10_format);
return false;
}
}
else
{
if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8R8G8B8))
{
info->format = ImageFormat::BGRA8;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8R8G8B8))
{
info->format = ImageFormat::BGRA8;
info->clear_alpha = true;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8B8G8R8))
{
info->format = ImageFormat::RGBA8;
info->clear_alpha = true;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_R8G8B8))
{
info->format = ImageFormat::BGR8;
info->clear_alpha = true;
}
else if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8B8G8R8))
{
info->format = ImageFormat::RGBA8;
}
else
{
Error::SetStringFmt(error, "Unhandled format with FOURCC 0x{:08X}", header.ddspf.dwFourCC);
return false;
}
// All these formats are RGBA, just with byte swapping.
info->block_size = 1;
info->bytes_per_block = header.ddspf.dwRGBBitCount / 8;
}
// Mip levels smaller than the block size are padded to multiples of the block size.
const u32 blocks_wide = Common::AlignUpPow2(info->width, info->block_size) / info->block_size;
const u32 blocks_high = Common::AlignUpPow2(info->height, info->block_size) / info->block_size;
// Pitch can be specified in the header, otherwise we can derive it from the dimensions. For
// compressed formats, both DDS_HEADER_FLAGS_LINEARSIZE and DDS_HEADER_FLAGS_PITCH should be
// set. See https://msdn.microsoft.com/en-us/library/windows/desktop/bb943982(v=vs.85).aspx
if (header.dwFlags & DDS_HEADER_FLAGS_PITCH && header.dwFlags & DDS_HEADER_FLAGS_LINEARSIZE)
{
// Convert pitch (in bytes) to texels/row length.
if (header.dwPitchOrLinearSize < info->bytes_per_block)
{
// Likely a corrupted or invalid file.
Error::SetStringFmt(error, "Invalid pitch: {}", header.dwPitchOrLinearSize);
return false;
}
info->base_image_pitch = header.dwPitchOrLinearSize;
info->base_image_size = info->base_image_pitch * blocks_high;
}
else
{
// Assume no padding between rows of blocks.
info->base_image_pitch = blocks_wide * info->bytes_per_block;
info->base_image_size = info->base_image_pitch * blocks_high;
}
info->base_image_offset = sizeof(magic) + header_size;
#if 0
// D3D11 cannot handle block compressed textures where the first mip level is not a multiple of the block size.
if (mip_level == 0 && info.block_size > 1 && ((width % info.block_size) != 0 || (height % info.block_size) != 0))
{
Error::SetStringFmt(error,
"Invalid dimensions for DDS texture. For compressed textures of this format, "
"the width/height of the first mip level must be a multiple of {}.",
info.block_size);
return false;
}
#endif
return true;
}
/// Loads the base image of a DDS file from an open stdio stream into `image`.
/// Only the first (largest) mip level is read; any further mip levels are ignored.
///
/// @param image Destination image; resized to the dimensions/format found in the header.
/// @param path  Name of the file being read (unused here).
/// @param fp    Stream to read from. The base image is located with an absolute seek from the
///              start of the stream.
/// @param error Receives a description of the failure.
/// @return true on success, false if the header is invalid or the data could not be read.
bool DDSFileLoader(Image* image, std::string_view path, std::FILE* fp, Error* error)
{
  const auto header_reader = [fp](void* buffer, size_t size, Error* error) {
    if (std::fread(buffer, size, 1, fp) == 1)
      return true;

    Error::SetErrno(error, "fread() failed: ", errno);
    return false;
  };

  DDSLoadInfo info;
  if (!ParseDDSHeader(header_reader, &info, error))
    return false;

  // always load the base image
  if (!FileSystem::FSeek64(fp, info.base_image_offset, SEEK_SET, error))
    return false;

  image->Resize(info.width, info.height, info.format, false);

  const u32 blocks = image->GetBlockYCount();
  const u32 image_pitch = image->GetPitch();
  if (image_pitch != info.base_image_pitch)
  {
    // The file's row pitch differs from the image's, so rows are read one at a time.
    // Never read more than the destination row can hold: the file pitch comes from the header
    // and may be larger than the image pitch. Any excess is per-row padding and is skipped.
    const u32 copy_size = std::min(image_pitch, info.base_image_pitch);
    const s64 skip_size = static_cast<s64>(info.base_image_pitch - copy_size);
    for (u32 y = 0; y < blocks; y++)
    {
      if (std::fread(image->GetRowPixels(y), copy_size, 1, fp) != 1)
      {
        Error::SetErrno(error, "fread() failed: ", errno);
        return false;
      }

      // No need to skip the padding after the final row; nothing else is read.
      if (skip_size > 0 && (y + 1) < blocks && !FileSystem::FSeek64(fp, skip_size, SEEK_CUR, error))
        return false;
    }
  }
  else
  {
    // Identical layout: read the whole image in one go.
    if (std::fread(image->GetPixels(), static_cast<size_t>(info.base_image_pitch) * blocks, 1, fp) != 1)
    {
      Error::SetErrno(error, "fread() failed: ", errno);
      return false;
    }
  }

  if (info.clear_alpha)
    image->SetAllPixelsOpaque();

  return true;
}
/// Loads the base (top-level) image of a DDS file held entirely in memory.
///
/// The header is parsed with ParseDDSHeader() using a reader that consumes bytes from @p data,
/// then the base image is handed to Image::SetPixels() using the pitch reported by the header.
///
/// @param image Destination image.
/// @param data  Bytes of the DDS file.
/// @param error Receives a description of the failure when false is returned.
/// @return true if the header parsed and the buffer holds the complete base image, otherwise false.
bool DDSBufferLoader(Image* image, std::span<const u8> data, Error* error)
{
  // Read cursor into the buffer; advanced each time the header parser pulls more bytes.
  size_t read_offset = 0;

  const auto read_header_bytes = [&data, &read_offset](void* dst, size_t count, Error* err) {
    const bool fits_in_buffer = ((read_offset + count) <= data.size());
    if (!fits_in_buffer)
    {
      Error::SetStringView(err, "Buffer does not contain sufficient data.");
      return false;
    }

    std::memcpy(dst, &data[read_offset], count);
    read_offset += count;
    return true;
  };

  DDSLoadInfo info;
  const bool header_ok = ParseDDSHeader(read_header_bytes, &info, error);
  if (!header_ok)
    return false;

  // Make sure the whole base image lies inside the buffer before touching it.
  const u64 base_image_end = static_cast<u64>(info.base_image_offset) + info.base_image_size;
  if (base_image_end > data.size())
  {
    Error::SetStringFmt(error, "Buffer does not contain complete base image.");
    return false;
  }

  const u8* const base_image_data = &data[static_cast<size_t>(info.base_image_offset)];
  image->SetPixels(info.width, info.height, info.format, base_image_data, info.base_image_pitch);

  if (info.clear_alpha)
    image->SetAllPixelsOpaque();

  return true;
}

View File

@ -21,6 +21,7 @@ enum class ImageFormat : u8
BGRA8,
RGB565,
RGBA5551,
BGR8,
BC1,
BC2,
BC3,

View File

@ -71,6 +71,11 @@ static constexpr std::array<MTLPixelFormat, static_cast<u32>(GPUTexture::Format:
MTLPixelFormatRGBA16Float, // RGBA16F
MTLPixelFormatRGBA32Float, // RGBA32F
MTLPixelFormatBGR10A2Unorm, // RGB10A2
MTLPixelFormatBC1_RGBA, // BC1
MTLPixelFormatBC2_RGBA, // BC2
MTLPixelFormatBC3_RGBA, // BC3
MTLPixelFormatBC7_RGBAUnorm, // BC7
};
static void LogNSError(NSError* error, std::string_view message)
@ -385,6 +390,10 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
m_features.pipeline_cache = true;
m_features.prefer_unused_textures = true;
// Same feature bit for both.
m_features.dxt_textures = m_features.bptc_textures =
!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && m_device.supportsBCTextureCompression;
// Disable pipeline cache on Intel, apparently it's buggy.
if ([[m_device name] containsString:@"Intel"])
{
@ -995,8 +1004,8 @@ MetalTexture::~MetalTexture()
bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
u32 level /*= 0*/)
{
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = CalcUploadSize(height, aligned_pitch);
GPUDevice::GetStatistics().buffer_streamed += req_size;
GPUDevice::GetStatistics().num_uploads++;
@ -1013,7 +1022,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options];
actual_offset = 0;
actual_pitch = pitch;
if (actual_buffer == nil)
if (actual_buffer == nil) [[unlikely]]
{
Panic("Failed to allocate temporary buffer.");
return false;
@ -1026,7 +1035,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
{
dev.SubmitCommandBuffer();
if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) [[unlikely]]
{
Panic("Failed to reserve texture upload space.");
return false;
@ -1034,7 +1043,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data,
}
actual_offset = sb.GetCurrentOffset();
StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
CopyTextureDataForUpload(width, height, m_format, sb.GetCurrentHostPointer(), aligned_pitch, data, pitch);
sb.CommitMemory(req_size);
actual_buffer = sb.GetBuffer();
actual_pitch = aligned_pitch;
@ -1065,8 +1074,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
return false;
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = CalcUploadSize(height, aligned_pitch);
MetalDevice& dev = MetalDevice::GetInstance();
if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
@ -1097,8 +1106,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
void MetalTexture::Unmap()
{
const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = m_map_height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
GPUDevice::GetStatistics().buffer_streamed += req_size;
GPUDevice::GetStatistics().num_uploads++;
@ -1488,6 +1497,11 @@ bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
if (![m_device supportsFamily:MTLGPUFamilyApple2])
return false;
}
else if (format >= GPUTexture::Format::BC1 && format <= GPUTexture::Format::BC7)
{
if (!m_device.supportsBCTextureCompression)
return false;
}
return (s_pixel_format_mapping[static_cast<u8>(format)] != MTLPixelFormatInvalid);
}

View File

@ -506,6 +506,12 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
m_features.shader_cache = false;
m_features.dxt_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && GLAD_GL_EXT_texture_compression_s3tc);
m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) &&
(GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc));
m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary;
if (m_features.pipeline_cache)
{

View File

@ -58,6 +58,10 @@ const std::tuple<GLenum, GLenum, GLenum>& OpenGLTexture::GetPixelFormatMapping(G
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
{GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7
}};
// GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol.
@ -88,6 +92,10 @@ const std::tuple<GLenum, GLenum, GLenum>& OpenGLTexture::GetPixelFormatMapping(G
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
{GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7
}};
return gles ? mapping_gles[static_cast<u32>(format)] : mapping[static_cast<u32>(format)];
@ -169,6 +177,7 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
else
{
const bool use_texture_storage = UseTextureStorage(false);
const bool is_compressed = IsCompressedFormat(format);
if (use_texture_storage)
{
if (layers > 1)
@ -183,10 +192,10 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
const u32 alignment = GetUploadAlignment(data_pitch);
if (data)
{
GPUDevice::GetStatistics().buffer_streamed += data_pitch * height;
GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, data_pitch);
GPUDevice::GetStatistics().num_uploads++;
glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size);
glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(format, data_pitch));
if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
}
@ -197,19 +206,56 @@ std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32
for (u32 i = 0; i < levels; i++)
{
if (use_texture_storage)
{
if (is_compressed)
{
const u32 size = CalcUploadSize(format, current_height, data_pitch);
if (layers > 1)
{
glCompressedTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, size,
data_ptr);
}
else
{
glCompressedTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, size, data_ptr);
}
}
else
{
if (layers > 1)
glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr);
else
glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr);
}
}
else
{
if (is_compressed)
{
const u32 size = CalcUploadSize(format, current_height, data_pitch);
if (layers > 1)
{
glCompressedTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, size,
data_ptr);
}
else
{
glCompressedTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, size, data_ptr);
}
}
else
{
if (layers > 1)
{
glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type,
data_ptr);
}
else
glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, data_ptr);
{
glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type,
data_ptr);
}
}
}
if (data_ptr)
@ -257,14 +303,11 @@ void OpenGLTexture::CommitClear()
bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
u32 level /*= 0*/)
{
// TODO: perf counters
// Worth using the PBO? Driver probably knows better...
const GLenum target = GetGLTarget();
const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES());
const u32 pixel_size = GetPixelSize();
const u32 preferred_pitch = Common::AlignUpPow2(static_cast<u32>(width) * pixel_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 map_size = preferred_pitch * static_cast<u32>(height);
const u32 preferred_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 map_size = CalcUploadSize(height, pitch);
OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer();
CommitClear();
@ -283,8 +326,22 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size);
glTexSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data);
glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch));
if (IsCompressedFormat())
{
const u32 size = CalcUploadSize(height, pitch);
if (IsTextureArray())
glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, data);
else
glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, data);
}
else
{
if (IsTextureArray())
glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, data);
else
glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, data);
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
if (alignment != DEFAULT_UPLOAD_ALIGNMENT)
@ -293,13 +350,39 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
else
{
const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * pixel_size, height);
CopyTextureDataForUpload(width, height, m_format, map.pointer, preferred_pitch, data, pitch);
sb->Unmap(map_size);
sb->Bind();
glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / pixel_size);
glTexSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type,
glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(preferred_pitch));
if (IsCompressedFormat())
{
const u32 size = CalcUploadSize(height, pitch);
if (IsTextureArray())
{
glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
else
{
glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
}
else
{
if (IsTextureArray())
{
glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
else
{
glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
sb->Unbind();
@ -315,8 +398,8 @@ bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3
if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
return false;
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = pitch * static_cast<u32>(height);
const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = CalcUploadSize(height, pitch);
OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer();
if (!sb || upload_size > sb->GetSize())
return false;
@ -339,8 +422,8 @@ void OpenGLTexture::Unmap()
{
CommitClear();
const u32 pitch = Common::AlignUpPow2(static_cast<u32>(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = pitch * static_cast<u32>(m_map_height);
const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 upload_size = CalcUploadSize(m_map_height, pitch);
GPUDevice::GetStatistics().buffer_streamed += upload_size;
GPUDevice::GetStatistics().num_uploads++;
@ -354,9 +437,25 @@ void OpenGLTexture::Unmap()
const GLenum target = GetGLTarget();
glBindTexture(target, m_id);
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize());
glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch));
const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES());
if (IsCompressedFormat())
{
const u32 size = CalcUploadSize(m_map_height, pitch);
if (IsTextureArray())
{
glCompressedTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1,
gl_format, size, reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
}
else
{
glCompressedTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, size,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
}
}
else
{
if (IsTextureArray())
{
glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format,
@ -367,6 +466,7 @@ void OpenGLTexture::Unmap()
glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
}
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

View File

@ -96,6 +96,10 @@ const std::array<VkFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> Vulka
VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F
VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F
VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2
VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // BC1
VK_FORMAT_BC2_UNORM_BLOCK, // BC2
VK_FORMAT_BC3_UNORM_BLOCK, // BC3
VK_FORMAT_BC7_UNORM_BLOCK, // BC7
};
// Handles are always 64-bit, even on 32-bit platforms.
@ -640,6 +644,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay
enabled_features.sampleRateShading = available_features.sampleRateShading;
enabled_features.geometryShader = available_features.geometryShader;
enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
enabled_features.textureCompressionBC = available_features.textureCompressionBC;
device_info.pEnabledFeatures = &enabled_features;
VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
@ -2456,6 +2461,10 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
m_features.raster_order_views =
(!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics &&
m_optional_extensions.vk_ext_fragment_shader_interlock);
// Same feature bit for both.
m_features.dxt_textures = m_features.bptc_textures =
(!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && vk_features.textureCompressionBC);
}
void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,

View File

@ -230,20 +230,13 @@ VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate()
return dev.GetCurrentInitCommandBuffer();
}
void VulkanTexture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch,
u32 upload_pitch) const
{
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height);
}
VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width,
u32 height) const
u32 height, u32 buffer_size) const
{
const u32 size = upload_pitch * height;
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
nullptr,
0,
static_cast<VkDeviceSize>(size),
static_cast<VkDeviceSize>(buffer_size),
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_SHARING_MODE_EXCLUSIVE,
0,
@ -270,8 +263,8 @@ VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch,
VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation);
// And write the data.
CopyTextureDataForUpload(ai.pMappedData, data, width, height, pitch, upload_pitch);
vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, size);
CopyTextureDataForUpload(width, height, m_format, ai.pMappedData, upload_pitch, data, pitch);
vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, buffer_size);
return buffer;
}
@ -282,7 +275,7 @@ void VulkanTexture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 w
if (old_layout != Layout::TransferDst)
TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst);
const u32 row_length = pitch / GetPixelSize();
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
const VkBufferImageCopy bic = {static_cast<VkDeviceSize>(buffer_offset),
row_length,
@ -302,8 +295,9 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
DebugAssert(layer < m_layers && level < m_levels);
DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level));
const u32 upload_pitch = Common::AlignUpPow2(pitch, VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment());
const u32 required_size = height * upload_pitch;
const u32 upload_pitch =
Common::AlignUpPow2(CalcUploadPitch(width), VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment());
const u32 required_size = CalcUploadSize(height, upload_pitch);
VulkanDevice& dev = VulkanDevice::GetInstance();
VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer();
@ -314,7 +308,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
if (required_size > (sbuffer.GetCurrentSize() / 2))
{
buffer_offset = 0;
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height);
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size);
if (buffer == VK_NULL_HANDLE)
return false;
}
@ -332,7 +326,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data
buffer = sbuffer.GetBuffer();
buffer_offset = sbuffer.GetCurrentOffset();
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch);
CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch);
sbuffer.CommitMemory(required_size);
}
@ -372,8 +366,8 @@ bool VulkanTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3
CommitClear(GetCommandBufferForUpdate());
// see note in Update() for the reason why.
const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment());
const u32 req_size = height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), dev.GetBufferCopyRowPitchAlignment());
const u32 req_size = CalcUploadSize(height, aligned_pitch);
VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer();
if (req_size >= (buffer.GetCurrentSize() / 2))
return false;
@ -402,8 +396,8 @@ void VulkanTexture::Unmap()
{
VulkanDevice& dev = VulkanDevice::GetInstance();
VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer();
const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment());
const u32 req_size = m_map_height * aligned_pitch;
const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), dev.GetBufferCopyRowPitchAlignment());
const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch);
const u32 offset = sb.GetCurrentOffset();
sb.CommitMemory(req_size);

View File

@ -85,8 +85,8 @@ private:
VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format);
VkCommandBuffer GetCommandBufferForUpdate();
void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const;
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height) const;
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height,
u32 buffer_size) const;
void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch,
VkBuffer buffer, u32 buffer_offset);