GS: Support BC1/BC2/BC3/BC7 compressed texture formats

This commit is contained in:
Connor McLaughlin 2022-02-20 22:32:05 +10:00 committed by refractionpcsx2
parent 69a500d657
commit 32dc68f103
11 changed files with 149 additions and 7 deletions

View File

@ -486,6 +486,7 @@ namespace Vulkan
m_device_features.largePoints = available_features.largePoints;
m_device_features.wideLines = available_features.wideLines;
m_device_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
m_device_features.textureCompressionBC = available_features.textureCompressionBC;
return true;
}

View File

@ -542,6 +542,8 @@ public:
bool point_expand : 1; ///< Supports point expansion in hardware without using geometry shaders.
bool line_expand : 1; ///< Supports line expansion in hardware without using geometry shaders.
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
FeatureSupport()
{
memset(this, 0, sizeof(*this));

View File

@ -59,7 +59,7 @@ bool GSTexture::Save(const std::string& fn)
GSMap map;
if (!g_gs_device->DownloadTexture(this, GSVector4i(0, 0, m_size.x, m_size.y), map))
{
Console.Error("(GSTextureVK) DownloadTexture() failed.");
Console.Error("(GSTexture) DownloadTexture() failed.");
return false;
}
@ -88,6 +88,47 @@ void GSTexture::Swap(GSTexture* tex)
std::swap(OffsetHack_mody, tex->OffsetHack_mody);
}
/// Returns the number of bytes occupied by one "block" of this texture's format.
/// The table below is indexed by the numeric value of GSTexture::Format, so its
/// rows must stay in the same order as the enum declaration.
u32 GSTexture::GetCompressedBytesPerBlock() const
{
	static constexpr u32 bytes_per_block[] = {
		1, // Invalid
		4, // Color/RGBA8
		16, // FloatColor/RGBA32F
		32, // DepthStencil - NOTE(review): the GL backend comments 4 bytes depth + 4 bytes stencil per texel; confirm 32 is intended
		1, // UNorm8/R8
		2, // UInt16/R16UI
		4, // UInt32/R32UI
		4, // Int32/R32I
		8, // BC1 - 16 pixels in 64 bits
		16, // BC2 - 16 pixels in 128 bits
		16, // BC3 - 16 pixels in 128 bits
		16, // BC7 - 16 pixels in 128 bits
	};

	// Fail the build instead of reading past the table if a format is added
	// to the enum without a matching row here.
	static_assert(sizeof(bytes_per_block) / sizeof(bytes_per_block[0]) == static_cast<u32>(Format::BC7) + 1,
		"bytes_per_block must have exactly one entry per GSTexture::Format value");

	return bytes_per_block[static_cast<u32>(m_format)];
}
/// Returns the edge length, in texels, of one block of this texture's format:
/// 4 for the BC1..BC7 formats, 1 for every other format.
u32 GSTexture::GetCompressedBlockSize() const
{
	const bool is_block_compressed = (m_format >= Format::BC1) && (m_format <= Format::BC7);
	return is_block_compressed ? 4u : 1u;
}
/// Converts a source row pitch in bytes into a row length in texels.
/// @param pitch Bytes per row (per row of blocks for block-compressed formats).
/// @return Number of whole-or-partial blocks that fit in the pitch, multiplied by the block edge length.
u32 GSTexture::CalcUploadRowLengthFromPitch(u32 pitch) const
{
	const u32 texels_per_block_edge = GetCompressedBlockSize();
	const u32 block_bytes = GetCompressedBytesPerBlock();

	// Round up so a trailing partial block still counts as a full block.
	const u32 blocks_per_row = (pitch + (block_bytes - 1)) / block_bytes;
	return blocks_per_row * texels_per_block_edge;
}
/// Computes the number of bytes needed to upload a region of this texture.
/// @param height Height of the region in texels.
/// @param pitch  Bytes per row of blocks in the source data.
/// @return pitch multiplied by the number of block rows (height rounded up to the block edge length).
u32 GSTexture::CalcUploadSize(u32 height, u32 pitch) const
{
	const u32 block_edge = GetCompressedBlockSize();

	// Round up so a trailing partial row of blocks is still included.
	const u32 block_rows = (height + (block_edge - 1)) / block_edge;
	return pitch * block_rows;
}
void GSTexture::GenerateMipmapsIfNeeded()
{
if (!m_needs_mipmaps_generated || m_mipmap_levels <= 1)

View File

@ -47,6 +47,10 @@ public:
UInt16, ///< UInt16 texture for reading back 16-bit depth
UInt32, ///< UInt32 texture for reading back 24 and 32-bit depth
Int32, ///< Int32 texture for date emulation
BC1, ///< BC1, aka DXT1 compressed texture for replacements
BC2, ///< BC2, aka DXT2/3 compressed texture for replacements
BC3, ///< BC3, aka DXT4/5 compressed texture for replacements
BC7, ///< BC7, aka BPTC compressed texture for replacements
};
enum class State : u8
@ -100,6 +104,12 @@ public:
Type GetType() const { return m_type; }
Format GetFormat() const { return m_format; }
bool IsCompressedFormat() const { return (m_format >= Format::BC1 && m_format <= Format::BC7); }
u32 GetCompressedBytesPerBlock() const;
u32 GetCompressedBlockSize() const;
u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
u32 CalcUploadSize(u32 height, u32 pitch) const;
bool IsRenderTargetOrDepthStencil() const
{

View File

@ -28,6 +28,15 @@
#include <VersionHelpers.h>
#include <d3dcompiler.h>
/// Queries whether the device can create 2D textures of the given format.
/// @param dev    Device to query.
/// @param format DXGI format to test.
/// @return true if CheckFormatSupport() succeeds and reports
///         D3D11_FORMAT_SUPPORT_TEXTURE2D for the format, false otherwise.
static bool SupportsTextureFormat(ID3D11Device* dev, DXGI_FORMAT format)
{
	UINT support = 0;

	// Query the format that was asked for; previously this always tested
	// DXGI_FORMAT_BC1_UNORM, so every caller got the BC1 answer.
	if (FAILED(dev->CheckFormatSupport(format, &support)))
		return false;

	return (support & D3D11_FORMAT_SUPPORT_TEXTURE2D) != 0;
}
GSDevice11::GSDevice11()
{
memset(&m_state, 0, sizeof(m_state));
@ -42,6 +51,8 @@ GSDevice11::GSDevice11()
m_features.point_expand = false;
m_features.line_expand = false;
m_features.prefer_new_textures = false;
m_features.dxt_textures = false;
m_features.bptc_textures = false;
}
bool GSDevice11::Create(HostDisplay* display)
@ -113,6 +124,8 @@ bool GSDevice11::Create(HostDisplay* display)
m_features.broken_point_sampler = amd_vendor;
}
SetFeatures();
std::optional<std::string> shader = Host::ReadResourceFileToString("shaders/dx11/tfx.fx");
if (!shader.has_value())
return false;
@ -324,6 +337,16 @@ bool GSDevice11::Create(HostDisplay* display)
return true;
}
void GSDevice11::SetFeatures()
{
// Check all three formats, since the feature means any can be used.
m_features.dxt_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC1_UNORM) &&
SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC2_UNORM) &&
SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC3_UNORM);
m_features.bptc_textures = SupportsTextureFormat(m_dev.get(), DXGI_FORMAT_BC7_UNORM);
}
void GSDevice11::ResetAPIState()
{
// Clear out the GS, since the imgui draw doesn't get rid of it.
@ -433,6 +456,10 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height
case GSTexture::Format::UInt16: dxformat = DXGI_FORMAT_R16_UINT; break;
case GSTexture::Format::UInt32: dxformat = DXGI_FORMAT_R32_UINT; break;
case GSTexture::Format::Int32: dxformat = DXGI_FORMAT_R32_SINT; break;
case GSTexture::Format::BC1: dxformat = DXGI_FORMAT_BC1_UNORM; break;
case GSTexture::Format::BC2: dxformat = DXGI_FORMAT_BC2_UNORM; break;
case GSTexture::Format::BC3: dxformat = DXGI_FORMAT_BC3_UNORM; break;
case GSTexture::Format::BC7: dxformat = DXGI_FORMAT_BC7_UNORM; break;
case GSTexture::Format::Invalid:
ASSERT(0);
dxformat = DXGI_FORMAT_UNKNOWN;

View File

@ -117,6 +117,8 @@ private:
float m_hack_topleft_offset;
int m_d3d_texsize;
void SetFeatures();
GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) final;
void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) final;

View File

@ -83,6 +83,12 @@ namespace Emulate_DSA
glTexSubImage2D(GL_TEXTURE_2D, level, xoffset, yoffset, width, height, format, type, pixels);
}
// Stand-in for glCompressedTextureSubImage2D built from non-DSA calls:
// binds the texture through BindTextureUnit(), then uploads the compressed
// data to the GL_TEXTURE_2D target with glCompressedTexSubImage2D.
// All arguments after `texture` are forwarded unchanged.
void APIENTRY CompressedTextureSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* data)
{
// NOTE(review): unit 7 is presumably a scratch unit reserved for these emulated calls - confirm.
BindTextureUnit(7, texture);
glCompressedTexSubImage2D(GL_TEXTURE_2D, level, xoffset, yoffset, width, height, format, imageSize, data);
}
void APIENTRY GetTexureImage(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels)
{
BindTextureUnit(7, texture);
@ -121,6 +127,7 @@ namespace Emulate_DSA
glCreateTextures = CreateTexture;
glTextureStorage2D = TextureStorage;
glTextureSubImage2D = TextureSubImage;
glCompressedTextureSubImage2D = CompressedTextureSubImage;
glGetTextureImage = GetTexureImage;
glTextureParameteri = TextureParameteri;

View File

@ -232,6 +232,8 @@ bool GSDeviceOGL::Create(HostDisplay* display)
m_features.texture_barrier = true;
m_features.provoking_vertex_last = true;
m_features.prefer_new_textures = false;
m_features.dxt_textures = GL_EXT_texture_compression_s3tc;
m_features.bptc_textures = GL_VERSION_4_2 || GL_ARB_texture_compression_bptc || GL_EXT_texture_compression_bptc;
GLint point_range[2] = {};
GLint line_range[2] = {};

View File

@ -240,6 +240,34 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
m_int_shift = 3; // 4 bytes for depth + 4 bytes for stencil by texels
break;
case Format::BC1:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::BC2:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::BC3:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::BC7:
gl_fmt = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
m_int_format = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::Invalid:
m_int_format = 0;
m_int_type = 0;
@ -286,6 +314,14 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
SetGpuPageSize(GSVector2i(127, 127));
break;
case GSTexture::Format::BC1:
case GSTexture::Format::BC2:
case GSTexture::Format::BC3:
case GSTexture::Format::BC7:
m_sparse = false;
SetGpuPageSize(GSVector2i(127, 127));
break;
case Format::Invalid:
ASSERT(0);
}
@ -399,7 +435,15 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
// Don't use PBOs for huge texture uploads, let the driver sort it out.
// Otherwise we'll just be syncing, or worse, crashing because the PBO routine above isn't great.
if (map_size >= PboPool::m_seg_size)
if (IsCompressedFormat())
{
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
const u32 upload_size = CalcUploadSize(r.height(), pitch);
glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length);
glCompressedTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, upload_size, data);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
else if (map_size >= PboPool::m_seg_size)
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);
@ -437,7 +481,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
{
if (layer >= m_mipmap_levels)
if (layer >= m_mipmap_levels || IsCompressedFormat())
return false;
GSVector4i r = _r ? *_r : GSVector4i(0, 0, m_size.x, m_size.y);

View File

@ -280,6 +280,8 @@ bool GSDeviceVK::CheckFeatures()
}
}
m_features.dxt_textures = g_vulkan_context->GetDeviceFeatures().textureCompressionBC;
m_features.bptc_textures = g_vulkan_context->GetDeviceFeatures().textureCompressionBC;
return true;
}

View File

@ -132,7 +132,7 @@ std::unique_ptr<GSTextureVK> GSTextureVK::Create(Type type, u32 width, u32 heigh
VkFormat GSTextureVK::LookupNativeFormat(Format format)
{
static constexpr std::array<VkFormat, static_cast<int>(GSTexture::Format::Int32) + 1> s_format_mapping = {{
static constexpr std::array<VkFormat, static_cast<int>(GSTexture::Format::BC7) + 1> s_format_mapping = {{
VK_FORMAT_UNDEFINED, // Invalid
VK_FORMAT_R8G8B8A8_UNORM, // Color
VK_FORMAT_R32G32B32A32_SFLOAT, // FloatColor
@ -141,6 +141,10 @@ VkFormat GSTextureVK::LookupNativeFormat(Format format)
VK_FORMAT_R16_UINT, // UInt16
VK_FORMAT_R32_UINT, // UInt32
VK_FORMAT_R32_SFLOAT, // Int32
VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // BC1
VK_FORMAT_BC2_UNORM_BLOCK, // BC2
VK_FORMAT_BC3_UNORM_BLOCK, // BC3
VK_FORMAT_BC7_UNORM_BLOCK, // BC7
}};
@ -201,8 +205,8 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
const u32 width = r.width();
const u32 height = r.height();
const u32 row_length = static_cast<u32>(pitch) / Vulkan::Util::GetTexelSize(m_texture.GetFormat());
const u32 required_size = static_cast<u32>(pitch) * height;
const u32 row_length = CalcUploadRowLengthFromPitch(pitch);
const u32 required_size = CalcUploadSize(height, pitch);
// If the texture is larger than half our streaming buffer size, use a separate buffer.
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
@ -262,7 +266,7 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
bool GSTextureVK::Map(GSMap& m, const GSVector4i* r, int layer)
{
if (layer >= m_mipmap_levels)
if (layer >= m_mipmap_levels || IsCompressedFormat())
return false;
// map for writing