OGL: Implement GPU texture decoding backend
This commit is contained in:
parent
79ba946d70
commit
b01bcb80f4
|
@ -606,6 +606,13 @@ Renderer::Renderer()
|
||||||
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
||||||
g_ogl_config.max_samples = 1;
|
g_ogl_config.max_samples = 1;
|
||||||
|
|
||||||
|
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
||||||
|
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
||||||
|
// enabled in the version check below.
|
||||||
|
g_Config.backend_info.bSupportsGPUTextureDecoding =
|
||||||
|
g_Config.backend_info.bSupportsPaletteConversion &&
|
||||||
|
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
|
||||||
|
|
||||||
if (g_ogl_config.bSupportsDebug)
|
if (g_ogl_config.bSupportsDebug)
|
||||||
{
|
{
|
||||||
if (GLExtensions::Supports("GL_KHR_debug"))
|
if (GLExtensions::Supports("GL_KHR_debug"))
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "VideoBackends/OGL/TextureConverter.h"
|
#include "VideoBackends/OGL/TextureConverter.h"
|
||||||
|
|
||||||
#include "VideoCommon/ImageWrite.h"
|
#include "VideoCommon/ImageWrite.h"
|
||||||
|
#include "VideoCommon/TextureConversionShader.h"
|
||||||
#include "VideoCommon/TextureDecoder.h"
|
#include "VideoCommon/TextureDecoder.h"
|
||||||
#include "VideoCommon/VideoConfig.h"
|
#include "VideoCommon/VideoConfig.h"
|
||||||
|
|
||||||
|
@ -49,6 +50,24 @@ static GLuint s_palette_buffer_offset_uniform[3];
|
||||||
static GLuint s_palette_multiplier_uniform[3];
|
static GLuint s_palette_multiplier_uniform[3];
|
||||||
static GLuint s_palette_copy_position_uniform[3];
|
static GLuint s_palette_copy_position_uniform[3];
|
||||||
|
|
||||||
|
struct TextureDecodingProgramInfo
|
||||||
|
{
|
||||||
|
const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
|
||||||
|
SHADER program;
|
||||||
|
GLint uniform_dst_size = -1;
|
||||||
|
GLint uniform_src_size = -1;
|
||||||
|
GLint uniform_src_row_stride = -1;
|
||||||
|
GLint uniform_src_offset = -1;
|
||||||
|
GLint uniform_palette_offset = -1;
|
||||||
|
bool valid = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
|
||||||
|
static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
|
||||||
|
s_texture_decoding_buffer_views;
|
||||||
|
static void CreateTextureDecodingResources();
|
||||||
|
static void DestroyTextureDecodingResources();
|
||||||
|
|
||||||
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
|
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
|
||||||
int virtual_height, unsigned int level)
|
int virtual_height, unsigned int level)
|
||||||
{
|
{
|
||||||
|
@ -285,26 +304,31 @@ TextureCache::TextureCache()
|
||||||
|
|
||||||
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
|
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
|
||||||
{
|
{
|
||||||
s32 buffer_size = 1024 * 1024;
|
s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
|
||||||
|
s32 buffer_size = buffer_size_mb * 1024 * 1024;
|
||||||
s32 max_buffer_size = 0;
|
s32 max_buffer_size = 0;
|
||||||
|
|
||||||
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
|
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
|
||||||
// is 65KB, we are asking for a 1MB buffer here.
|
// buffer here. This buffer is also used as storage for undecoded textures when compute shader
|
||||||
// Make sure to check the maximum size and if it is below 1MB
|
// texture decoding is enabled, in which case the requested size is 32MB.
|
||||||
// then use the maximum the hardware supports instead.
|
|
||||||
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
|
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
|
||||||
|
|
||||||
|
// Clamp the buffer size to the maximum size that the driver supports.
|
||||||
buffer_size = std::min(buffer_size, max_buffer_size);
|
buffer_size = std::min(buffer_size, max_buffer_size);
|
||||||
|
|
||||||
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
|
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
|
||||||
glGenTextures(1, &s_palette_resolv_texture);
|
glGenTextures(1, &s_palette_resolv_texture);
|
||||||
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
|
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
|
||||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
|
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
|
||||||
|
|
||||||
|
CreateTextureDecodingResources();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCache::~TextureCache()
|
TextureCache::~TextureCache()
|
||||||
{
|
{
|
||||||
DeleteShaders();
|
DeleteShaders();
|
||||||
|
DestroyTextureDecodingResources();
|
||||||
|
|
||||||
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
|
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
|
||||||
{
|
{
|
||||||
|
@ -606,4 +630,150 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
|
||||||
FramebufferManager::SetFramebuffer(0);
|
FramebufferManager::SetFramebuffer(0);
|
||||||
g_renderer->RestoreAPIState();
|
g_renderer->RestoreAPIState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GLSL vertex shader source that derives gl_Position purely from gl_VertexID,
// so it needs no vertex attributes.
// NOTE(review): nothing in the code visible here references this string -
// confirm it is actually used before keeping it.
static const std::string decoding_vertex_shader = R"(
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
|
||||||
|
gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
void CreateTextureDecodingResources()
|
||||||
|
{
|
||||||
|
static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
|
||||||
|
GL_R8UI, // BUFFER_FORMAT_R8_UINT
|
||||||
|
GL_R16UI, // BUFFER_FORMAT_R16_UINT
|
||||||
|
GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT
|
||||||
|
};
|
||||||
|
|
||||||
|
glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
|
||||||
|
s_texture_decoding_buffer_views.data());
|
||||||
|
for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
|
||||||
|
{
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
|
||||||
|
glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DestroyTextureDecodingResources()
|
||||||
|
{
|
||||||
|
glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
|
||||||
|
s_texture_decoding_buffer_views.data());
|
||||||
|
s_texture_decoding_buffer_views.fill(0);
|
||||||
|
s_texture_decoding_program_info.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reports whether (format, palette_format) can be decoded on the GPU.
//
// The first query for a pair looks up the shader description, generates the
// shader source and compiles the compute program. The outcome, success or
// failure, is cached, so later queries for the same pair only do a map lookup.
//
// Returns true when a usable program is cached for the pair.
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
  const auto cache_key =
      std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  const auto cached = s_texture_decoding_program_info.find(cache_key);
  if (cached != s_texture_decoding_program_info.end())
    return cached->second.valid;

  // Build the entry step by step; it stays invalid if any step fails.
  TextureDecodingProgramInfo entry;
  entry.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
  if (entry.base_info)
  {
    std::string source =
        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
    if (!source.empty() && ProgramShaderCache::CompileComputeShader(entry.program, source))
    {
      const auto program_id = entry.program.glprogid;
      entry.uniform_dst_size = glGetUniformLocation(program_id, "u_dst_size");
      entry.uniform_src_size = glGetUniformLocation(program_id, "u_src_size");
      entry.uniform_src_offset = glGetUniformLocation(program_id, "u_src_offset");
      entry.uniform_src_row_stride = glGetUniformLocation(program_id, "u_src_row_stride");
      entry.uniform_palette_offset = glGetUniformLocation(program_id, "u_palette_offset");
      entry.valid = true;
    }
  }

  // Cache failures as well, so an unsupported pair is not retried on every query.
  s_texture_decoding_program_info.emplace(cache_key, entry);
  return entry.valid;
}
|
||||||
|
|
||||||
|
// Decodes an encoded texture on the GPU with a compute shader, writing the
// result into mip level dst_level of entry.
//
// The encoded texels (and the palette, when the format's shader needs one) are
// copied into s_palette_stream_buffer, read by the shader through texture-buffer
// views on texture units 9 and 10, and written out through image unit 0 as RGBA8.
//
// entry           - destination cache entry; accessed as an OGL TCacheEntry.
// dst_level       - mip level of the destination texture to write.
// data, data_size - encoded source data and its size in bytes.
// format          - source texture format; with palette_format it selects the program.
// width, height   - uploaded as u_dst_size and used to size the dispatch.
// aligned_width, aligned_height - uploaded as u_src_size.
// row_stride      - source row stride (presumably in bytes - confirm with caller);
//                   converted to buffer elements before upload.
// palette         - palette data; only read when the shader info has palette_size > 0.
// palette_format  - palette format, part of the program cache key.
//
// Does nothing unless a valid program is already cached for the pair, which
// SupportsGPUTextureDecode() takes care of.
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
                                      size_t data_size, TextureFormat format, u32 width, u32 height,
                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
                                      const u8* palette, TlutFormat palette_format)
{
  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  auto iter = s_texture_decoding_program_info.find(key);
  if (iter == s_texture_decoding_program_info.end())
    return;

  // Failed lookups are cached too, with valid == false and possibly a null
  // base_info, so bail out before dereferencing it. Bind by reference so the
  // cached record (including its SHADER) is not copied on every decode.
  auto& info = iter->second;
  if (!info.valid)
    return;

  // Copy to GPU-visible buffer, aligned to the data type.
  u32 bytes_per_buffer_elem =
      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);

  // Only copy palette if it is required.
  bool has_palette = info.base_info->palette_size > 0;
  u32 total_upload_size = static_cast<u32>(data_size);
  u32 palette_offset = total_upload_size;
  if (has_palette)
  {
    // Align to u16, since the palette is read through an R16UI view.
    if ((total_upload_size % sizeof(u16)) != 0)
    {
      total_upload_size++;
      palette_offset++;
    }

    total_upload_size += info.base_info->palette_size;
  }

  // Allocate space in stream buffer, and copy texture + palette across.
  // NOTE(review): the palette element offset below assumes the mapped offset is
  // u16-aligned; with a 1-byte element format that relies on earlier uploads
  // having even sizes - confirm.
  auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
  memcpy(buffer.first, data, data_size);
  if (has_palette)
    memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
  s_palette_stream_buffer->Unmap(total_upload_size);

  info.program.Bind();

  // Calculate stride and offsets in buffer elements; the palette is always
  // addressed in u16 elements.
  u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
  u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
  u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);

  // A negative location means the uniform was not found in the program; skip it.
  if (info.uniform_dst_size >= 0)
    glUniform2ui(info.uniform_dst_size, width, height);
  if (info.uniform_src_size >= 0)
    glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
  if (info.uniform_src_offset >= 0)
    glUniform1ui(info.uniform_src_offset, offset_in_elements);
  if (info.uniform_src_row_stride >= 0)
    glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
  if (info.uniform_palette_offset >= 0)
    glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);

  // Source texels: the view whose texel format matches this decoder.
  glActiveTexture(GL_TEXTURE9);
  glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);

  if (has_palette)
  {
    // Use an R16UI view for the palette.
    glActiveTexture(GL_TEXTURE10);
    glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
  }

  auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
  glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
                     GL_WRITE_ONLY, GL_RGBA8);
  glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
  glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);

  // Called because the active texture unit and its binding were changed above.
  TextureCache::SetStage();
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,12 @@ public:
|
||||||
static void DisableStage(unsigned int stage);
|
static void DisableStage(unsigned int stage);
|
||||||
static void SetStage();
|
static void SetStage();
|
||||||
|
|
||||||
|
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
|
||||||
|
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
|
||||||
|
TextureFormat format, u32 width, u32 height, u32 aligned_width,
|
||||||
|
u32 aligned_height, u32 row_stride, const u8* palette,
|
||||||
|
TlutFormat palette_format) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct TCacheEntry : TCacheEntryBase
|
struct TCacheEntry : TCacheEntryBase
|
||||||
{
|
{
|
||||||
|
|
|
@ -108,7 +108,11 @@ void VideoBackend::InitBackendInfo()
|
||||||
g_Config.backend_info.bSupportsReversedDepthRange = true;
|
g_Config.backend_info.bSupportsReversedDepthRange = true;
|
||||||
g_Config.backend_info.bSupportsMultithreading = false;
|
g_Config.backend_info.bSupportsMultithreading = false;
|
||||||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
|
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
|
||||||
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
|
|
||||||
|
// TODO: There is a bug here, if texel buffers are not supported the graphics options
|
||||||
|
// will show the option when it is not supported. The only way around this would be
|
||||||
|
// creating a context when calling this function to determine what is available.
|
||||||
|
g_Config.backend_info.bSupportsGPUTextureDecoding = true;
|
||||||
|
|
||||||
// Overwritten in Render.cpp later
|
// Overwritten in Render.cpp later
|
||||||
g_Config.backend_info.bSupportsDualSourceBlend = true;
|
g_Config.backend_info.bSupportsDualSourceBlend = true;
|
||||||
|
|
Loading…
Reference in New Issue