OGL: Implement GPU texture decoding backend

Stenzek 2016-11-27 18:15:00 +10:00
parent 79ba946d70
commit b01bcb80f4
4 changed files with 193 additions and 6 deletions

View File

@ -606,6 +606,13 @@ Renderer::Renderer()
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
// enabled in the version check below.
g_Config.backend_info.bSupportsGPUTextureDecoding =
g_Config.backend_info.bSupportsPaletteConversion &&
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
if (g_ogl_config.bSupportsDebug)
{
if (GLExtensions::Supports("GL_KHR_debug"))
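For context, the version check the comment above refers to is not in this hunk; a minimal sketch of what it amounts to, assuming Dolphin's GLExtensions::Version() helper (the GLES 3.1 case is analogous):

// Hedged sketch, not part of this commit: a GL 4.3 context guarantees
// compute shaders, image load/store, and texel buffers even when the
// individual extension strings are not exposed.
if (!g_Config.backend_info.bSupportsGPUTextureDecoding && GLExtensions::Version() >= 430)
  g_Config.backend_info.bSupportsGPUTextureDecoding = true;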

View File

@ -23,6 +23,7 @@
#include "VideoBackends/OGL/TextureConverter.h"
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoConfig.h"
@ -49,6 +50,24 @@ static GLuint s_palette_buffer_offset_uniform[3];
static GLuint s_palette_multiplier_uniform[3];
static GLuint s_palette_copy_position_uniform[3];
struct TextureDecodingProgramInfo
{
const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
SHADER program;
GLint uniform_dst_size = -1;
GLint uniform_src_size = -1;
GLint uniform_src_row_stride = -1;
GLint uniform_src_offset = -1;
GLint uniform_palette_offset = -1;
bool valid = false;
};
static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
s_texture_decoding_buffer_views;
static void CreateTextureDecodingResources();
static void DestroyTextureDecodingResources();
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
int virtual_height, unsigned int level)
{
@ -285,26 +304,31 @@ TextureCache::TextureCache()
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
-s32 buffer_size = 1024 * 1024;
+s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
+s32 buffer_size = buffer_size_mb * 1024 * 1024;
s32 max_buffer_size = 0;
-// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
-// is 65KB, we are asking for a 1MB buffer here.
-// Make sure to check the maximum size and if it is below 1MB
-// then use the maximum the hardware supports instead.
+// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB; we are asking for a 1MB
+// buffer here. This buffer is also used as storage for undecoded textures when compute shader
+// texture decoding is enabled, in which case the requested size is 32MB.
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
// Clamp the buffer size to the maximum size that the driver supports.
buffer_size = std::min(buffer_size, max_buffer_size);
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
glGenTextures(1, &s_palette_resolv_texture);
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
CreateTextureDecodingResources();
}
}
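As an illustration of the clamp above (values assumed, not from this commit), a driver reporting a limit below the request simply wins:

// Hypothetical driver reporting a 16 MiB GL_MAX_TEXTURE_BUFFER_SIZE:
// the 32 MiB request made when GPU texture decoding is enabled is
// reduced to the reported maximum.
GLint max_buffer_size = 16 * 1024 * 1024;  // assumed driver limit
s32 buffer_size = std::min<s32>(32 * 1024 * 1024, max_buffer_size);  // -> 16 MiB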
TextureCache::~TextureCache()
{
DeleteShaders();
DestroyTextureDecodingResources();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
@ -606,4 +630,150 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
FramebufferManager::SetFramebuffer(0);
g_renderer->RestoreAPIState();
}
static const std::string decoding_vertex_shader = R"(
void main()
{
vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
}
)";
void CreateTextureDecodingResources()
{
static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
GL_R8UI, // BUFFER_FORMAT_R8_UINT
GL_R16UI, // BUFFER_FORMAT_R16_UINT
GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT
};
glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
s_texture_decoding_buffer_views.data());
for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
{
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
}
}
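The three views alias the same stream buffer under different element types. The generated decoding shaders are not part of this diff, so the shader-side counterpart below is an assumption: a sketch of how a compute shader might read the R16_UINT view.

// Assumed GLSL snippet (names hypothetical): the buffer view is bound as a
// usamplerBuffer on texture unit 9 in DecodeTextureOnGPU and read with
// texelFetch by element index.
static const char* s_example_buffer_read = R"(
uniform usamplerBuffer s_input_buffer;

uint ReadElement(uint index)
{
  return texelFetch(s_input_buffer, int(index)).r;
}
)";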
void DestroyTextureDecodingResources()
{
glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
s_texture_decoding_buffer_views.data());
s_texture_decoding_buffer_views.fill(0);
s_texture_decoding_program_info.clear();
}
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key);
if (iter != s_texture_decoding_program_info.end())
return iter->second.valid;
TextureDecodingProgramInfo info;
info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
if (!info.base_info)
{
s_texture_decoding_program_info.emplace(key, info);
return false;
}
std::string shader_source =
TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
if (shader_source.empty())
{
s_texture_decoding_program_info.emplace(key, info);
return false;
}
if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source))
{
s_texture_decoding_program_info.emplace(key, info);
return false;
}
info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size");
info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size");
info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset");
info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride");
info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset");
info.valid = true;
s_texture_decoding_program_info.emplace(key, info);
return true;
}
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
{
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key);
if (iter == s_texture_decoding_program_info.end())
return;
// Copy to GPU-visible buffer, aligned to the data type.
auto info = iter->second;
u32 bytes_per_buffer_elem =
TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
// Only copy palette if it is required.
bool has_palette = info.base_info->palette_size > 0;
u32 total_upload_size = static_cast<u32>(data_size);
u32 palette_offset = total_upload_size;
if (has_palette)
{
// Align to u16.
if ((total_upload_size % sizeof(u16)) != 0)
{
total_upload_size++;
palette_offset++;
}
total_upload_size += info.base_info->palette_size;
}
// Allocate space in stream buffer, and copy texture + palette across.
auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
memcpy(buffer.first, data, data_size);
if (has_palette)
memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
s_palette_stream_buffer->Unmap(total_upload_size);
info.program.Bind();
// Calculate stride in buffer elements
u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);
if (info.uniform_dst_size >= 0)
glUniform2ui(info.uniform_dst_size, width, height);
if (info.uniform_src_size >= 0)
glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
if (info.uniform_src_offset >= 0)
glUniform1ui(info.uniform_src_offset, offset_in_elements);
if (info.uniform_src_row_stride >= 0)
glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
if (info.uniform_palette_offset >= 0)
glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);
glActiveTexture(GL_TEXTURE9);
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);
if (has_palette)
{
// Use an R16UI view for the palette.
glActiveTexture(GL_TEXTURE10);
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
}
auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
TextureCache::SetStage();
}
}
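To make the offset arithmetic in DecodeTextureOnGPU concrete, a worked example with assumed values (u16/u32 are Dolphin's Common typedefs; none of these numbers come from the commit):

// Assumed scenario: source data viewed through the R16_UINT buffer view,
// mapped at byte offset 4096 (buffer.second), 64 bytes per source row,
// palette stored 8192 bytes into the upload.
constexpr u32 bytes_per_buffer_elem = 2;   // R16_UINT
constexpr u32 map_offset = 4096;           // buffer.second
constexpr u32 row_stride = 64;
constexpr u32 palette_offset = 8192;

constexpr u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;  // 32
constexpr u32 offset_in_elements = map_offset / bytes_per_buffer_elem;      // 2048
constexpr u32 palette_offset_in_elements =
    (map_offset + palette_offset) / sizeof(u16);  // 12288 / 2 = 6144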

View File

@ -23,6 +23,12 @@ public:
static void DisableStage(unsigned int stage);
static void SetStage();
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
private:
struct TCacheEntry : TCacheEntryBase
{
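These overrides are the backend's entry points for the new path. A hypothetical caller-side sketch of the intended contract (the VideoCommon call site is not in this diff; g_texture_cache and TexDecoder_Decode are existing common-code names, bEnableGPUTextureDecoding is assumed): query per-format support first, and fall back to CPU decoding otherwise.

// Hypothetical usage, for illustration only.
if (g_ActiveConfig.bEnableGPUTextureDecoding &&
    g_texture_cache->SupportsGPUTextureDecode(tex_format, tlut_format))
{
  g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, src_data_size, tex_format, width,
                                      height, expanded_width, expanded_height, row_stride,
                                      tlut_data, tlut_format);
}
else
{
  TexDecoder_Decode(dst_data, src_data, expanded_width, expanded_height, tex_format, tlut_data,
                    tlut_format);
}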

View File

@ -108,7 +108,11 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = true;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
-g_Config.backend_info.bSupportsGPUTextureDecoding = false;
+// TODO: There is a bug here: if texel buffers are not supported, the graphics options will
+// still show GPU texture decoding as available. The only way around this would be to create a
+// context when calling this function to determine what is available.
+g_Config.backend_info.bSupportsGPUTextureDecoding = true;
// Overwritten in Render.cpp later
g_Config.backend_info.bSupportsDualSourceBlend = true;