OGL: Implement GPU texture decoding backend
Commit b01bcb80f4 (parent 79ba946d70)
```diff
@@ -606,6 +606,13 @@ Renderer::Renderer()
   if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
     g_ogl_config.max_samples = 1;
 
+  // We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
+  // If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
+  // enabled in the version check below.
+  g_Config.backend_info.bSupportsGPUTextureDecoding =
+      g_Config.backend_info.bSupportsPaletteConversion &&
+      g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
+
   if (g_ogl_config.bSupportsDebug)
   {
     if (GLExtensions::Supports("GL_KHR_debug"))
```
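Note that `bSupportsPaletteConversion` doubles as the texel-buffer check here: the palette-conversion path already depends on `GL_TEXTURE_BUFFER` (see the `TextureCache` constructor below), so together with compute shaders and image load/store this covers all three requirements the comment names. Drivers that advertise the required core versions rather than the individual extensions are caught by the version check the comment refers to.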
```diff
@@ -23,6 +23,7 @@
 #include "VideoBackends/OGL/TextureConverter.h"
 
 #include "VideoCommon/ImageWrite.h"
+#include "VideoCommon/TextureConversionShader.h"
 #include "VideoCommon/TextureDecoder.h"
 #include "VideoCommon/VideoConfig.h"
 
```
```diff
@@ -49,6 +50,24 @@ static GLuint s_palette_buffer_offset_uniform[3];
 static GLuint s_palette_multiplier_uniform[3];
 static GLuint s_palette_copy_position_uniform[3];
 
+struct TextureDecodingProgramInfo
+{
+  const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
+  SHADER program;
+  GLint uniform_dst_size = -1;
+  GLint uniform_src_size = -1;
+  GLint uniform_src_row_stride = -1;
+  GLint uniform_src_offset = -1;
+  GLint uniform_palette_offset = -1;
+  bool valid = false;
+};
+
+static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
+static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
+    s_texture_decoding_buffer_views;
+static void CreateTextureDecodingResources();
+static void DestroyTextureDecodingResources();
+
 bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
                  int virtual_height, unsigned int level)
 {
```
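The `GLint` locations default to -1, which is also the sentinel `glGetUniformLocation` returns for uniforms that are inactive or optimized out; that is why the dispatch code later guards every `glUniform*` call with a `>= 0` check. Programs are cached per (TextureFormat, TlutFormat) pair, since a paletted format generates a different shader for each palette format.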
```diff
@@ -285,26 +304,31 @@ TextureCache::TextureCache()
 
   if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
   {
-    s32 buffer_size = 1024 * 1024;
+    s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
+    s32 buffer_size = buffer_size_mb * 1024 * 1024;
     s32 max_buffer_size = 0;
 
-    // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
-    // is 65KB, we are asking for a 1MB buffer here.
-    // Make sure to check the maximum size and if it is below 1MB
-    // then use the maximum the hardware supports instead.
+    // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
+    // buffer here. This buffer is also used as storage for undecoded textures when compute shader
+    // texture decoding is enabled, in which case the requested size is 32MB.
     glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
+
+    // Clamp the buffer size to the maximum size that the driver supports.
     buffer_size = std::min(buffer_size, max_buffer_size);
 
     s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
     glGenTextures(1, &s_palette_resolv_texture);
     glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
     glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
+
+    CreateTextureDecodingResources();
   }
 }
 
 TextureCache::~TextureCache()
 {
   DeleteShaders();
+  DestroyTextureDecodingResources();
 
   if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
   {
```
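As a quick sanity check of the sizing logic, a standalone sketch; the 16 MiB cap is a made-up stand-in for the `glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, ...)` query, everything else mirrors the diff:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
  const bool gpu_texture_decoding = true;           // backend_info flag
  std::int32_t buffer_size = (gpu_texture_decoding ? 32 : 1) * 1024 * 1024;
  std::int32_t max_buffer_size = 16 * 1024 * 1024;  // hypothetical driver limit

  // Same clamp as the diff: never request more than the driver supports.
  buffer_size = std::min(buffer_size, max_buffer_size);
  std::printf("requesting %d byte stream buffer\n", buffer_size);  // 16777216
  return 0;
}
```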
```diff
@@ -606,4 +630,150 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
   FramebufferManager::SetFramebuffer(0);
   g_renderer->RestoreAPIState();
 }
+
+static const std::string decoding_vertex_shader = R"(
+void main()
+{
+  vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
+  gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
+}
+)";
```
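A quick check of the bit-twiddling vertex shader: for `gl_VertexID` 0, 1, 2, `rawpos` evaluates to (0,0), (1,0) and (0,2), so `gl_Position` becomes (-1,-1), (1,-1) and (-1,3). That single oversized triangle covers the whole viewport without any vertex buffer, the usual trick for full-screen passes.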
```diff
+
+void CreateTextureDecodingResources()
+{
+  static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
+      GL_R8UI,    // BUFFER_FORMAT_R8_UINT
+      GL_R16UI,   // BUFFER_FORMAT_R16_UINT
+      GL_RG32UI,  // BUFFER_FORMAT_R32G32_UINT
+  };
+
+  glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
+                s_texture_decoding_buffer_views.data());
+  for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
+  {
+    glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
+    glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
+  }
+}
+
+void DestroyTextureDecodingResources()
+{
+  glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
+                   s_texture_decoding_buffer_views.data());
+  s_texture_decoding_buffer_views.fill(0);
+  s_texture_decoding_program_info.clear();
+}
+
```
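The three views above alias the same stream-buffer storage under different element formats, so one ring buffer can feed decoders that read bytes, 16-bit words, or uvec2s. Illustrative only, not code from this commit: a decoding compute shader might read through such a view roughly as below (binding 9 matches the `glActiveTexture(GL_TEXTURE9)` call later in the diff); `texelFetch` indexes in buffer elements, which is why the C++ side divides byte offsets by `GetBytesPerBufferElement()`.

```cpp
// Illustrative GLSL fragment, embedded the same way this file embeds shaders.
static const char* example_decoder_snippet = R"(
layout(binding = 9) uniform usamplerBuffer s_input_buffer;  // R8UI/R16UI/RG32UI view

uint ReadTexel(uint offset_in_elements)
{
  // Indexing is per buffer element, not per byte.
  return texelFetch(s_input_buffer, int(offset_in_elements)).r;
}
)";
```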
```diff
+bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
+{
+  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
+  auto iter = s_texture_decoding_program_info.find(key);
+  if (iter != s_texture_decoding_program_info.end())
+    return iter->second.valid;
+
+  TextureDecodingProgramInfo info;
+  info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
+  if (!info.base_info)
+  {
+    s_texture_decoding_program_info.emplace(key, info);
+    return false;
+  }
+
+  std::string shader_source =
+      TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
+  if (shader_source.empty())
+  {
+    s_texture_decoding_program_info.emplace(key, info);
+    return false;
+  }
+
+  if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source))
+  {
+    s_texture_decoding_program_info.emplace(key, info);
+    return false;
+  }
+
+  info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size");
+  info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size");
+  info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset");
+  info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride");
+  info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset");
+  info.valid = true;
+  s_texture_decoding_program_info.emplace(key, info);
+  return true;
+}
+
```
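Every failure path emplaces the default-initialized `info` (with `valid == false`) before returning, so a format whose shader fails to generate or compile is only attempted once; later queries hit the cached negative entry. A hypothetical call-site sketch (this lives in VideoCommon, outside this diff; `src_data`, `src_size` and `tlut_data` are assumed names):

```cpp
// Probe once per (format, palette format), then pick GPU or CPU decoding.
if (g_texture_cache->SupportsGPUTextureDecode(format, palette_format))
{
  g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, src_size, format, width, height,
                                      aligned_width, aligned_height, row_stride, tlut_data,
                                      palette_format);
}
else
{
  // Fall back to the existing CPU decoder (TexDecoder_Decode and friends).
}
```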
```diff
+void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
+                                      size_t data_size, TextureFormat format, u32 width, u32 height,
+                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
+                                      const u8* palette, TlutFormat palette_format)
+{
+  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
+  auto iter = s_texture_decoding_program_info.find(key);
+  if (iter == s_texture_decoding_program_info.end())
+    return;
+
+  // Copy to GPU-visible buffer, aligned to the data type.
+  auto info = iter->second;
+  u32 bytes_per_buffer_elem =
+      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
+
+  // Only copy palette if it is required.
+  bool has_palette = info.base_info->palette_size > 0;
+  u32 total_upload_size = static_cast<u32>(data_size);
+  u32 palette_offset = total_upload_size;
+  if (has_palette)
+  {
+    // Align to u16.
+    if ((total_upload_size % sizeof(u16)) != 0)
+    {
+      total_upload_size++;
+      palette_offset++;
+    }
+
+    total_upload_size += info.base_info->palette_size;
+  }
+
+  // Allocate space in stream buffer, and copy texture + palette across.
+  auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
+  memcpy(buffer.first, data, data_size);
+  if (has_palette)
+    memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
+  s_palette_stream_buffer->Unmap(total_upload_size);
+
```
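A worked example of the alignment arithmetic above, with made-up numbers; the byte offsets computed here are converted into buffer elements just below (the palette offset divides by `sizeof(u16)` because the palette is read through the R16UI `s_palette_resolv_texture` view):

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  const std::uint32_t data_size = 2049;    // hypothetical image bytes (odd on purpose)
  const std::uint32_t palette_size = 512;  // e.g. a 256-entry, 16-bit TLUT
  std::uint32_t total = data_size;
  std::uint32_t palette_offset = total;

  // Align the palette to a u16 boundary, exactly as in the diff.
  if ((total % sizeof(std::uint16_t)) != 0)
  {
    total++;
    palette_offset++;
  }
  total += palette_size;

  // 2049 image bytes -> palette lands at byte 2050, 2562 bytes uploaded in all.
  std::printf("palette_offset=%u total=%u\n", palette_offset, total);
  return 0;
}
```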
```diff
+  info.program.Bind();
+
+  // Calculate stride in buffer elements
+  u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
+  u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
+  u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);
+  if (info.uniform_dst_size >= 0)
+    glUniform2ui(info.uniform_dst_size, width, height);
+  if (info.uniform_src_size >= 0)
+    glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
+  if (info.uniform_src_offset >= 0)
+    glUniform1ui(info.uniform_src_offset, offset_in_elements);
+  if (info.uniform_src_row_stride >= 0)
+    glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
+  if (info.uniform_palette_offset >= 0)
+    glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);
+
+  glActiveTexture(GL_TEXTURE9);
+  glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);
+
+  if (has_palette)
+  {
+    // Use an R16UI view for the palette.
+    glActiveTexture(GL_TEXTURE10);
+    glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
+  }
+
+  auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
+  glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
+                     GL_WRITE_ONLY, GL_RGBA8);
+  glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
+  glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
+
+  TextureCache::SetStage();
+}
 }
```
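`GetDispatchCount` presumably derives the work-group counts from the decoding shader's group size; a minimal ceiling-division sketch under that assumption (the 8x8 group size is illustrative, the real value comes from the shader info returned by `GetDecodingShaderInfo`):

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  const std::uint32_t group_w = 8, group_h = 8;  // assumed local_size_x/y
  const std::uint32_t width = 100, height = 60;

  // Round up so partial tiles at the right/bottom edges still get a group.
  const std::uint32_t groups_x = (width + group_w - 1) / group_w;   // 13
  const std::uint32_t groups_y = (height + group_h - 1) / group_h;  // 8
  std::printf("glDispatchCompute(%u, %u, 1)\n", groups_x, groups_y);
  return 0;
}
```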
```diff
@@ -23,6 +23,12 @@ public:
   static void DisableStage(unsigned int stage);
   static void SetStage();
 
+  bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
+  void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
+                          TextureFormat format, u32 width, u32 height, u32 aligned_width,
+                          u32 aligned_height, u32 row_stride, const u8* palette,
+                          TlutFormat palette_format) override;
+
 private:
   struct TCacheEntry : TCacheEntryBase
   {
```
```diff
@@ -108,7 +108,11 @@ void VideoBackend::InitBackendInfo()
   g_Config.backend_info.bSupportsReversedDepthRange = true;
   g_Config.backend_info.bSupportsMultithreading = false;
   g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
-  g_Config.backend_info.bSupportsGPUTextureDecoding = false;
+
+  // TODO: There is a bug here, if texel buffers are not supported the graphics options
+  // will show the option when it is not supported. The only way around this would be
+  // creating a context when calling this function to determine what is available.
+  g_Config.backend_info.bSupportsGPUTextureDecoding = true;
 
   // Overwritten in Render.cpp later
   g_Config.backend_info.bSupportsDualSourceBlend = true;
```