diff --git a/Source/Core/VideoBackends/OGL/OGLPipeline.cpp b/Source/Core/VideoBackends/OGL/OGLPipeline.cpp index bd439898db..00ae8aa873 100644 --- a/Source/Core/VideoBackends/OGL/OGLPipeline.cpp +++ b/Source/Core/VideoBackends/OGL/OGLPipeline.cpp @@ -9,6 +9,7 @@ #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/VertexManager.h" +#include "VideoCommon/VideoConfig.h" namespace OGL { @@ -31,7 +32,7 @@ static GLenum MapToGLPrimitive(PrimitiveType primitive_type) OGLPipeline::OGLPipeline(const GLVertexFormat* vertex_format, const RasterizationState& rasterization_state, const DepthState& depth_state, const BlendingState& blending_state, - const PipelineProgram* program, GLuint gl_primitive) + PipelineProgram* program, GLuint gl_primitive) : m_vertex_format(vertex_format), m_rasterization_state(rasterization_state), m_depth_state(depth_state), m_blending_state(blending_state), m_program(program), m_gl_primitive(gl_primitive) @@ -44,13 +45,47 @@ OGLPipeline::~OGLPipeline() ProgramShaderCache::ReleasePipelineProgram(m_program); } -std::unique_ptr OGLPipeline::Create(const AbstractPipelineConfig& config) +AbstractPipeline::CacheData OGLPipeline::GetCacheData() const { - const PipelineProgram* program = ProgramShaderCache::GetPipelineProgram( + // More than one pipeline can share the same shaders. To avoid bloating the cache with multiple + // copies of the same program combination, we set a flag on the program object so that it can't + // be retrieved again. When booting, the pipeline cache is loaded in-order, so the additional + // pipelines which use the program combination will re-use the already-created object. + if (!g_ActiveConfig.backend_info.bSupportsPipelineCacheData || m_program->binary_retrieved) + return {}; + + GLint program_size = 0; + glGetProgramiv(m_program->shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &program_size); + if (program_size == 0) + return {}; + + // Clear any existing error. + glGetError(); + + // We pack the format at the start of the buffer. + CacheData data(program_size + sizeof(u32)); + GLsizei data_size = 0; + GLenum program_format = 0; + glGetProgramBinary(m_program->shader.glprogid, program_size, &data_size, &program_format, + &data[sizeof(u32)]); + if (glGetError() != GL_NO_ERROR || data_size == 0) + return {}; + + u32 program_format_u32 = static_cast(program_format); + std::memcpy(&data[0], &program_format_u32, sizeof(u32)); + data.resize(data_size + sizeof(u32)); + m_program->binary_retrieved = true; + return data; +} + +std::unique_ptr OGLPipeline::Create(const AbstractPipelineConfig& config, + const void* cache_data, size_t cache_data_size) +{ + PipelineProgram* program = ProgramShaderCache::GetPipelineProgram( static_cast(config.vertex_format), static_cast(config.vertex_shader), static_cast(config.geometry_shader), - static_cast(config.pixel_shader)); + static_cast(config.pixel_shader), cache_data, cache_data_size); if (!program) return nullptr; diff --git a/Source/Core/VideoBackends/OGL/OGLPipeline.h b/Source/Core/VideoBackends/OGL/OGLPipeline.h index 42371ad927..9d74842df5 100644 --- a/Source/Core/VideoBackends/OGL/OGLPipeline.h +++ b/Source/Core/VideoBackends/OGL/OGLPipeline.h @@ -18,7 +18,7 @@ class OGLPipeline final : public AbstractPipeline public: explicit OGLPipeline(const GLVertexFormat* vertex_format, const RasterizationState& rasterization_state, const DepthState& depth_state, - const BlendingState& blending_state, const PipelineProgram* program, + const BlendingState& blending_state, PipelineProgram* program, GLenum gl_primitive); ~OGLPipeline() override; @@ -29,14 +29,16 @@ public: const PipelineProgram* GetProgram() const { return m_program; } bool HasVertexInput() const { return m_vertex_format != nullptr; } GLenum GetGLPrimitive() const { return m_gl_primitive; } - static std::unique_ptr Create(const AbstractPipelineConfig& config); + CacheData GetCacheData() const override; + static std::unique_ptr Create(const AbstractPipelineConfig& config, + const void* cache_data, size_t cache_data_size); private: const GLVertexFormat* m_vertex_format; RasterizationState m_rasterization_state; DepthState m_depth_state; BlendingState m_blending_state; - const PipelineProgram* m_program; + PipelineProgram* m_program; GLenum m_gl_primitive; }; diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index 61cd953b69..414f7dfa57 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -312,7 +312,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode, if (shader.gsid) glAttachShader(shader.glprogid, shader.gsid); - if (g_ogl_config.bSupportsGLSLCache) + if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData) glProgramParameteri(shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); shader.SetProgramBindings(false); @@ -562,10 +562,12 @@ void ProgramShaderCache::InvalidateLastProgram() CurrentProgram = 0; } -const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format, - const OGLShader* vertex_shader, - const OGLShader* geometry_shader, - const OGLShader* pixel_shader) +PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format, + const OGLShader* vertex_shader, + const OGLShader* geometry_shader, + const OGLShader* pixel_shader, + const void* cache_data, + size_t cache_data_size) { PipelineProgramKey key = {vertex_shader ? vertex_shader->GetID() : 0, geometry_shader ? geometry_shader->GetID() : 0, @@ -580,39 +582,69 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexForm } } - // We temporarily change the vertex array to the pipeline's vertex format. - // This can prevent the NVIDIA OpenGL driver from recompiling on first use. - GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO; - if (s_is_shared_context || vao != s_last_VAO) - glBindVertexArray(vao); - std::unique_ptr prog = std::make_unique(); prog->key = key; - - // Attach shaders. - ASSERT(vertex_shader && vertex_shader->GetStage() == ShaderStage::Vertex); - ASSERT(pixel_shader && pixel_shader->GetStage() == ShaderStage::Pixel); prog->shader.glprogid = glCreateProgram(); - glAttachShader(prog->shader.glprogid, vertex_shader->GetGLShaderID()); - glAttachShader(prog->shader.glprogid, pixel_shader->GetGLShaderID()); - if (geometry_shader) + + // Use the cache data, if present. If this fails, we want to return an error, so the shader cache + // doesn't attempt to use the same binary data in the future. + if (cache_data_size >= sizeof(u32)) { - ASSERT(geometry_shader->GetStage() == ShaderStage::Geometry); - glAttachShader(prog->shader.glprogid, geometry_shader->GetGLShaderID()); + u32 program_binary_type; + std::memcpy(&program_binary_type, cache_data, sizeof(u32)); + glProgramBinary(prog->shader.glprogid, static_cast(program_binary_type), + static_cast(cache_data) + sizeof(u32), + static_cast(cache_data_size - sizeof(u32))); + + // Check the link status. If this fails, it means the binary was invalid. + GLint link_status; + glGetProgramiv(prog->shader.glprogid, GL_LINK_STATUS, &link_status); + if (link_status != GL_TRUE) + { + WARN_LOG(VIDEO, "Failed to create GL program from program binary."); + prog->shader.Destroy(); + return nullptr; + } + + // We don't want to retrieve this binary and duplicate entries in the cache again. + // See the explanation in OGLPipeline.cpp. + prog->binary_retrieved = true; } - - // Link program. - prog->shader.SetProgramBindings(false); - glLinkProgram(prog->shader.glprogid); - - // Restore VAO binding after linking. - if (!s_is_shared_context && vao != s_last_VAO) - glBindVertexArray(s_last_VAO); - - if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {})) + else { - prog->shader.Destroy(); - return nullptr; + // We temporarily change the vertex array to the pipeline's vertex format. + // This can prevent the NVIDIA OpenGL driver from recompiling on first use. + GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO; + if (s_is_shared_context || vao != s_last_VAO) + glBindVertexArray(vao); + + // Attach shaders. + ASSERT(vertex_shader && vertex_shader->GetStage() == ShaderStage::Vertex); + ASSERT(pixel_shader && pixel_shader->GetStage() == ShaderStage::Pixel); + glAttachShader(prog->shader.glprogid, vertex_shader->GetGLShaderID()); + glAttachShader(prog->shader.glprogid, pixel_shader->GetGLShaderID()); + if (geometry_shader) + { + ASSERT(geometry_shader->GetStage() == ShaderStage::Geometry); + glAttachShader(prog->shader.glprogid, geometry_shader->GetGLShaderID()); + } + + if (g_ActiveConfig.backend_info.bSupportsPipelineCacheData) + glProgramParameteri(prog->shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); + + // Link program. + prog->shader.SetProgramBindings(false); + glLinkProgram(prog->shader.glprogid); + + // Restore VAO binding after linking. + if (!s_is_shared_context && vao != s_last_VAO) + glBindVertexArray(s_last_VAO); + + if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {})) + { + prog->shader.Destroy(); + return nullptr; + } } // Lock to insert. A duplicate program may have been created in the meantime. @@ -639,16 +671,17 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexForm return ip.first->second.get(); } -void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog) +void ProgramShaderCache::ReleasePipelineProgram(PipelineProgram* prog) { + if (--prog->reference_count > 0) + return; + + prog->shader.Destroy(); + + std::lock_guard guard(s_pipeline_program_lock); auto iter = s_pipeline_programs.find(prog->key); ASSERT(iter != s_pipeline_programs.end() && prog == iter->second.get()); - - if (--iter->second->reference_count == 0) - { - iter->second->shader.Destroy(); - s_pipeline_programs.erase(iter); - } + s_pipeline_programs.erase(iter); } void ProgramShaderCache::CreateHeader() diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index 7fc267dc5d..b7e4dd6d00 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -63,6 +63,7 @@ struct PipelineProgram PipelineProgramKey key; SHADER shader; std::atomic_size_t reference_count{1}; + bool binary_retrieved = false; }; class ProgramShaderCache @@ -97,11 +98,12 @@ public: // pipeline do not match the pipeline configuration. static u64 GenerateShaderID(); - static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format, - const OGLShader* vertex_shader, - const OGLShader* geometry_shader, - const OGLShader* pixel_shader); - static void ReleasePipelineProgram(const PipelineProgram* prog); + static PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format, + const OGLShader* vertex_shader, + const OGLShader* geometry_shader, + const OGLShader* pixel_shader, const void* cache_data, + size_t cache_data_size); + static void ReleasePipelineProgram(PipelineProgram* prog); private: typedef std::unordered_map, diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index 8e13aa9ff2..1e263b6422 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -350,6 +350,7 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ } bool bSuccess = true; + bool supports_glsl_cache = false; g_ogl_config.gl_vendor = (const char*)glGetString(GL_VENDOR); g_ogl_config.gl_renderer = (const char*)glGetString(GL_RENDERER); @@ -466,7 +467,7 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ GLExtensions::Supports("GL_ARB_gpu_shader5"); g_ogl_config.bIsES = m_main_gl_context->IsGLES(); - g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary"); + supports_glsl_cache = GLExtensions::Supports("GL_ARB_get_program_binary"); g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory"); g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync"); g_ogl_config.bSupportsGLBaseVertex = GLExtensions::Supports("GL_ARB_draw_elements_base_vertex") || @@ -507,7 +508,7 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ EsTexbufType::TexbufExt : EsTexbufType::TexbufNone; - g_ogl_config.bSupportsGLSLCache = true; + supports_glsl_cache = true; g_ogl_config.bSupportsGLSync = true; // TODO: Implement support for GL_EXT_clip_cull_distance when there is an extension for @@ -675,6 +676,16 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ g_Config.backend_info.bSupportsBackgroundCompiling = !DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION); + // Program binaries are supported on GL4.1+, ARB_get_program_binary, or ES3. + if (supports_glsl_cache) + { + // We need to check the number of formats supported. If zero, don't bother getting the binaries. + GLint num_formats = 0; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + supports_glsl_cache = num_formats > 0; + } + g_Config.backend_info.bSupportsPipelineCacheData = supports_glsl_cache; + if (g_ogl_config.bSupportsDebug) { if (GLExtensions::Supports("GL_KHR_debug")) @@ -739,7 +750,7 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? "" : "PrimitiveRestart ", g_ActiveConfig.backend_info.bSupportsEarlyZ ? "" : "EarlyZ ", g_ogl_config.bSupportsGLPinnedMemory ? "" : "PinnedMemory ", - g_ogl_config.bSupportsGLSLCache ? "" : "ShaderCache ", + supports_glsl_cache ? "" : "ShaderCache ", g_ogl_config.bSupportsGLBaseVertex ? "" : "BaseVertex ", g_ogl_config.bSupportsGLBufferStorage ? "" : "BufferStorage ", g_ogl_config.bSupportsGLSync ? "" : "Sync ", g_ogl_config.bSupportsMSAA ? "" : "MSAA ", @@ -832,7 +843,7 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin const void* cache_data, size_t cache_data_length) { - return OGLPipeline::Create(config); + return OGLPipeline::Create(config, cache_data, cache_data_length); } void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index f45bebbec1..842a6d9570 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -48,7 +48,6 @@ enum class EsFbFetchType struct VideoConfig { bool bIsES; - bool bSupportsGLSLCache; bool bSupportsGLPinnedMemory; bool bSupportsGLSync; bool bSupportsGLBaseVertex;