// Copyright 2011 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. #include "VideoBackends/OGL/ProgramShaderCache.h" #include #include #include #include "Common/Align.h" #include "Common/CommonTypes.h" #include "Common/FileUtil.h" #include "Common/GL/GLInterfaceBase.h" #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" #include "Common/StringUtil.h" #include "Common/Timer.h" #include "Core/ConfigManager.h" #include "Core/Host.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/AsyncShaderCompiler.h" #include "VideoCommon/Debugger.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/ImageWrite.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/UberShaderPixel.h" #include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" namespace OGL { static constexpr u32 UBO_LENGTH = 32 * 1024 * 1024; static constexpr u32 INVALID_VAO = std::numeric_limits::max(); std::unique_ptr ProgramShaderCache::s_async_compiler; u32 ProgramShaderCache::s_ubo_buffer_size; s32 ProgramShaderCache::s_ubo_align; u32 ProgramShaderCache::s_last_VAO = INVALID_VAO; static std::unique_ptr s_buffer; static int num_failures = 0; static LinearDiskCache s_program_disk_cache; static LinearDiskCache s_uber_program_disk_cache; static GLuint CurrentProgram = 0; ProgramShaderCache::PCache ProgramShaderCache::pshaders; ProgramShaderCache::UberPCache ProgramShaderCache::ubershaders; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry; ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_uber_entry; SHADERUID ProgramShaderCache::last_uid; UBERSHADERUID ProgramShaderCache::last_uber_uid; static std::string s_glsl_header = ""; static std::string GetGLSLVersionString() { GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion; switch (v) { case GLSLES_300: return "#version 300 es"; case GLSLES_310: return "#version 310 es"; case GLSLES_320: return "#version 320 es"; case GLSL_130: return "#version 130"; case GLSL_140: return "#version 140"; case GLSL_150: return "#version 150"; case GLSL_330: return "#version 330"; case GLSL_400: return "#version 400"; case GLSL_430: return "#version 430"; default: // Shouldn't ever hit this return "#version ERROR"; } } void SHADER::SetProgramVariables() { // Bind UBO and texture samplers if (!g_ActiveConfig.backend_info.bSupportsBindingLayout) { // glsl shader must be bind to set samplers if we don't support binding layout Bind(); GLint PSBlock_id = glGetUniformBlockIndex(glprogid, "PSBlock"); GLint VSBlock_id = glGetUniformBlockIndex(glprogid, "VSBlock"); GLint GSBlock_id = glGetUniformBlockIndex(glprogid, "GSBlock"); GLint UBERBlock_id = glGetUniformBlockIndex(glprogid, "UBERBlock"); if (PSBlock_id != -1) glUniformBlockBinding(glprogid, PSBlock_id, 1); if (VSBlock_id != -1) glUniformBlockBinding(glprogid, VSBlock_id, 2); if (GSBlock_id != -1) glUniformBlockBinding(glprogid, GSBlock_id, 3); if (UBERBlock_id != -1) glUniformBlockBinding(glprogid, UBERBlock_id, 4); // Bind Texture Samplers for (int a = 0; a <= 9; ++a) { std::string name = StringFromFormat(a < 8 ? "samp[%d]" : "samp%d", a); // Still need to get sampler locations since we aren't binding them statically in the shaders int loc = glGetUniformLocation(glprogid, name.c_str()); if (loc != -1) glUniform1i(loc, a); } } } void SHADER::SetProgramBindings(bool is_compute) { if (!is_compute) { if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend) { // So we do support extended blending // So we need to set a few more things here. // Bind our out locations glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0"); glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1"); } // Need to set some attribute locations glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos"); glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx"); glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "rawcolor0"); glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "rawcolor1"); glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0"); glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1"); glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2"); } for (int i = 0; i < 8; i++) { std::string attrib_name = StringFromFormat("rawtex%d", i); glBindAttribLocation(glprogid, SHADER_TEXTURE0_ATTRIB + i, attrib_name.c_str()); } } void SHADER::Bind() const { if (CurrentProgram != glprogid) { INCSTAT(stats.thisFrame.numShaderChanges); glUseProgram(glprogid); CurrentProgram = glprogid; } } void SHADER::DestroyShaders() { if (vsid) { glDeleteShader(vsid); vsid = 0; } if (gsid) { glDeleteShader(gsid); gsid = 0; } if (psid) { glDeleteShader(psid); psid = 0; } } void ProgramShaderCache::UploadConstants() { if (PixelShaderManager::dirty || VertexShaderManager::dirty || GeometryShaderManager::dirty) { auto buffer = s_buffer->Map(s_ubo_buffer_size, s_ubo_align); memcpy(buffer.first, &PixelShaderManager::constants, sizeof(PixelShaderConstants)); memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align), &VertexShaderManager::constants, sizeof(VertexShaderConstants)); memcpy(buffer.first + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) + Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align), &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); s_buffer->Unmap(s_ubo_buffer_size); glBindBufferRange(GL_UNIFORM_BUFFER, 1, s_buffer->m_buffer, buffer.second, sizeof(PixelShaderConstants)); glBindBufferRange(GL_UNIFORM_BUFFER, 2, s_buffer->m_buffer, buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align), sizeof(VertexShaderConstants)); glBindBufferRange(GL_UNIFORM_BUFFER, 3, s_buffer->m_buffer, buffer.second + Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) + Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align), sizeof(GeometryShaderConstants)); PixelShaderManager::dirty = false; VertexShaderManager::dirty = false; GeometryShaderManager::dirty = false; ADDSTAT(stats.thisFrame.bytesUniformStreamed, s_ubo_buffer_size); } } SHADER* ProgramShaderCache::SetShader(u32 primitive_type, const GLVertexFormat* vertex_format) { if (g_ActiveConfig.bDisableSpecializedShaders) return SetUberShader(primitive_type, vertex_format); SHADERUID uid; std::memset(&uid, 0, sizeof(uid)); uid.puid = GetPixelShaderUid(); uid.vuid = GetVertexShaderUid(); uid.guid = GetGeometryShaderUid(primitive_type); // Check if the shader is already set if (last_entry && uid == last_uid) { last_entry->shader.Bind(); BindVertexFormat(vertex_format); return &last_entry->shader; } // Check if shader is already in cache auto iter = pshaders.find(uid); if (iter != pshaders.end()) { PCacheEntry* entry = &iter->second; if (entry->pending) return SetUberShader(primitive_type, vertex_format); last_uid = uid; last_entry = entry; BindVertexFormat(vertex_format); last_entry->shader.Bind(); return &last_entry->shader; } // Compile the new shader program. PCacheEntry& newentry = pshaders[uid]; newentry.in_cache = false; newentry.pending = false; // Can we background compile this shader? Requires background shader compiling to be enabled, // and all ubershaders to have been successfully compiled. if (g_ActiveConfig.CanBackgroundCompileShaders() && !ubershaders.empty() && s_async_compiler) { newentry.pending = true; s_async_compiler->QueueWorkItem(s_async_compiler->CreateWorkItem(uid)); return SetUberShader(primitive_type, vertex_format); } // Synchronous shader compiling. ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); ShaderCode vcode; if (!g_ActiveConfig.bForceVertexUberShaders) vcode = GenerateVertexShaderCode(APIType::OpenGL, host_config, uid.vuid.GetUidData()); else vcode = UberShader::GenVertexShader(APIType::OpenGL, host_config, UberShader::GetVertexShaderUid().GetUidData()); ShaderCode pcode; if (!g_ActiveConfig.bForcePixelUberShaders) pcode = GeneratePixelShaderCode(APIType::OpenGL, host_config, uid.puid.GetUidData()); else pcode = UberShader::GenPixelShader(APIType::OpenGL, host_config, UberShader::GetPixelShaderUid().GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, uid.guid.GetUidData()); if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer())) return nullptr; INCSTAT(stats.numPixelShadersCreated); SETSTAT(stats.numPixelShadersAlive, pshaders.size()); last_uid = uid; last_entry = &newentry; BindVertexFormat(vertex_format); last_entry->shader.Bind(); return &last_entry->shader; } SHADER* ProgramShaderCache::SetUberShader(u32 primitive_type, const GLVertexFormat* vertex_format) { UBERSHADERUID uid; std::memset(&uid, 0, sizeof(uid)); uid.puid = UberShader::GetPixelShaderUid(); uid.vuid = UberShader::GetVertexShaderUid(); uid.guid = GetGeometryShaderUid(primitive_type); // We need to use the ubershader vertex format with all attributes enabled. // Otherwise, the NV driver can generate variants for the vertex shaders. const GLVertexFormat* uber_vertex_format = static_cast( VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration())); // Check if the shader is already set if (last_uber_entry && last_uber_uid == uid) { BindVertexFormat(uber_vertex_format); last_uber_entry->shader.Bind(); return &last_uber_entry->shader; } // Check if shader is already in cache auto iter = ubershaders.find(uid); if (iter != ubershaders.end()) { PCacheEntry* entry = &iter->second; last_uber_uid = uid; last_uber_entry = entry; BindVertexFormat(uber_vertex_format); last_uber_entry->shader.Bind(); return &last_uber_entry->shader; } // Make an entry in the table PCacheEntry& newentry = ubershaders[uid]; newentry.in_cache = false; newentry.pending = false; ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); ShaderCode vcode = UberShader::GenVertexShader(APIType::OpenGL, host_config, uid.vuid.GetUidData()); ShaderCode pcode = UberShader::GenPixelShader(APIType::OpenGL, host_config, uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) { gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, uid.guid.GetUidData()); } if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer())) { GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true); return nullptr; } last_uber_uid = uid; last_uber_entry = &newentry; BindVertexFormat(uber_vertex_format); last_uber_entry->shader.Bind(); return &last_uber_entry->shader; } bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode, const std::string& gcode) { #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS) { static int counter = 0; std::string filename = StringFromFormat("%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); SaveData(filename, vcode.c_str()); filename = StringFromFormat("%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); SaveData(filename, pcode.c_str()); if (!gcode.empty()) { filename = StringFromFormat("%sgs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); SaveData(filename, gcode.c_str()); } } #endif shader.vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode); shader.psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode); // Optional geometry shader shader.gsid = 0; if (!gcode.empty()) shader.gsid = CompileSingleShader(GL_GEOMETRY_SHADER, gcode); if (!shader.vsid || !shader.psid || (!gcode.empty() && !shader.gsid)) { shader.Destroy(); return false; } // Create and link the program. shader.glprogid = glCreateProgram(); glAttachShader(shader.glprogid, shader.vsid); glAttachShader(shader.glprogid, shader.psid); if (shader.gsid) glAttachShader(shader.glprogid, shader.gsid); if (g_ogl_config.bSupportsGLSLCache) glProgramParameteri(shader.glprogid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); shader.SetProgramBindings(false); glLinkProgram(shader.glprogid); if (!CheckProgramLinkResult(shader.glprogid, vcode, pcode, gcode)) { // Don't try to use this shader shader.Destroy(); return false; } // For drivers that don't support binding layout, we need to bind it here. shader.SetProgramVariables(); // Original shaders aren't needed any more. shader.DestroyShaders(); return true; } bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string& code) { // We need to enable GL_ARB_compute_shader for drivers that support the extension, // but not GLSL 4.3. Mesa is one example. std::string header; if (g_ActiveConfig.backend_info.bSupportsComputeShaders && g_ogl_config.eSupportedGLSLVersion < GLSL_430) { header = "#extension GL_ARB_compute_shader : enable\n"; } std::string full_code = header + code; GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, full_code); if (!shader_id) return false; shader.glprogid = glCreateProgram(); glAttachShader(shader.glprogid, shader_id); shader.SetProgramBindings(true); glLinkProgram(shader.glprogid); // original shaders aren't needed any more glDeleteShader(shader_id); if (!CheckProgramLinkResult(shader.glprogid, full_code, "", "")) { shader.Destroy(); return false; } shader.SetProgramVariables(); return true; } GLuint ProgramShaderCache::CompileSingleShader(GLenum type, const std::string& code) { GLuint result = glCreateShader(type); const char* src[] = {s_glsl_header.c_str(), code.c_str()}; glShaderSource(result, 2, src, nullptr); glCompileShader(result); if (!CheckShaderCompileResult(result, type, code)) { // Don't try to use this shader glDeleteShader(result); return 0; } return result; } bool ProgramShaderCache::CheckShaderCompileResult(GLuint id, GLenum type, const std::string& code) { GLint compileStatus; glGetShaderiv(id, GL_COMPILE_STATUS, &compileStatus); GLsizei length = 0; glGetShaderiv(id, GL_INFO_LOG_LENGTH, &length); if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) { std::string info_log; info_log.resize(length); glGetShaderInfoLog(id, length, &length, &info_log[0]); const char* prefix = ""; switch (type) { case GL_VERTEX_SHADER: prefix = "vs"; break; case GL_GEOMETRY_SHADER: prefix = "gs"; break; case GL_FRAGMENT_SHADER: prefix = "ps"; break; case GL_COMPUTE_SHADER: prefix = "cs"; break; } ERROR_LOG(VIDEO, "%s Shader info log:\n%s", prefix, info_log.c_str()); std::string filename = StringFromFormat( "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), prefix, num_failures++); std::ofstream file; File::OpenFStream(file, filename, std::ios_base::out); file << s_glsl_header << code << info_log; file.close(); if (compileStatus != GL_TRUE) { PanicAlert("Failed to compile %s shader: %s\n" "Debug info (%s, %s, %s):\n%s", prefix, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, g_ogl_config.gl_version, info_log.c_str()); } } if (compileStatus != GL_TRUE) { // Compile failed ERROR_LOG(VIDEO, "Shader compilation failed; see info log"); return false; } return true; } bool ProgramShaderCache::CheckProgramLinkResult(GLuint id, const std::string& vcode, const std::string& pcode, const std::string& gcode) { GLint linkStatus; glGetProgramiv(id, GL_LINK_STATUS, &linkStatus); GLsizei length = 0; glGetProgramiv(id, GL_INFO_LOG_LENGTH, &length); if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) { std::string info_log; info_log.resize(length); glGetProgramInfoLog(id, length, &length, &info_log[0]); ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str()); std::string filename = StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); std::ofstream file; File::OpenFStream(file, filename, std::ios_base::out); file << s_glsl_header << vcode << s_glsl_header << pcode; if (!gcode.empty()) file << s_glsl_header << gcode; file << info_log; file.close(); if (linkStatus != GL_TRUE) { PanicAlert("Failed to link shaders: %s\n" "Debug info (%s, %s, %s):\n%s", filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, g_ogl_config.gl_version, info_log.c_str()); return false; } } return true; } ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram() { return *last_entry; } void ProgramShaderCache::Init() { // We have to get the UBO alignment here because // if we generate a buffer that isn't aligned // then the UBO will fail. glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &s_ubo_align); s_ubo_buffer_size = static_cast(Common::AlignUp(sizeof(PixelShaderConstants), s_ubo_align) + Common::AlignUp(sizeof(VertexShaderConstants), s_ubo_align) + Common::AlignUp(sizeof(GeometryShaderConstants), s_ubo_align)); // We multiply by *4*4 because we need to get down to basic machine units. // So multiply by four to get how many floats we have from vec4s // Then once more to get bytes s_buffer = StreamBuffer::Create(GL_UNIFORM_BUFFER, UBO_LENGTH); // The GPU shader code appears to be context-specific on Mesa/i965. // This means that if we compiled the ubershaders asynchronously, they will be recompiled // on the main thread the first time they are used, causing stutter. Nouveau has been // reported to crash if draw calls are invoked on the shared context threads. For now, // disable asynchronous compilation on Mesa. if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION)) s_async_compiler = std::make_unique(); // Read our shader cache, only if supported and enabled if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache) LoadProgramBinaries(); CreateHeader(); CurrentProgram = 0; last_entry = nullptr; last_uber_entry = nullptr; if (g_ActiveConfig.CanPrecompileUberShaders()) { if (s_async_compiler) s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads()); PrecompileUberShaders(); } if (s_async_compiler) { // No point using the async compiler without workers. s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); if (!s_async_compiler->HasWorkerThreads()) s_async_compiler.reset(); } } void ProgramShaderCache::RetrieveAsyncShaders() { if (s_async_compiler) s_async_compiler->RetrieveWorkItems(); } void ProgramShaderCache::Reload() { if (s_async_compiler) { s_async_compiler->WaitUntilCompletion(); s_async_compiler->RetrieveWorkItems(); } const bool use_cache = g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache; if (use_cache) SaveProgramBinaries(); s_program_disk_cache.Close(); s_uber_program_disk_cache.Close(); DestroyShaders(); if (use_cache) LoadProgramBinaries(); if (g_ActiveConfig.CanPrecompileUberShaders()) PrecompileUberShaders(); InvalidateVertexFormat(); CurrentProgram = 0; last_entry = nullptr; last_uber_entry = nullptr; last_uid = {}; last_uber_uid = {}; } void ProgramShaderCache::Shutdown() { if (s_async_compiler) { s_async_compiler->WaitUntilCompletion(); s_async_compiler->StopWorkerThreads(); s_async_compiler->RetrieveWorkItems(); s_async_compiler.reset(); } // store all shaders in cache on disk if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache) SaveProgramBinaries(); s_program_disk_cache.Close(); s_uber_program_disk_cache.Close(); InvalidateVertexFormat(); DestroyShaders(); s_buffer.reset(); } void ProgramShaderCache::BindVertexFormat(const GLVertexFormat* vertex_format) { u32 new_VAO = vertex_format ? vertex_format->VAO : 0; if (s_last_VAO == new_VAO) return; glBindVertexArray(new_VAO); s_last_VAO = new_VAO; } void ProgramShaderCache::InvalidateVertexFormat() { s_last_VAO = INVALID_VAO; } void ProgramShaderCache::BindLastVertexFormat() { if (s_last_VAO != INVALID_VAO) glBindVertexArray(s_last_VAO); else glBindVertexArray(0); } GLuint ProgramShaderCache::CreateProgramFromBinary(const u8* value, u32 value_size) { const u8* binary = value + sizeof(GLenum); GLint binary_size = value_size - sizeof(GLenum); GLenum prog_format; std::memcpy(&prog_format, value, sizeof(GLenum)); GLuint progid = glCreateProgram(); glProgramBinary(progid, prog_format, binary, binary_size); GLint success; glGetProgramiv(progid, GL_LINK_STATUS, &success); if (!success) { glDeleteProgram(progid); return 0; } return progid; } bool ProgramShaderCache::CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size) { entry->in_cache = true; entry->pending = false; entry->shader.glprogid = CreateProgramFromBinary(value, value_size); if (entry->shader.glprogid == 0) return false; entry->shader.SetProgramVariables(); return true; } void ProgramShaderCache::LoadProgramBinaries() { GLint Supported; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &Supported); if (!Supported) { ERROR_LOG(VIDEO, "GL_ARB_get_program_binary is supported, but no binary format is known. So " "disable shader cache."); g_ogl_config.bSupportsGLSLCache = false; } else { // Load game-specific shaders. std::string cache_filename = GetDiskShaderCacheFileName(APIType::OpenGL, "ProgramBinaries", true, true); ProgramShaderCacheInserter inserter(pshaders); s_program_disk_cache.OpenAndRead(cache_filename, inserter); // Load global ubershaders. cache_filename = GetDiskShaderCacheFileName(APIType::OpenGL, "UberProgramBinaries", false, true); ProgramShaderCacheInserter uber_inserter(ubershaders); s_uber_program_disk_cache.OpenAndRead(cache_filename, uber_inserter); } SETSTAT(stats.numPixelShadersAlive, pshaders.size()); } static bool GetProgramBinary(const ProgramShaderCache::PCacheEntry& entry, std::vector& data) { // Clear any prior error code glGetError(); GLint link_status = GL_FALSE, delete_status = GL_TRUE, binary_size = 0; glGetProgramiv(entry.shader.glprogid, GL_LINK_STATUS, &link_status); glGetProgramiv(entry.shader.glprogid, GL_DELETE_STATUS, &delete_status); glGetProgramiv(entry.shader.glprogid, GL_PROGRAM_BINARY_LENGTH, &binary_size); if (glGetError() != GL_NO_ERROR || link_status == GL_FALSE || delete_status == GL_TRUE || binary_size == 0) { return false; } data.resize(binary_size + sizeof(GLenum)); GLsizei length = binary_size; GLenum prog_format; glGetProgramBinary(entry.shader.glprogid, binary_size, &length, &prog_format, &data[sizeof(GLenum)]); if (glGetError() != GL_NO_ERROR) return false; std::memcpy(&data[0], &prog_format, sizeof(prog_format)); return true; } template static void SaveProgramBinaryMap(CacheMapType& program_map, DiskCacheType& disk_cache) { std::vector binary_data; for (auto& entry : program_map) { if (entry.second.in_cache || entry.second.pending) continue; // Entry is now in cache (even if it fails, we don't want to try to save it again). entry.second.in_cache = true; if (!GetProgramBinary(entry.second, binary_data)) continue; disk_cache.Append(entry.first, &binary_data[0], static_cast(binary_data.size())); } disk_cache.Sync(); } void ProgramShaderCache::SaveProgramBinaries() { SaveProgramBinaryMap(pshaders, s_program_disk_cache); SaveProgramBinaryMap(ubershaders, s_uber_program_disk_cache); } void ProgramShaderCache::DestroyShaders() { glUseProgram(0); for (auto& entry : pshaders) entry.second.Destroy(); pshaders.clear(); for (auto& entry : ubershaders) entry.second.Destroy(); ubershaders.clear(); } void ProgramShaderCache::CreateHeader() { GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion; bool is_glsles = v >= GLSLES_300; std::string SupportedESPointSize; std::string SupportedESTextureBuffer; switch (g_ogl_config.SupportedESPointSize) { case 1: SupportedESPointSize = "#extension GL_OES_geometry_point_size : enable"; break; case 2: SupportedESPointSize = "#extension GL_EXT_geometry_point_size : enable"; break; default: SupportedESPointSize = ""; break; } switch (g_ogl_config.SupportedESTextureBuffer) { case ES_TEXBUF_TYPE::TEXBUF_EXT: SupportedESTextureBuffer = "#extension GL_EXT_texture_buffer : enable"; break; case ES_TEXBUF_TYPE::TEXBUF_OES: SupportedESTextureBuffer = "#extension GL_OES_texture_buffer : enable"; break; case ES_TEXBUF_TYPE::TEXBUF_CORE: case ES_TEXBUF_TYPE::TEXBUF_NONE: SupportedESTextureBuffer = ""; break; } std::string earlyz_string = ""; if (g_ActiveConfig.backend_info.bSupportsEarlyZ) { if (g_ogl_config.bSupportsImageLoadStore) { earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n"; } else if (g_ogl_config.bSupportsConservativeDepth) { // See PixelShaderGen for details about this fallback. earlyz_string = "#define FORCE_EARLY_Z layout(depth_unchanged) out float gl_FragDepth\n"; earlyz_string += "#extension GL_ARB_conservative_depth : enable\n"; } } s_glsl_header = StringFromFormat( "%s\n" "%s\n" // ubo "%s\n" // early-z "%s\n" // 420pack "%s\n" // msaa "%s\n" // Input/output/sampler binding "%s\n" // Varying location "%s\n" // storage buffer "%s\n" // shader5 "%s\n" // SSAA "%s\n" // Geometry point size "%s\n" // AEP "%s\n" // texture buffer "%s\n" // ES texture buffer "%s\n" // ES dual source blend "%s\n" // shader image load store // Precision defines for GLSL ES "%s\n" "%s\n" "%s\n" "%s\n" "%s\n" "%s\n" // Silly differences "#define float2 vec2\n" "#define float3 vec3\n" "#define float4 vec4\n" "#define uint2 uvec2\n" "#define uint3 uvec3\n" "#define uint4 uvec4\n" "#define int2 ivec2\n" "#define int3 ivec3\n" "#define int4 ivec4\n" // hlsl to glsl function translation "#define frac fract\n" "#define lerp mix\n" , GetGLSLVersionString().c_str(), v < GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : "", earlyz_string.c_str(), (g_ActiveConfig.backend_info.bSupportsBindingLayout && v < GLSLES_310) ? "#extension GL_ARB_shading_language_420pack : enable" : "", (g_ogl_config.bSupportsMSAA && v < GLSL_150) ? "#extension GL_ARB_texture_multisample : enable" : "", // Attribute and fragment output bindings are still done via glBindAttribLocation and // glBindFragDataLocation. In the future this could be moved to the layout qualifier // in GLSL, but requires verification of GL_ARB_explicit_attrib_location. g_ActiveConfig.backend_info.bSupportsBindingLayout ? "#define ATTRIBUTE_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" "#define UBO_BINDING(packing, x) layout(packing, binding = x)\n" "#define SAMPLER_BINDING(x) layout(binding = x)\n" "#define SSBO_BINDING(x) layout(binding = x)\n" : "#define ATTRIBUTE_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" "#define UBO_BINDING(packing, x) layout(packing)\n" "#define SAMPLER_BINDING(x)\n", // Input/output blocks are matched by name during program linking "#define VARYING_LOCATION(x)\n", !is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ? "#extension GL_ARB_shader_storage_buffer_object : enable" : "", v < GLSL_400 && g_ActiveConfig.backend_info.bSupportsGSInstancing ? "#extension GL_ARB_gpu_shader5 : enable" : "", v < GLSL_400 && g_ActiveConfig.backend_info.bSupportsSSAA ? "#extension GL_ARB_sample_shading : enable" : "", SupportedESPointSize.c_str(), g_ogl_config.bSupportsAEP ? "#extension GL_ANDROID_extension_pack_es31a : enable" : "", v < GLSL_140 && g_ActiveConfig.backend_info.bSupportsPaletteConversion ? "#extension GL_ARB_texture_buffer_object : enable" : "", SupportedESTextureBuffer.c_str(), is_glsles && g_ActiveConfig.backend_info.bSupportsDualSourceBlend ? "#extension GL_EXT_blend_func_extended : enable" : "" , g_ogl_config.bSupportsImageLoadStore && ((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ? "#extension GL_ARB_shader_image_load_store : enable" : "", is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "", (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? "precision highp usamplerBuffer;" : "", v > GLSLES_300 ? "precision highp sampler2DMS;" : "", v >= GLSLES_310 ? "precision highp image2DArray;" : ""); } void ProgramShaderCache::PrecompileUberShaders() { bool success = true; UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) { UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& puid) { // UIDs must have compatible texgens, a mismatching combination will never be queried. if (vuid.GetUidData()->num_texgens != puid.GetUidData()->num_texgens) return; EnumerateGeometryShaderUids([&](const GeometryShaderUid& guid) { if (guid.GetUidData()->numTexGens != vuid.GetUidData()->num_texgens) return; UBERSHADERUID uid; std::memcpy(&uid.vuid, &vuid, sizeof(uid.vuid)); std::memcpy(&uid.puid, &puid, sizeof(uid.puid)); std::memcpy(&uid.guid, &guid, sizeof(uid.guid)); // The ubershader may already exist if shader caching is enabled. if (!success || ubershaders.find(uid) != ubershaders.end()) return; PCacheEntry& entry = ubershaders[uid]; entry.in_cache = false; entry.pending = false; // Multi-context path? if (s_async_compiler) { entry.pending = true; s_async_compiler->QueueWorkItem( s_async_compiler->CreateWorkItem(uid)); return; } ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); ShaderCode vcode = UberShader::GenVertexShader(APIType::OpenGL, host_config, uid.vuid.GetUidData()); ShaderCode pcode = UberShader::GenPixelShader(APIType::OpenGL, host_config, uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !uid.guid.GetUidData()->IsPassthrough()) { GenerateGeometryShaderCode(APIType::OpenGL, host_config, uid.guid.GetUidData()); } // Always background compile, even when it's not supported. // This way hopefully the driver can still compile the shaders in parallel. if (!CompileShader(entry.shader, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer())) { // Stop compiling shaders if any of them fail, no point continuing. success = false; return; } }); }); }); if (s_async_compiler) { s_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) { Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(), static_cast(completed), static_cast(total)); }); s_async_compiler->RetrieveWorkItems(); Host_UpdateProgressDialog("", -1, -1); } if (!success) { PanicAlert("One or more ubershaders failed to compile. Disabling ubershaders."); for (auto& it : ubershaders) it.second.Destroy(); ubershaders.clear(); } } bool ProgramShaderCache::SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param) { SharedContextData* ctx_data = new SharedContextData(); ctx_data->context = GLInterface->CreateSharedContext(); if (!ctx_data->context) { PanicAlert("Failed to create shared context for shader compiling."); delete ctx_data; return false; } *param = ctx_data; return true; } bool ProgramShaderCache::SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param) { SharedContextData* ctx_data = reinterpret_cast(param); if (!ctx_data->context->MakeCurrent()) { PanicAlert("Failed to make shared context current."); ctx_data->context->Shutdown(); delete ctx_data; return false; } CreatePrerenderArrays(ctx_data); return true; } void ProgramShaderCache::SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param) { SharedContextData* ctx_data = reinterpret_cast(param); DestroyPrerenderArrays(ctx_data); ctx_data->context->Shutdown(); delete ctx_data; } ProgramShaderCache::ShaderCompileWorkItem::ShaderCompileWorkItem(const SHADERUID& uid) { std::memcpy(&m_uid, &uid, sizeof(m_uid)); } bool ProgramShaderCache::ShaderCompileWorkItem::Compile() { ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); ShaderCode vcode = GenerateVertexShaderCode(APIType::OpenGL, host_config, m_uid.vuid.GetUidData()); ShaderCode pcode = GeneratePixelShaderCode(APIType::OpenGL, host_config, m_uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !m_uid.guid.GetUidData()->IsPassthrough()) gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, m_uid.guid.GetUidData()); CompileShader(m_program, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer()); DrawPrerenderArray(m_program, m_uid.guid.GetUidData()->primitive_type); return true; } void ProgramShaderCache::ShaderCompileWorkItem::Retrieve() { auto iter = pshaders.find(m_uid); if (iter != pshaders.end() && !iter->second.pending) { // Main thread already compiled this shader. m_program.Destroy(); return; } PCacheEntry& entry = pshaders[m_uid]; entry.shader = m_program; entry.in_cache = false; entry.pending = false; } ProgramShaderCache::UberShaderCompileWorkItem::UberShaderCompileWorkItem(const UBERSHADERUID& uid) { std::memcpy(&m_uid, &uid, sizeof(m_uid)); } bool ProgramShaderCache::UberShaderCompileWorkItem::Compile() { ShaderHostConfig host_config = ShaderHostConfig::GetCurrent(); ShaderCode vcode = UberShader::GenVertexShader(APIType::OpenGL, host_config, m_uid.vuid.GetUidData()); ShaderCode pcode = UberShader::GenPixelShader(APIType::OpenGL, host_config, m_uid.puid.GetUidData()); ShaderCode gcode; if (g_ActiveConfig.backend_info.bSupportsGeometryShaders && !m_uid.guid.GetUidData()->IsPassthrough()) gcode = GenerateGeometryShaderCode(APIType::OpenGL, host_config, m_uid.guid.GetUidData()); CompileShader(m_program, vcode.GetBuffer(), pcode.GetBuffer(), gcode.GetBuffer()); DrawPrerenderArray(m_program, m_uid.guid.GetUidData()->primitive_type); return true; } void ProgramShaderCache::UberShaderCompileWorkItem::Retrieve() { auto iter = ubershaders.find(m_uid); if (iter != ubershaders.end() && !iter->second.pending) { // Main thread already compiled this shader. m_program.Destroy(); return; } PCacheEntry& entry = ubershaders[m_uid]; entry.shader = m_program; entry.in_cache = false; entry.pending = false; } void ProgramShaderCache::CreatePrerenderArrays(SharedContextData* data) { // Create VAO for the prerender vertices. // We don't use the normal VAO map, since we need to change the VBO pointer. glGenVertexArrays(1, &data->prerender_VAO); glBindVertexArray(data->prerender_VAO); // Create and populate the prerender VBO. We need enough space to draw 3 triangles. static constexpr float vbo_data[] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; constexpr u32 vbo_stride = sizeof(float) * 3; glGenBuffers(1, &data->prerender_VBO); glBindBuffer(GL_ARRAY_BUFFER, data->prerender_VBO); glBufferData(GL_ARRAY_BUFFER, sizeof(vbo_data), vbo_data, GL_STATIC_DRAW); // We only need a position in our prerender vertex. glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); glVertexAttribPointer(SHADER_POSITION_ATTRIB, 3, GL_FLOAT, GL_FALSE, vbo_stride, nullptr); // The other attributes have to be active to avoid variant generation. glEnableVertexAttribArray(SHADER_POSMTX_ATTRIB); glVertexAttribIPointer(SHADER_POSMTX_ATTRIB, 1, GL_UNSIGNED_BYTE, vbo_stride, nullptr); for (u32 i = 0; i < 3; i++) { glEnableVertexAttribArray(SHADER_NORM0_ATTRIB + i); glVertexAttribPointer(SHADER_NORM0_ATTRIB + i, 3, GL_FLOAT, GL_FALSE, vbo_stride, nullptr); } for (u32 i = 0; i < 2; i++) { glEnableVertexAttribArray(SHADER_COLOR0_ATTRIB + i); glVertexAttribPointer(SHADER_COLOR0_ATTRIB + i, 4, GL_UNSIGNED_BYTE, GL_TRUE, vbo_stride, nullptr); } for (u32 i = 0; i < 8; i++) { glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB + i); glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB + i, 3, GL_FLOAT, GL_FALSE, vbo_stride, nullptr); } // We need an index buffer to set up the same drawing state on Mesa. static constexpr u16 ibo_data[] = {0, 1, 2}; glGenBuffers(1, &data->prerender_IBO); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data->prerender_IBO); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(ibo_data), ibo_data, GL_STATIC_DRAW); // Mesa also requires the primitive restart state matches? if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart) { if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) { glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); } else { if (GLExtensions::Version() >= 310) { glEnable(GL_PRIMITIVE_RESTART); glPrimitiveRestartIndex(65535); } else { glEnableClientState(GL_PRIMITIVE_RESTART_NV); glPrimitiveRestartIndexNV(65535); } } } } void ProgramShaderCache::DestroyPrerenderArrays(SharedContextData* data) { if (data->prerender_VAO) { glDeleteVertexArrays(1, &data->prerender_VAO); data->prerender_VAO = 0; } if (data->prerender_VBO) { glDeleteBuffers(1, &data->prerender_VBO); data->prerender_VBO = 0; } if (data->prerender_IBO) { glDeleteBuffers(1, &data->prerender_IBO); data->prerender_IBO = 0; } } void ProgramShaderCache::DrawPrerenderArray(const SHADER& shader, u32 primitive_type) { // This is called on a worker thread, so we don't want to use the normal binding process. glUseProgram(shader.glprogid); // The number of primitives drawn depends on the type. switch (primitive_type) { case PRIMITIVE_POINTS: glDrawElements(GL_POINTS, 1, GL_UNSIGNED_SHORT, nullptr); break; case PRIMITIVE_LINES: glDrawElements(GL_LINES, 2, GL_UNSIGNED_SHORT, nullptr); break; case PRIMITIVE_TRIANGLES: glDrawElements(GL_TRIANGLES, 3, GL_UNSIGNED_SHORT, nullptr); break; } // Has to be finished by the time the main thread picks it up. GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); glDeleteSync(sync); } } // namespace OGL