OGL: Re-implement async shader compiling

This commit is contained in:
Stenzek 2018-02-25 17:56:09 +10:00
parent dec0c3bce8
commit f9c829c7f7
16 changed files with 136 additions and 33 deletions

View File

@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = true;
IDXGIFactory2* factory; IDXGIFactory2* factory;
IDXGIAdapter* ad; IDXGIAdapter* ad;

View File

@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;
// aamodes: We only support 1 sample, so no MSAA // aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear(); g_Config.backend_info.Adapters.clear();

View File

@ -46,10 +46,11 @@ OGLPipeline::~OGLPipeline()
std::unique_ptr<OGLPipeline> OGLPipeline::Create(const AbstractPipelineConfig& config) std::unique_ptr<OGLPipeline> OGLPipeline::Create(const AbstractPipelineConfig& config)
{ {
const PipelineProgram* program = const PipelineProgram* program = ProgramShaderCache::GetPipelineProgram(
ProgramShaderCache::GetPipelineProgram(static_cast<const OGLShader*>(config.vertex_shader), static_cast<const GLVertexFormat*>(config.vertex_format),
static_cast<const OGLShader*>(config.geometry_shader), static_cast<const OGLShader*>(config.vertex_shader),
static_cast<const OGLShader*>(config.pixel_shader)); static_cast<const OGLShader*>(config.geometry_shader),
static_cast<const OGLShader*>(config.pixel_shader));
if (!program) if (!program)
return nullptr; return nullptr;

View File

@ -51,9 +51,10 @@ static std::unique_ptr<StreamBuffer> s_buffer;
static int num_failures = 0; static int num_failures = 0;
static GLuint CurrentProgram = 0; static GLuint CurrentProgram = 0;
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::pipelineprograms; ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs;
std::mutex ProgramShaderCache::pipelineprogramlock; std::mutex ProgramShaderCache::s_pipeline_program_lock;
static std::string s_glsl_header = ""; static std::string s_glsl_header = "";
static thread_local bool s_is_shared_context = false;
static std::string GetGLSLVersionString() static std::string GetGLSLVersionString()
{ {
@ -506,8 +507,8 @@ void ProgramShaderCache::Shutdown()
s_last_VAO = 0; s_last_VAO = 0;
// All pipeline programs should have been released. // All pipeline programs should have been released.
_dbg_assert_(VIDEO, pipelineprograms.empty()); _dbg_assert_(VIDEO, s_pipeline_programs.empty());
pipelineprograms.clear(); s_pipeline_programs.clear();
} }
void ProgramShaderCache::CreateAttributelessVAO() void ProgramShaderCache::CreateAttributelessVAO()
@ -548,21 +549,28 @@ void ProgramShaderCache::InvalidateLastProgram()
CurrentProgram = 0; CurrentProgram = 0;
} }
const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* vertex_shader, const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader, const OGLShader* geometry_shader,
const OGLShader* pixel_shader) const OGLShader* pixel_shader)
{ {
PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader}; PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader};
{ {
std::lock_guard<std::mutex> guard(pipelineprogramlock); std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
auto iter = pipelineprograms.find(key); auto iter = s_pipeline_programs.find(key);
if (iter != pipelineprograms.end()) if (iter != s_pipeline_programs.end())
{ {
iter->second->reference_count++; iter->second->reference_count++;
return iter->second.get(); return iter->second.get();
} }
} }
// We temporarily change the vertex array to the pipeline's vertex format.
// This can prevent the NVIDIA OpenGL driver from recompiling on first use.
GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
if (s_is_shared_context || vao != s_last_VAO)
glBindVertexArray(vao);
std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>(); std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>();
prog->key = key; prog->key = key;
@ -581,6 +589,11 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
// Link program. // Link program.
prog->shader.SetProgramBindings(false); prog->shader.SetProgramBindings(false);
glLinkProgram(prog->shader.glprogid); glLinkProgram(prog->shader.glprogid);
// Restore VAO binding after linking.
if (!s_is_shared_context && vao != s_last_VAO)
glBindVertexArray(s_last_VAO);
if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {})) if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {}))
{ {
prog->shader.Destroy(); prog->shader.Destroy();
@ -588,9 +601,9 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
} }
// Lock to insert. A duplicate program may have been created in the meantime. // Lock to insert. A duplicate program may have been created in the meantime.
std::lock_guard<std::mutex> guard(pipelineprogramlock); std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
auto iter = pipelineprograms.find(key); auto iter = s_pipeline_programs.find(key);
if (iter != pipelineprograms.end()) if (iter != s_pipeline_programs.end())
{ {
// Destroy this program, and use the one which was created first. // Destroy this program, and use the one which was created first.
prog->shader.Destroy(); prog->shader.Destroy();
@ -601,19 +614,25 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
// Set program variables on the shader which will be returned. // Set program variables on the shader which will be returned.
// This is only needed for drivers which don't support binding layout. // This is only needed for drivers which don't support binding layout.
prog->shader.SetProgramVariables(); prog->shader.SetProgramVariables();
auto ip = pipelineprograms.emplace(key, std::move(prog));
// If this is a shared context, ensure we sync before we return the program to
// the main thread. If we don't do this, some driver can lock up (e.g. AMD).
if (s_is_shared_context)
glFinish();
auto ip = s_pipeline_programs.emplace(key, std::move(prog));
return ip.first->second.get(); return ip.first->second.get();
} }
void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog) void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog)
{ {
auto iter = pipelineprograms.find(prog->key); auto iter = s_pipeline_programs.find(prog->key);
_assert_(iter != pipelineprograms.end() && prog == iter->second.get()); _assert_(iter != s_pipeline_programs.end() && prog == iter->second.get());
if (--iter->second->reference_count == 0) if (--iter->second->reference_count == 0)
{ {
iter->second->shader.Destroy(); iter->second->shader.Destroy();
pipelineprograms.erase(iter); s_pipeline_programs.erase(iter);
} }
} }
@ -783,4 +802,55 @@ void ProgramShaderCache::CreateHeader()
v > GlslEs300 ? "precision highp sampler2DMS;" : "", v > GlslEs300 ? "precision highp sampler2DMS;" : "",
v >= GlslEs310 ? "precision highp image2DArray;" : ""); v >= GlslEs310 ? "precision highp image2DArray;" : "");
} }
bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
{
std::unique_ptr<cInterfaceBase> context = GLInterface->CreateSharedContext();
if (!context)
{
PanicAlert("Failed to create shared context for shader compiling.");
return false;
}
*param = context.release();
return true;
}
// Makes the context passed in param current on the calling thread and mirrors the
// primitive restart state onto it. Returns false if the context cannot be made
// current, true otherwise.
// NOTE(review): param is presumably the pointer stored by WorkerThreadInitMainThread();
// the base-class plumbing that forwards it is not visible here.
bool SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
{
  auto* const shared_context = static_cast<cInterfaceBase*>(param);
  const bool made_current = shared_context->MakeCurrent();
  if (!made_current)
    return false;

  // Thread-local flag consulted by ProgramShaderCache::GetPipelineProgram() to decide
  // on VAO rebinding and whether to glFinish() before returning a program.
  s_is_shared_context = true;

  // Nothing more to configure when primitive restart is unsupported.
  if (!g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
    return true;

  if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
  {
    // GLES3: fixed-index restart, no index to configure.
    glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX);
  }
  else if (GLExtensions::Version() >= 310)
  {
    // Version() >= 310: core primitive restart entry points.
    glEnable(GL_PRIMITIVE_RESTART);
    glPrimitiveRestartIndex(65535);
  }
  else
  {
    // Older desktop GL: fall back to the NV extension entry points.
    glEnableClientState(GL_PRIMITIVE_RESTART_NV);
    glPrimitiveRestartIndexNV(65535);
  }

  return true;
}
void SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param)
{
cInterfaceBase* context = static_cast<cInterfaceBase*>(param);
context->ClearCurrent();
delete context;
}
} // namespace OGL } // namespace OGL

View File

@ -11,6 +11,7 @@
#include <unordered_map> #include <unordered_map>
#include "Common/GL/GLUtil.h" #include "Common/GL/GLUtil.h"
#include "VideoCommon/AsyncShaderCompiler.h"
namespace OGL namespace OGL
{ {
@ -87,7 +88,8 @@ public:
static void Shutdown(); static void Shutdown();
static void CreateHeader(); static void CreateHeader();
static const PipelineProgram* GetPipelineProgram(const OGLShader* vertex_shader, static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format,
const OGLShader* vertex_shader,
const OGLShader* geometry_shader, const OGLShader* geometry_shader,
const OGLShader* pixel_shader); const OGLShader* pixel_shader);
static void ReleasePipelineProgram(const PipelineProgram* prog); static void ReleasePipelineProgram(const PipelineProgram* prog);
@ -99,8 +101,8 @@ private:
static void CreateAttributelessVAO(); static void CreateAttributelessVAO();
static PipelineProgramMap pipelineprograms; static PipelineProgramMap s_pipeline_programs;
static std::mutex pipelineprogramlock; static std::mutex s_pipeline_program_lock;
static u32 s_ubo_buffer_size; static u32 s_ubo_buffer_size;
static s32 s_ubo_align; static s32 s_ubo_align;
@ -110,4 +112,12 @@ private:
static GLuint s_last_VAO; static GLuint s_last_VAO;
}; };
// Async shader compiler for the OpenGL backend. It overrides the worker thread
// init/exit hooks of VideoCommon::AsyncShaderCompiler so that a shared GL context
// is created, made current, and later destroyed for a worker thread.
class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
{
protected:
// Creates the shared context and returns it through param; false on failure.
bool WorkerThreadInitMainThread(void** param) override;
// Makes the context in param current and sets up per-context GL state; false on failure.
bool WorkerThreadInitWorkerThread(void* param) override;
// Clears and deletes the context in param.
void WorkerThreadExit(void* param) override;
};
} // namespace OGL } // namespace OGL

View File

@ -81,8 +81,8 @@ static bool s_efbCacheIsCleared = false;
static std::vector<u32> static std::vector<u32>
s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor
static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
GLsizei length, const char* message, const void* userParam) const char* message, const void* userParam)
{ {
const char* s_source; const char* s_source;
const char* s_type; const char* s_type;
@ -677,6 +677,10 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsPaletteConversion && g_Config.backend_info.bSupportsPaletteConversion &&
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore; g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
// Background compiling is supported only when shared contexts aren't broken.
g_Config.backend_info.bSupportsBackgroundCompiling =
!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION);
if (g_ogl_config.bSupportsDebug) if (g_ogl_config.bSupportsDebug)
{ {
if (GLExtensions::Supports("GL_KHR_debug")) if (GLExtensions::Supports("GL_KHR_debug"))
@ -1695,4 +1699,9 @@ void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* u
glDispatchCompute(groups_x, groups_y, groups_z); glDispatchCompute(groups_x, groups_y, groups_z);
ProgramShaderCache::InvalidateLastProgram(); ProgramShaderCache::InvalidateLastProgram();
} }
// OpenGL override: returns a newly created SharedContextAsyncShaderCompiler,
// owned by the caller through the base-class pointer.
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
  std::unique_ptr<VideoCommon::AsyncShaderCompiler> compiler =
      std::make_unique<SharedContextAsyncShaderCompiler>();
  return compiler;
}
} }

View File

@ -139,6 +139,8 @@ public:
void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size, void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size,
u32 groups_x, u32 groups_y, u32 groups_z) override; u32 groups_x, u32 groups_y, u32 groups_z) override;
std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler() override;
private: private:
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc,
const TargetRectangle& targetPixelRc, const void* data); const TargetRectangle& targetPixelRc, const void* data);

View File

@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false;
g_Config.backend_info.bSupportsCopyToVram = false; g_Config.backend_info.bSupportsCopyToVram = false;
g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsFramebufferFetch = false;
g_Config.backend_info.bSupportsBackgroundCompiling = false;
// aamodes // aamodes
g_Config.backend_info.AAModes = {1}; g_Config.backend_info.AAModes = {1};

View File

@ -235,6 +235,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsBitfield = true; // Assumed support. config->backend_info.bSupportsBitfield = true; // Assumed support.
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support. config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = true; // Assumed support. config->backend_info.bSupportsPostProcessing = true; // Assumed support.
config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support.
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features. config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.
config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. config->backend_info.bSupportsGeometryShaders = false; // Dependent on features.
config->backend_info.bSupportsGSInstancing = false; // Dependent on features. config->backend_info.bSupportsGSInstancing = false; // Dependent on features.

View File

@ -102,8 +102,6 @@ static BugInfo m_known_bugs[] = {
true}, true},
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN, {API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN,
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}, BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN,
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKEN_MSAA_CLEAR, -1.0, {API_VULKAN, OS_ALL, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKEN_MSAA_CLEAR, -1.0,
-1.0, true}, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN, {API_VULKAN, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN,

View File

@ -252,8 +252,10 @@ enum Bug
// the negated value to a temporary variable then using that in the bitwise op. // the negated value to a temporary variable then using that in the bitwise op.
BUG_BROKEN_BITWISE_OP_NEGATION, BUG_BROKEN_BITWISE_OP_NEGATION,
// Bug: Shaders are recompiled on the main thread after being previously compiled on // BUG: The GPU shader code appears to be context-specific on Mesa/i965.
// a worker thread on Mesa i965. // This means that if we compiled the ubershaders asynchronously, they will be recompiled
// on the main thread the first time they are used, causing stutter. For now, disable
// asynchronous compilation on Mesa i965.
// Started version: -1 // Started version: -1
// Ended Version: -1 // Ended Version: -1
BUG_SHARED_CONTEXT_SHADER_COMPILATION, BUG_SHARED_CONTEXT_SHADER_COMPILATION,

View File

@ -1027,3 +1027,8 @@ bool Renderer::UseVertexDepthRange() const
// in the vertex shader. // in the vertex shader.
return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f; return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
} }
// Default implementation: returns a plain VideoCommon::AsyncShaderCompiler.
// Declared virtual in the class, so backends may substitute their own compiler type.
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
{
  auto compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
  return compiler;
}

View File

@ -28,6 +28,7 @@
#include "Common/Flag.h" #include "Common/Flag.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "VideoCommon/AVIDump.h" #include "VideoCommon/AVIDump.h"
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/BPMemory.h" #include "VideoCommon/BPMemory.h"
#include "VideoCommon/FPSCounter.h" #include "VideoCommon/FPSCounter.h"
#include "VideoCommon/RenderState.h" #include "VideoCommon/RenderState.h"
@ -189,6 +190,8 @@ public:
void ResizeSurface(int new_width, int new_height); void ResizeSurface(int new_width, int new_height);
bool UseVertexDepthRange() const; bool UseVertexDepthRange() const;
virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();
virtual void Shutdown(); virtual void Shutdown();
// Drawing utility shaders. // Drawing utility shaders.

View File

@ -27,7 +27,7 @@ bool ShaderCache::Initialize()
m_efb_multisamples = g_ActiveConfig.iMultisamples; m_efb_multisamples = g_ActiveConfig.iMultisamples;
// Create the async compiler, and start the worker threads. // Create the async compiler, and start the worker threads.
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>(); m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler();
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads()); m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
// Load shader and UID caches. // Load shader and UID caches.

View File

@ -187,8 +187,7 @@ static u32 GetNumAutoShaderCompilerThreads()
u32 VideoConfig::GetShaderCompilerThreads() const u32 VideoConfig::GetShaderCompilerThreads() const
{ {
// videocommon shader cache is currently broken on OGL, needs multiple contexts. if (!backend_info.bSupportsBackgroundCompiling)
if (backend_info.api_type == APIType::OpenGL)
return 0; return 0;
if (iShaderCompilerThreads >= 0) if (iShaderCompilerThreads >= 0)
@ -199,8 +198,7 @@ u32 VideoConfig::GetShaderCompilerThreads() const
u32 VideoConfig::GetShaderPrecompilerThreads() const u32 VideoConfig::GetShaderPrecompilerThreads() const
{ {
// videocommon shader cache is currently broken on OGL, needs multiple contexts. if (!backend_info.bSupportsBackgroundCompiling)
if (backend_info.api_type == APIType::OpenGL)
return 0; return 0;
if (iShaderPrecompilerThreads >= 0) if (iShaderPrecompilerThreads >= 0)

View File

@ -227,6 +227,7 @@ struct VideoConfig final
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsBPTCTextures; bool bSupportsBPTCTextures;
bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES
bool bSupportsBackgroundCompiling;
} backend_info; } backend_info;
// Utility // Utility