OGL: Re-implement async shader compiling
This commit is contained in:
parent
dec0c3bce8
commit
f9c829c7f7
|
@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
|
||||
g_Config.backend_info.bSupportsBPTCTextures = false;
|
||||
g_Config.backend_info.bSupportsFramebufferFetch = false;
|
||||
g_Config.backend_info.bSupportsBackgroundCompiling = true;
|
||||
|
||||
IDXGIFactory2* factory;
|
||||
IDXGIAdapter* ad;
|
||||
|
|
|
@ -46,6 +46,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsST3CTextures = false;
|
||||
g_Config.backend_info.bSupportsBPTCTextures = false;
|
||||
g_Config.backend_info.bSupportsFramebufferFetch = false;
|
||||
g_Config.backend_info.bSupportsBackgroundCompiling = false;
|
||||
|
||||
// aamodes: We only support 1 sample, so no MSAA
|
||||
g_Config.backend_info.Adapters.clear();
|
||||
|
|
|
@ -46,10 +46,11 @@ OGLPipeline::~OGLPipeline()
|
|||
|
||||
std::unique_ptr<OGLPipeline> OGLPipeline::Create(const AbstractPipelineConfig& config)
|
||||
{
|
||||
const PipelineProgram* program =
|
||||
ProgramShaderCache::GetPipelineProgram(static_cast<const OGLShader*>(config.vertex_shader),
|
||||
static_cast<const OGLShader*>(config.geometry_shader),
|
||||
static_cast<const OGLShader*>(config.pixel_shader));
|
||||
const PipelineProgram* program = ProgramShaderCache::GetPipelineProgram(
|
||||
static_cast<const GLVertexFormat*>(config.vertex_format),
|
||||
static_cast<const OGLShader*>(config.vertex_shader),
|
||||
static_cast<const OGLShader*>(config.geometry_shader),
|
||||
static_cast<const OGLShader*>(config.pixel_shader));
|
||||
if (!program)
|
||||
return nullptr;
|
||||
|
||||
|
|
|
@ -51,9 +51,10 @@ static std::unique_ptr<StreamBuffer> s_buffer;
|
|||
static int num_failures = 0;
|
||||
|
||||
static GLuint CurrentProgram = 0;
|
||||
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::pipelineprograms;
|
||||
std::mutex ProgramShaderCache::pipelineprogramlock;
|
||||
ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs;
|
||||
std::mutex ProgramShaderCache::s_pipeline_program_lock;
|
||||
static std::string s_glsl_header = "";
|
||||
static thread_local bool s_is_shared_context = false;
|
||||
|
||||
static std::string GetGLSLVersionString()
|
||||
{
|
||||
|
@ -506,8 +507,8 @@ void ProgramShaderCache::Shutdown()
|
|||
s_last_VAO = 0;
|
||||
|
||||
// All pipeline programs should have been released.
|
||||
_dbg_assert_(VIDEO, pipelineprograms.empty());
|
||||
pipelineprograms.clear();
|
||||
_dbg_assert_(VIDEO, s_pipeline_programs.empty());
|
||||
s_pipeline_programs.clear();
|
||||
}
|
||||
|
||||
void ProgramShaderCache::CreateAttributelessVAO()
|
||||
|
@ -548,21 +549,28 @@ void ProgramShaderCache::InvalidateLastProgram()
|
|||
CurrentProgram = 0;
|
||||
}
|
||||
|
||||
const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* vertex_shader,
|
||||
const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexFormat* vertex_format,
|
||||
const OGLShader* vertex_shader,
|
||||
const OGLShader* geometry_shader,
|
||||
const OGLShader* pixel_shader)
|
||||
{
|
||||
PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader};
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(pipelineprogramlock);
|
||||
auto iter = pipelineprograms.find(key);
|
||||
if (iter != pipelineprograms.end())
|
||||
std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
|
||||
auto iter = s_pipeline_programs.find(key);
|
||||
if (iter != s_pipeline_programs.end())
|
||||
{
|
||||
iter->second->reference_count++;
|
||||
return iter->second.get();
|
||||
}
|
||||
}
|
||||
|
||||
// We temporarily change the vertex array to the pipeline's vertex format.
|
||||
// This can prevent the NVIDIA OpenGL driver from recompiling on first use.
|
||||
GLuint vao = vertex_format ? vertex_format->VAO : s_attributeless_VAO;
|
||||
if (s_is_shared_context || vao != s_last_VAO)
|
||||
glBindVertexArray(vao);
|
||||
|
||||
std::unique_ptr<PipelineProgram> prog = std::make_unique<PipelineProgram>();
|
||||
prog->key = key;
|
||||
|
||||
|
@ -581,6 +589,11 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
|
|||
// Link program.
|
||||
prog->shader.SetProgramBindings(false);
|
||||
glLinkProgram(prog->shader.glprogid);
|
||||
|
||||
// Restore VAO binding after linking.
|
||||
if (!s_is_shared_context && vao != s_last_VAO)
|
||||
glBindVertexArray(s_last_VAO);
|
||||
|
||||
if (!ProgramShaderCache::CheckProgramLinkResult(prog->shader.glprogid, {}, {}, {}))
|
||||
{
|
||||
prog->shader.Destroy();
|
||||
|
@ -588,9 +601,9 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
|
|||
}
|
||||
|
||||
// Lock to insert. A duplicate program may have been created in the meantime.
|
||||
std::lock_guard<std::mutex> guard(pipelineprogramlock);
|
||||
auto iter = pipelineprograms.find(key);
|
||||
if (iter != pipelineprograms.end())
|
||||
std::lock_guard<std::mutex> guard(s_pipeline_program_lock);
|
||||
auto iter = s_pipeline_programs.find(key);
|
||||
if (iter != s_pipeline_programs.end())
|
||||
{
|
||||
// Destroy this program, and use the one which was created first.
|
||||
prog->shader.Destroy();
|
||||
|
@ -601,19 +614,25 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const OGLShader* v
|
|||
// Set program variables on the shader which will be returned.
|
||||
// This is only needed for drivers which don't support binding layout.
|
||||
prog->shader.SetProgramVariables();
|
||||
auto ip = pipelineprograms.emplace(key, std::move(prog));
|
||||
|
||||
// If this is a shared context, ensure we sync before we return the program to
|
||||
// the main thread. If we don't do this, some drivers can lock up (e.g. AMD).
|
||||
if (s_is_shared_context)
|
||||
glFinish();
|
||||
|
||||
auto ip = s_pipeline_programs.emplace(key, std::move(prog));
|
||||
return ip.first->second.get();
|
||||
}
|
||||
|
||||
void ProgramShaderCache::ReleasePipelineProgram(const PipelineProgram* prog)
|
||||
{
|
||||
auto iter = pipelineprograms.find(prog->key);
|
||||
_assert_(iter != pipelineprograms.end() && prog == iter->second.get());
|
||||
auto iter = s_pipeline_programs.find(prog->key);
|
||||
_assert_(iter != s_pipeline_programs.end() && prog == iter->second.get());
|
||||
|
||||
if (--iter->second->reference_count == 0)
|
||||
{
|
||||
iter->second->shader.Destroy();
|
||||
pipelineprograms.erase(iter);
|
||||
s_pipeline_programs.erase(iter);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -783,4 +802,55 @@ void ProgramShaderCache::CreateHeader()
|
|||
v > GlslEs300 ? "precision highp sampler2DMS;" : "",
|
||||
v >= GlslEs310 ? "precision highp image2DArray;" : "");
|
||||
}
|
||||
|
||||
bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
|
||||
{
|
||||
std::unique_ptr<cInterfaceBase> context = GLInterface->CreateSharedContext();
|
||||
if (!context)
|
||||
{
|
||||
PanicAlert("Failed to create shared context for shader compiling.");
|
||||
return false;
|
||||
}
|
||||
|
||||
*param = context.release();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SharedContextAsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
|
||||
{
|
||||
cInterfaceBase* context = static_cast<cInterfaceBase*>(param);
|
||||
if (!context->MakeCurrent())
|
||||
return false;
|
||||
|
||||
s_is_shared_context = true;
|
||||
if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart)
|
||||
{
|
||||
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
|
||||
{
|
||||
glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (GLExtensions::Version() >= 310)
|
||||
{
|
||||
glEnable(GL_PRIMITIVE_RESTART);
|
||||
glPrimitiveRestartIndex(65535);
|
||||
}
|
||||
else
|
||||
{
|
||||
glEnableClientState(GL_PRIMITIVE_RESTART_NV);
|
||||
glPrimitiveRestartIndexNV(65535);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SharedContextAsyncShaderCompiler::WorkerThreadExit(void* param)
|
||||
{
|
||||
cInterfaceBase* context = static_cast<cInterfaceBase*>(param);
|
||||
context->ClearCurrent();
|
||||
delete context;
|
||||
}
|
||||
} // namespace OGL
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <unordered_map>
|
||||
|
||||
#include "Common/GL/GLUtil.h"
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
|
||||
namespace OGL
|
||||
{
|
||||
|
@ -87,7 +88,8 @@ public:
|
|||
static void Shutdown();
|
||||
static void CreateHeader();
|
||||
|
||||
static const PipelineProgram* GetPipelineProgram(const OGLShader* vertex_shader,
|
||||
static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format,
|
||||
const OGLShader* vertex_shader,
|
||||
const OGLShader* geometry_shader,
|
||||
const OGLShader* pixel_shader);
|
||||
static void ReleasePipelineProgram(const PipelineProgram* prog);
|
||||
|
@ -99,8 +101,8 @@ private:
|
|||
|
||||
static void CreateAttributelessVAO();
|
||||
|
||||
static PipelineProgramMap pipelineprograms;
|
||||
static std::mutex pipelineprogramlock;
|
||||
static PipelineProgramMap s_pipeline_programs;
|
||||
static std::mutex s_pipeline_program_lock;
|
||||
|
||||
static u32 s_ubo_buffer_size;
|
||||
static s32 s_ubo_align;
|
||||
|
@ -110,4 +112,12 @@ private:
|
|||
static GLuint s_last_VAO;
|
||||
};
|
||||
|
||||
class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
|
||||
{
|
||||
protected:
|
||||
bool WorkerThreadInitMainThread(void** param) override;
|
||||
bool WorkerThreadInitWorkerThread(void* param) override;
|
||||
void WorkerThreadExit(void* param) override;
|
||||
};
|
||||
|
||||
} // namespace OGL
|
||||
|
|
|
@ -81,8 +81,8 @@ static bool s_efbCacheIsCleared = false;
|
|||
static std::vector<u32>
|
||||
s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor
|
||||
|
||||
static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity,
|
||||
GLsizei length, const char* message, const void* userParam)
|
||||
void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
|
||||
const char* message, const void* userParam)
|
||||
{
|
||||
const char* s_source;
|
||||
const char* s_type;
|
||||
|
@ -677,6 +677,10 @@ Renderer::Renderer()
|
|||
g_Config.backend_info.bSupportsPaletteConversion &&
|
||||
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
|
||||
|
||||
// Background compiling is supported only when shared contexts aren't broken.
|
||||
g_Config.backend_info.bSupportsBackgroundCompiling =
|
||||
!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION);
|
||||
|
||||
if (g_ogl_config.bSupportsDebug)
|
||||
{
|
||||
if (GLExtensions::Supports("GL_KHR_debug"))
|
||||
|
@ -1695,4 +1699,9 @@ void Renderer::DispatchComputeShader(const AbstractShader* shader, const void* u
|
|||
glDispatchCompute(groups_x, groups_y, groups_z);
|
||||
ProgramShaderCache::InvalidateLastProgram();
|
||||
}
|
||||
|
||||
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
|
||||
{
|
||||
return std::make_unique<SharedContextAsyncShaderCompiler>();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -139,6 +139,8 @@ public:
|
|||
void DispatchComputeShader(const AbstractShader* shader, const void* uniforms, u32 uniforms_size,
|
||||
u32 groups_x, u32 groups_y, u32 groups_z) override;
|
||||
|
||||
std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler() override;
|
||||
|
||||
private:
|
||||
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc,
|
||||
const TargetRectangle& targetPixelRc, const void* data);
|
||||
|
|
|
@ -73,6 +73,7 @@ void VideoSoftware::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsBPTCTextures = false;
|
||||
g_Config.backend_info.bSupportsCopyToVram = false;
|
||||
g_Config.backend_info.bSupportsFramebufferFetch = false;
|
||||
g_Config.backend_info.bSupportsBackgroundCompiling = false;
|
||||
|
||||
// aamodes
|
||||
g_Config.backend_info.AAModes = {1};
|
||||
|
|
|
@ -235,6 +235,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
|
|||
config->backend_info.bSupportsBitfield = true; // Assumed support.
|
||||
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
|
||||
config->backend_info.bSupportsPostProcessing = true; // Assumed support.
|
||||
config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support.
|
||||
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.
|
||||
config->backend_info.bSupportsGeometryShaders = false; // Dependent on features.
|
||||
config->backend_info.bSupportsGSInstancing = false; // Dependent on features.
|
||||
|
|
|
@ -102,8 +102,6 @@ static BugInfo m_known_bugs[] = {
|
|||
true},
|
||||
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN,
|
||||
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
|
||||
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN,
|
||||
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
|
||||
{API_VULKAN, OS_ALL, VENDOR_NVIDIA, DRIVER_NVIDIA, Family::UNKNOWN, BUG_BROKEN_MSAA_CLEAR, -1.0,
|
||||
-1.0, true},
|
||||
{API_VULKAN, OS_ALL, VENDOR_IMGTEC, DRIVER_IMGTEC, Family::UNKNOWN,
|
||||
|
|
|
@ -252,8 +252,10 @@ enum Bug
|
|||
// the negated value to a temporary variable then using that in the bitwise op.
|
||||
BUG_BROKEN_BITWISE_OP_NEGATION,
|
||||
|
||||
// Bug: Shaders are recompiled on the main thread after being previously compiled on
|
||||
// a worker thread on Mesa i965.
|
||||
// BUG: The GPU shader code appears to be context-specific on Mesa/i965.
|
||||
// This means that if we compiled the ubershaders asynchronously, they will be recompiled
|
||||
// on the main thread the first time they are used, causing stutter. For now, disable
|
||||
// asynchronous compilation on Mesa i965.
|
||||
// Started version: -1
|
||||
// Ended Version: -1
|
||||
BUG_SHARED_CONTEXT_SHADER_COMPILATION,
|
||||
|
|
|
@ -1027,3 +1027,8 @@ bool Renderer::UseVertexDepthRange() const
|
|||
// in the vertex shader.
|
||||
return fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f;
|
||||
}
|
||||
|
||||
std::unique_ptr<VideoCommon::AsyncShaderCompiler> Renderer::CreateAsyncShaderCompiler()
|
||||
{
|
||||
return std::make_unique<VideoCommon::AsyncShaderCompiler>();
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "Common/Flag.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "VideoCommon/AVIDump.h"
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include "VideoCommon/BPMemory.h"
|
||||
#include "VideoCommon/FPSCounter.h"
|
||||
#include "VideoCommon/RenderState.h"
|
||||
|
@ -189,6 +190,8 @@ public:
|
|||
void ResizeSurface(int new_width, int new_height);
|
||||
bool UseVertexDepthRange() const;
|
||||
|
||||
virtual std::unique_ptr<VideoCommon::AsyncShaderCompiler> CreateAsyncShaderCompiler();
|
||||
|
||||
virtual void Shutdown();
|
||||
|
||||
// Drawing utility shaders.
|
||||
|
|
|
@ -27,7 +27,7 @@ bool ShaderCache::Initialize()
|
|||
m_efb_multisamples = g_ActiveConfig.iMultisamples;
|
||||
|
||||
// Create the async compiler, and start the worker threads.
|
||||
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
|
||||
m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler();
|
||||
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
|
||||
|
||||
// Load shader and UID caches.
|
||||
|
|
|
@ -187,8 +187,7 @@ static u32 GetNumAutoShaderCompilerThreads()
|
|||
|
||||
u32 VideoConfig::GetShaderCompilerThreads() const
|
||||
{
|
||||
// videocommon shader cache is currently broken on OGL, needs multiple contexts.
|
||||
if (backend_info.api_type == APIType::OpenGL)
|
||||
if (!backend_info.bSupportsBackgroundCompiling)
|
||||
return 0;
|
||||
|
||||
if (iShaderCompilerThreads >= 0)
|
||||
|
@ -199,8 +198,7 @@ u32 VideoConfig::GetShaderCompilerThreads() const
|
|||
|
||||
u32 VideoConfig::GetShaderPrecompilerThreads() const
|
||||
{
|
||||
// videocommon shader cache is currently broken on OGL, needs multiple contexts.
|
||||
if (backend_info.api_type == APIType::OpenGL)
|
||||
if (!backend_info.bSupportsBackgroundCompiling)
|
||||
return 0;
|
||||
|
||||
if (iShaderPrecompilerThreads >= 0)
|
||||
|
|
|
@ -227,6 +227,7 @@ struct VideoConfig final
|
|||
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
|
||||
bool bSupportsBPTCTextures;
|
||||
bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES
|
||||
bool bSupportsBackgroundCompiling;
|
||||
} backend_info;
|
||||
|
||||
// Utility
|
||||
|
|
Loading…
Reference in New Issue