VideoBackends: Support a different number of threads for precompiling
At runtime, we only really want a single shader compiler thread. However, for initial boots, we can use a higher number to speed things up.
This commit is contained in:
parent
e17efb1d8d
commit
c8f31656cb
|
@ -85,6 +85,8 @@ const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS{
|
|||
{System::GFX, "Settings", "PrecompileUberShaders"}, true};
|
||||
const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS{
|
||||
{System::GFX, "Settings", "ShaderCompilerThreads"}, 1};
|
||||
const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS{
|
||||
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1};
|
||||
const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS{
|
||||
{System::GFX, "Settings", "ForceVertexUberShaders"}, false};
|
||||
const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS{
|
||||
|
|
|
@ -63,6 +63,7 @@ extern const ConfigInfo<bool> GFX_BACKGROUND_SHADER_COMPILING;
|
|||
extern const ConfigInfo<bool> GFX_DISABLE_SPECIALIZED_SHADERS;
|
||||
extern const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS;
|
||||
extern const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS;
|
||||
extern const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS;
|
||||
extern const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS;
|
||||
extern const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS;
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
|
|||
Config::GFX_BACKGROUND_SHADER_COMPILING.location,
|
||||
Config::GFX_DISABLE_SPECIALIZED_SHADERS.location,
|
||||
Config::GFX_PRECOMPILE_UBER_SHADERS.location, Config::GFX_SHADER_COMPILER_THREADS.location,
|
||||
Config::GFX_SHADER_PRECOMPILER_THREADS.location,
|
||||
Config::GFX_FORCE_VERTEX_UBER_SHADERS.location, Config::GFX_FORCE_PIXEL_UBER_SHADERS.location,
|
||||
|
||||
Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location,
|
||||
|
|
|
@ -513,7 +513,7 @@ void PixelShaderCache::Init()
|
|||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileUberShaders();
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
void PixelShaderCache::LoadShaderCache()
|
||||
|
@ -538,7 +538,7 @@ void PixelShaderCache::Reload()
|
|||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileUberShaders();
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
// ONLY to be used during shutdown.
|
||||
|
@ -757,7 +757,7 @@ bool PixelShaderCache::InsertShader(const UberShader::PixelShaderUid& uid,
|
|||
return (shader != nullptr);
|
||||
}
|
||||
|
||||
void PixelShaderCache::PrecompileUberShaders()
|
||||
void PixelShaderCache::QueueUberShaderCompiles()
|
||||
{
|
||||
UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& uid) {
|
||||
if (UberPixelShaders.find(uid) != UberPixelShaders.end())
|
||||
|
|
|
@ -28,7 +28,7 @@ public:
|
|||
static bool InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data, size_t len);
|
||||
static bool InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader);
|
||||
static bool InsertShader(const UberShader::PixelShaderUid& uid, ID3D11PixelShader* shader);
|
||||
static void PrecompileUberShaders();
|
||||
static void QueueUberShaderCompiles();
|
||||
|
||||
static ID3D11Buffer* GetConstantBuffer();
|
||||
|
||||
|
|
|
@ -174,11 +174,12 @@ void VertexShaderCache::Init()
|
|||
LoadShaderCache();
|
||||
|
||||
g_async_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
|
||||
if (g_ActiveConfig.GetShaderCompilerThreads() > 0)
|
||||
g_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
|
||||
g_ActiveConfig.GetShaderPrecompilerThreads() :
|
||||
g_ActiveConfig.GetShaderCompilerThreads());
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileUberShaders();
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
void VertexShaderCache::LoadShaderCache()
|
||||
|
@ -206,7 +207,7 @@ void VertexShaderCache::Reload()
|
|||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileUberShaders();
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
void VertexShaderCache::Clear()
|
||||
|
@ -426,7 +427,7 @@ void VertexShaderCache::RetreiveAsyncShaders()
|
|||
g_async_compiler->RetrieveWorkItems();
|
||||
}
|
||||
|
||||
void VertexShaderCache::PrecompileUberShaders()
|
||||
void VertexShaderCache::QueueUberShaderCompiles()
|
||||
{
|
||||
UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& uid) {
|
||||
if (ubervshaders.find(uid) != ubervshaders.end())
|
||||
|
@ -435,13 +436,19 @@ void VertexShaderCache::PrecompileUberShaders()
|
|||
g_async_compiler->QueueWorkItem(
|
||||
g_async_compiler->CreateWorkItem<UberVertexShaderCompilerWorkItem>(uid));
|
||||
});
|
||||
}
|
||||
|
||||
void VertexShaderCache::WaitForBackgroundCompilesToComplete()
|
||||
{
|
||||
g_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) {
|
||||
Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
|
||||
static_cast<int>(completed), static_cast<int>(total));
|
||||
});
|
||||
g_async_compiler->RetrieveWorkItems();
|
||||
Host_UpdateProgressDialog("", -1, -1);
|
||||
|
||||
// Switch from precompile -> runtime compiler threads.
|
||||
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
}
|
||||
|
||||
VertexShaderCache::VertexShaderCompilerWorkItem::VertexShaderCompilerWorkItem(
|
||||
|
|
|
@ -27,7 +27,8 @@ public:
|
|||
static bool SetShader(D3DVertexFormat* vertex_format);
|
||||
static bool SetUberShader(D3DVertexFormat* vertex_format);
|
||||
static void RetreiveAsyncShaders();
|
||||
static void PrecompileUberShaders();
|
||||
static void QueueUberShaderCompiles();
|
||||
static void WaitForBackgroundCompilesToComplete();
|
||||
|
||||
static ID3D11Buffer*& GetConstantBuffer();
|
||||
|
||||
|
|
|
@ -161,6 +161,7 @@ void VideoBackend::Video_Prepare()
|
|||
VertexShaderCache::Init();
|
||||
PixelShaderCache::Init();
|
||||
GeometryShaderCache::Init();
|
||||
VertexShaderCache::WaitForBackgroundCompilesToComplete();
|
||||
D3D::InitUtils();
|
||||
BBox::Init();
|
||||
}
|
||||
|
|
|
@ -607,17 +607,8 @@ void ProgramShaderCache::Init()
|
|||
// on the main thread the first time they are used, causing stutter. Nouveau has been
|
||||
// reported to crash if draw calls are invoked on the shared context threads. For now,
|
||||
// disable asynchronous compilation on Mesa.
|
||||
if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION) &&
|
||||
g_ActiveConfig.GetShaderCompilerThreads() > 0)
|
||||
{
|
||||
if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION))
|
||||
s_async_compiler = std::make_unique<SharedContextAsyncShaderCompiler>();
|
||||
s_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
if (!s_async_compiler->HasWorkerThreads())
|
||||
{
|
||||
// No point using the async compiler without workers.
|
||||
s_async_compiler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Read our shader cache, only if supported and enabled
|
||||
if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache)
|
||||
|
@ -630,7 +621,19 @@ void ProgramShaderCache::Init()
|
|||
last_uber_entry = nullptr;
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
{
|
||||
if (s_async_compiler)
|
||||
s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
|
||||
PrecompileUberShaders();
|
||||
}
|
||||
|
||||
if (s_async_compiler)
|
||||
{
|
||||
// No point using the async compiler without workers.
|
||||
s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
if (!s_async_compiler->HasWorkerThreads())
|
||||
s_async_compiler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramShaderCache::RetrieveAsyncShaders()
|
||||
|
|
|
@ -63,9 +63,9 @@ bool ShaderCache::Initialize()
|
|||
return false;
|
||||
|
||||
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
|
||||
if (g_ActiveConfig.GetShaderCompilerThreads() > 0)
|
||||
m_async_shader_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
|
||||
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
|
||||
g_ActiveConfig.GetShaderPrecompilerThreads() :
|
||||
g_ActiveConfig.GetShaderCompilerThreads());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1236,6 +1236,9 @@ void ShaderCache::PrecompileUberShaders()
|
|||
});
|
||||
|
||||
WaitForBackgroundCompilesToComplete();
|
||||
|
||||
// Switch to the runtime/background thread config.
|
||||
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
}
|
||||
|
||||
void ShaderCache::WaitForBackgroundCompilesToComplete()
|
||||
|
|
|
@ -106,8 +106,11 @@ void AsyncShaderCompiler::WaitUntilCompletion(
|
|||
}
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
|
||||
bool AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
|
||||
{
|
||||
if (num_worker_threads == 0)
|
||||
return true;
|
||||
|
||||
for (u32 i = 0; i < num_worker_threads; i++)
|
||||
{
|
||||
void* thread_param = nullptr;
|
||||
|
@ -131,6 +134,17 @@ void AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
|
|||
|
||||
m_worker_threads.push_back(std::move(thr));
|
||||
}
|
||||
|
||||
return HasWorkerThreads();
|
||||
}
|
||||
|
||||
// Brings the worker pool to num_worker_threads threads.
// If the pool already has that many threads nothing is touched and true is returned.
// Otherwise all current workers are stopped and a fresh set is started; the return value is
// then whatever StartWorkerThreads() reports.
bool AsyncShaderCompiler::ResizeWorkerThreads(u32 num_worker_threads)
{
  const bool size_differs = m_worker_threads.size() != num_worker_threads;
  if (size_differs)
  {
    // There is no incremental grow/shrink: tear the pool down and rebuild it.
    StopWorkerThreads();
    return StartWorkerThreads(num_worker_threads);
  }

  return true;
}
|
||||
|
||||
bool AsyncShaderCompiler::HasWorkerThreads() const
|
||||
|
@ -140,6 +154,9 @@ bool AsyncShaderCompiler::HasWorkerThreads() const
|
|||
|
||||
void AsyncShaderCompiler::StopWorkerThreads()
|
||||
{
|
||||
if (!HasWorkerThreads())
|
||||
return;
|
||||
|
||||
// Signal worker threads to stop, and wake all of them.
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_pending_work_lock);
|
||||
|
@ -151,6 +168,7 @@ void AsyncShaderCompiler::StopWorkerThreads()
|
|||
for (std::thread& thr : m_worker_threads)
|
||||
thr.join();
|
||||
m_worker_threads.clear();
|
||||
m_exit_flag.Clear();
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
|
||||
|
|
|
@ -52,7 +52,8 @@ public:
|
|||
void WaitUntilCompletion(const std::function<void(size_t, size_t)>& progress_callback);
|
||||
|
||||
// Needed because of calling virtual methods in shutdown procedure.
|
||||
void StartWorkerThreads(u32 num_worker_threads);
|
||||
bool StartWorkerThreads(u32 num_worker_threads);
|
||||
bool ResizeWorkerThreads(u32 num_worker_threads);
|
||||
bool HasWorkerThreads() const;
|
||||
void StopWorkerThreads();
|
||||
|
||||
|
|
|
@ -98,6 +98,7 @@ void VideoConfig::Refresh()
|
|||
bDisableSpecializedShaders = Config::Get(Config::GFX_DISABLE_SPECIALIZED_SHADERS);
|
||||
bPrecompileUberShaders = Config::Get(Config::GFX_PRECOMPILE_UBER_SHADERS);
|
||||
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
|
||||
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
|
||||
bForceVertexUberShaders = Config::Get(Config::GFX_FORCE_VERTEX_UBER_SHADERS);
|
||||
bForcePixelUberShaders = Config::Get(Config::GFX_FORCE_PIXEL_UBER_SHADERS);
|
||||
|
||||
|
@ -196,13 +197,26 @@ bool VideoConfig::IsVSync()
|
|||
return bVSync && !Core::GetIsThrottlerTempDisabled();
|
||||
}
|
||||
|
||||
// Thread count used when a shader-compiler-thread setting is left on "automatic".
// Computed as clamp(cpu_info.num_cores - 3, 1, 4).
static u32 GetNumAutoShaderCompilerThreads()
{
  const int cores_minus_three = cpu_info.num_cores - 3;
  const int at_least_one = std::max(cores_minus_three, 1);
  const int at_most_four = std::min(at_least_one, 4);
  return static_cast<u32>(at_most_four);
}
|
||||
|
||||
// Resolves the configured runtime shader compiler thread count.
// A non-negative iShaderCompilerThreads is returned as-is; any negative value selects the
// automatic count from GetNumAutoShaderCompilerThreads().
u32 VideoConfig::GetShaderCompilerThreads() const
{
  if (iShaderCompilerThreads < 0)
    return GetNumAutoShaderCompilerThreads();

  return static_cast<u32>(iShaderCompilerThreads);
}
|
||||
|
||||
// Automatic number. We use clamp(cpus - 3, 1, 4).
|
||||
return static_cast<u32>(std::min(std::max(cpu_info.num_cores - 3, 1), 4));
|
||||
// Resolves the configured shader precompiler thread count.
// A non-negative iShaderPrecompilerThreads is returned as-is; any negative value selects the
// automatic count from GetNumAutoShaderCompilerThreads().
u32 VideoConfig::GetShaderPrecompilerThreads() const
{
  if (iShaderPrecompilerThreads < 0)
    return GetNumAutoShaderCompilerThreads();

  return static_cast<u32>(iShaderPrecompilerThreads);
}
|
||||
|
||||
bool VideoConfig::CanUseUberShaders() const
|
||||
|
|
|
@ -192,6 +192,7 @@ struct VideoConfig final
|
|||
// 0 disables background compilation.
|
||||
// -1 uses an automatic number based on the CPU threads.
|
||||
int iShaderCompilerThreads;
|
||||
int iShaderPrecompilerThreads;
|
||||
|
||||
// Temporary toggling of ubershaders, for debugging
|
||||
bool bForceVertexUberShaders;
|
||||
|
@ -256,6 +257,7 @@ struct VideoConfig final
|
|||
}
|
||||
bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; }
|
||||
u32 GetShaderCompilerThreads() const;
|
||||
u32 GetShaderPrecompilerThreads() const;
|
||||
bool CanUseUberShaders() const;
|
||||
bool CanPrecompileUberShaders() const;
|
||||
bool CanBackgroundCompileShaders() const;
|
||||
|
|
Loading…
Reference in New Issue