VideoBackends: Support a different number of threads for precompiling

At runtime, we only really want a single shader compiler thread.
However, for initial boots, we can use a higher number to speed things
up.
This commit is contained in:
Stenzek 2017-07-27 13:15:38 +10:00
parent e17efb1d8d
commit c8f31656cb
14 changed files with 81 additions and 27 deletions

View File

@ -85,6 +85,8 @@ const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS{
{System::GFX, "Settings", "PrecompileUberShaders"}, true};
// Number of shader compiler worker threads to use at runtime, stored under
// [Settings] ShaderCompilerThreads in the GFX config.
// NOTE(review): the trailing 1 is presumably the default value - confirm against ConfigInfo.
const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS{
{System::GFX, "Settings", "ShaderCompilerThreads"}, 1};
// Number of shader compiler worker threads to use while precompiling, stored under
// [Settings] ShaderPrecompilerThreads. Kept separate from the runtime setting so the
// two counts can differ.
const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS{
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1};
const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS{
{System::GFX, "Settings", "ForceVertexUberShaders"}, false};
const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS{

View File

@ -63,6 +63,7 @@ extern const ConfigInfo<bool> GFX_BACKGROUND_SHADER_COMPILING;
extern const ConfigInfo<bool> GFX_DISABLE_SPECIALIZED_SHADERS;
extern const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS;
extern const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS;
extern const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS;
extern const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS;

View File

@ -46,6 +46,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
Config::GFX_BACKGROUND_SHADER_COMPILING.location,
Config::GFX_DISABLE_SPECIALIZED_SHADERS.location,
Config::GFX_PRECOMPILE_UBER_SHADERS.location, Config::GFX_SHADER_COMPILER_THREADS.location,
Config::GFX_SHADER_PRECOMPILER_THREADS.location,
Config::GFX_FORCE_VERTEX_UBER_SHADERS.location, Config::GFX_FORCE_PIXEL_UBER_SHADERS.location,
Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location,

View File

@ -513,7 +513,7 @@ void PixelShaderCache::Init()
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileUberShaders();
QueueUberShaderCompiles();
}
void PixelShaderCache::LoadShaderCache()
@ -538,7 +538,7 @@ void PixelShaderCache::Reload()
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileUberShaders();
QueueUberShaderCompiles();
}
// ONLY to be used during shutdown.
@ -757,7 +757,7 @@ bool PixelShaderCache::InsertShader(const UberShader::PixelShaderUid& uid,
return (shader != nullptr);
}
void PixelShaderCache::PrecompileUberShaders()
void PixelShaderCache::QueueUberShaderCompiles()
{
UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& uid) {
if (UberPixelShaders.find(uid) != UberPixelShaders.end())

View File

@ -28,7 +28,7 @@ public:
static bool InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data, size_t len);
static bool InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader);
static bool InsertShader(const UberShader::PixelShaderUid& uid, ID3D11PixelShader* shader);
static void PrecompileUberShaders();
static void QueueUberShaderCompiles();
static ID3D11Buffer* GetConstantBuffer();

View File

@ -174,11 +174,12 @@ void VertexShaderCache::Init()
LoadShaderCache();
g_async_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
if (g_ActiveConfig.GetShaderCompilerThreads() > 0)
g_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
g_ActiveConfig.GetShaderPrecompilerThreads() :
g_ActiveConfig.GetShaderCompilerThreads());
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileUberShaders();
QueueUberShaderCompiles();
}
void VertexShaderCache::LoadShaderCache()
@ -206,7 +207,7 @@ void VertexShaderCache::Reload()
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileUberShaders();
QueueUberShaderCompiles();
}
void VertexShaderCache::Clear()
@ -426,7 +427,7 @@ void VertexShaderCache::RetreiveAsyncShaders()
g_async_compiler->RetrieveWorkItems();
}
void VertexShaderCache::PrecompileUberShaders()
void VertexShaderCache::QueueUberShaderCompiles()
{
UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& uid) {
if (ubervshaders.find(uid) != ubervshaders.end())
@ -435,13 +436,19 @@ void VertexShaderCache::PrecompileUberShaders()
g_async_compiler->QueueWorkItem(
g_async_compiler->CreateWorkItem<UberVertexShaderCompilerWorkItem>(uid));
});
}
// Waits on the async compiler until its outstanding work has completed, forwarding
// progress to the host's progress dialog, then retrieves the finished work items and
// resizes the worker pool to the runtime thread count.
void VertexShaderCache::WaitForBackgroundCompilesToComplete()
{
// The callback receives (completed, total) and passes both to the host progress dialog.
g_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) {
Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
static_cast<int>(completed), static_cast<int>(total));
});
// Retrieve the work items from the async compiler now that waiting has finished.
g_async_compiler->RetrieveWorkItems();
// NOTE(review): an empty title with -1/-1 presumably dismisses the progress dialog - confirm.
Host_UpdateProgressDialog("", -1, -1);
// Switch from precompile -> runtime compiler threads.
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
}
VertexShaderCache::VertexShaderCompilerWorkItem::VertexShaderCompilerWorkItem(

View File

@ -27,7 +27,8 @@ public:
static bool SetShader(D3DVertexFormat* vertex_format);
static bool SetUberShader(D3DVertexFormat* vertex_format);
static void RetreiveAsyncShaders();
static void PrecompileUberShaders();
static void QueueUberShaderCompiles();
static void WaitForBackgroundCompilesToComplete();
static ID3D11Buffer*& GetConstantBuffer();

View File

@ -161,6 +161,7 @@ void VideoBackend::Video_Prepare()
VertexShaderCache::Init();
PixelShaderCache::Init();
GeometryShaderCache::Init();
VertexShaderCache::WaitForBackgroundCompilesToComplete();
D3D::InitUtils();
BBox::Init();
}

View File

@ -607,17 +607,8 @@ void ProgramShaderCache::Init()
// on the main thread the first time they are used, causing stutter. Nouveau has been
// reported to crash if draw calls are invoked on the shared context threads. For now,
// disable asynchronous compilation on Mesa.
if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION) &&
g_ActiveConfig.GetShaderCompilerThreads() > 0)
{
if (!DriverDetails::HasBug(DriverDetails::BUG_SHARED_CONTEXT_SHADER_COMPILATION))
s_async_compiler = std::make_unique<SharedContextAsyncShaderCompiler>();
s_async_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
if (!s_async_compiler->HasWorkerThreads())
{
// No point using the async compiler without workers.
s_async_compiler.reset();
}
}
// Read our shader cache, only if supported and enabled
if (g_ogl_config.bSupportsGLSLCache && g_ActiveConfig.bShaderCache)
@ -630,7 +621,19 @@ void ProgramShaderCache::Init()
last_uber_entry = nullptr;
if (g_ActiveConfig.CanPrecompileUberShaders())
{
if (s_async_compiler)
s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads());
PrecompileUberShaders();
}
if (s_async_compiler)
{
// No point using the async compiler without workers.
s_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
if (!s_async_compiler->HasWorkerThreads())
s_async_compiler.reset();
}
}
void ProgramShaderCache::RetrieveAsyncShaders()

View File

@ -63,9 +63,9 @@ bool ShaderCache::Initialize()
return false;
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
if (g_ActiveConfig.GetShaderCompilerThreads() > 0)
m_async_shader_compiler->StartWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
g_ActiveConfig.GetShaderPrecompilerThreads() :
g_ActiveConfig.GetShaderCompilerThreads());
return true;
}
@ -1236,6 +1236,9 @@ void ShaderCache::PrecompileUberShaders()
});
WaitForBackgroundCompilesToComplete();
// Switch to the runtime/background thread config.
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
}
void ShaderCache::WaitForBackgroundCompilesToComplete()

View File

@ -106,8 +106,11 @@ void AsyncShaderCompiler::WaitUntilCompletion(
}
}
void AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
bool AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
{
if (num_worker_threads == 0)
return true;
for (u32 i = 0; i < num_worker_threads; i++)
{
void* thread_param = nullptr;
@ -131,6 +134,17 @@ void AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
m_worker_threads.push_back(std::move(thr));
}
return HasWorkerThreads();
}
bool AsyncShaderCompiler::ResizeWorkerThreads(u32 num_worker_threads)
{
  // Restart the pool only when the requested size differs from the current
  // one; otherwise the existing workers are left untouched.
  const bool size_changed = m_worker_threads.size() != num_worker_threads;
  if (!size_changed)
    return true;

  // Tear down all current workers, then start the requested number. The
  // result is whatever StartWorkerThreads reports.
  StopWorkerThreads();
  const bool started = StartWorkerThreads(num_worker_threads);
  return started;
}
bool AsyncShaderCompiler::HasWorkerThreads() const
@ -140,6 +154,9 @@ bool AsyncShaderCompiler::HasWorkerThreads() const
void AsyncShaderCompiler::StopWorkerThreads()
{
if (!HasWorkerThreads())
return;
// Signal worker threads to stop, and wake all of them.
{
std::lock_guard<std::mutex> guard(m_pending_work_lock);
@ -151,6 +168,7 @@ void AsyncShaderCompiler::StopWorkerThreads()
for (std::thread& thr : m_worker_threads)
thr.join();
m_worker_threads.clear();
m_exit_flag.Clear();
}
bool AsyncShaderCompiler::WorkerThreadInitMainThread(void** param)

View File

@ -52,7 +52,8 @@ public:
void WaitUntilCompletion(const std::function<void(size_t, size_t)>& progress_callback);
// Needed because of calling virtual methods in shutdown procedure.
void StartWorkerThreads(u32 num_worker_threads);
bool StartWorkerThreads(u32 num_worker_threads);
bool ResizeWorkerThreads(u32 num_worker_threads);
bool HasWorkerThreads() const;
void StopWorkerThreads();

View File

@ -98,6 +98,7 @@ void VideoConfig::Refresh()
bDisableSpecializedShaders = Config::Get(Config::GFX_DISABLE_SPECIALIZED_SHADERS);
bPrecompileUberShaders = Config::Get(Config::GFX_PRECOMPILE_UBER_SHADERS);
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
bForceVertexUberShaders = Config::Get(Config::GFX_FORCE_VERTEX_UBER_SHADERS);
bForcePixelUberShaders = Config::Get(Config::GFX_FORCE_PIXEL_UBER_SHADERS);
@ -196,13 +197,26 @@ bool VideoConfig::IsVSync()
return bVSync && !Core::GetIsThrottlerTempDisabled();
}
static u32 GetNumAutoShaderCompilerThreads()
{
  // Automatic number. We use clamp(cpus - 3, 1, 4).
  const int candidate = cpu_info.num_cores - 3;
  const int at_least_one = std::max(candidate, 1);
  const int at_most_four = std::min(at_least_one, 4);
  return static_cast<u32>(at_most_four);
}
u32 VideoConfig::GetShaderCompilerThreads() const
{
  // A negative setting selects the automatically chosen thread count.
  if (iShaderCompilerThreads < 0)
    return GetNumAutoShaderCompilerThreads();

  // Zero or a positive value is used as configured.
  return static_cast<u32>(iShaderCompilerThreads);
}
// Automatic number. We use clamp(cpus - 3, 1, 4).
return static_cast<u32>(std::min(std::max(cpu_info.num_cores - 3, 1), 4));
u32 VideoConfig::GetShaderPrecompilerThreads() const
{
  // A negative setting selects the automatically chosen thread count.
  if (iShaderPrecompilerThreads < 0)
    return GetNumAutoShaderCompilerThreads();

  // Zero or a positive value is used as configured.
  return static_cast<u32>(iShaderPrecompilerThreads);
}
bool VideoConfig::CanUseUberShaders() const

View File

@ -192,6 +192,7 @@ struct VideoConfig final
// 0 disables background compilation.
// -1 uses an automatic number based on the CPU threads.
int iShaderCompilerThreads;
int iShaderPrecompilerThreads;
// Temporary toggling of ubershaders, for debugging
bool bForceVertexUberShaders;
@ -256,6 +257,7 @@ struct VideoConfig final
}
bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; }
u32 GetShaderCompilerThreads() const;
u32 GetShaderPrecompilerThreads() const;
bool CanUseUberShaders() const;
bool CanPrecompileUberShaders() const;
bool CanBackgroundCompileShaders() const;