Merge pull request #5702 from stenzek/ubershaders

Ubershaders 2.0
This commit is contained in:
Anthony 2017-07-30 01:00:16 -07:00 committed by GitHub
commit ba57605266
91 changed files with 5510 additions and 959 deletions

View File

@ -274,6 +274,10 @@ public final class SettingsAdapter extends RecyclerView.Adapter<SettingViewHolde
{
putXfbSetting(which);
}
else if (scSetting.getKey().equals(SettingsFile.KEY_UBERSHADER_MODE))
{
putUberShaderModeSetting(which);
}
else if (scSetting.getKey().equals(SettingsFile.KEY_WIIMOTE_EXTENSION))
{
putExtensionSetting(which, Character.getNumericValue(scSetting.getSection().charAt(scSetting.getSection().length() - 1)));
@ -437,6 +441,33 @@ public final class SettingsAdapter extends RecyclerView.Adapter<SettingViewHolde
mView.putSetting(xfbReal);
}
/**
 * Stores the two graphics booleans that together encode the selected ubershader mode.
 *
 * <p>The mapping mirrors what {@code getUberShaderModeValue()} reads back and what the
 * desktop UI writes in {@code OnUberShaderModeChanged}:
 * <ul>
 *   <li>0 (Disabled): DisableSpecializedShaders = false, BackgroundShaderCompiling = false</li>
 *   <li>1 (Hybrid): DisableSpecializedShaders = false, BackgroundShaderCompiling = true</li>
 *   <li>2 (Exclusive): DisableSpecializedShaders = true, BackgroundShaderCompiling = false</li>
 * </ul>
 *
 * @param which index selected in the ubershader mode list; values other than 0, 1 or 2
 *              are ignored so that no null setting is ever handed to the view.
 */
public void putUberShaderModeSetting(int which)
{
    final boolean disableSpecialized;
    final boolean backgroundCompiling;

    switch (which)
    {
        case 0: // Disabled
            disableSpecialized = false;
            backgroundCompiling = false;
            break;

        case 1: // Hybrid: specialized shaders stay enabled and are compiled in the background
            disableSpecialized = false;
            backgroundCompiling = true;
            break;

        case 2: // Exclusive: only ubershaders are used
            disableSpecialized = true;
            backgroundCompiling = false;
            break;

        default:
            // Unknown selection: there is nothing meaningful to persist.
            return;
    }

    mView.putSetting(new BooleanSetting(SettingsFile.KEY_DISABLE_SPECIALIZED_SHADERS,
            SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, disableSpecialized));
    mView.putSetting(new BooleanSetting(SettingsFile.KEY_BACKGROUND_SHADER_COMPILING,
            SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, backgroundCompiling));
}
public void putExtensionSetting(int which, int wiimoteNumber)
{
StringSetting extension = new StringSetting(SettingsFile.KEY_WIIMOTE_EXTENSION, SettingsFile.SECTION_WIIMOTE + wiimoteNumber,

View File

@ -276,6 +276,8 @@ public final class SettingsFragmentPresenter
private void addEnhanceSettings(ArrayList<SettingsItem> sl)
{
int uberShaderModeValue = getUberShaderModeValue();
Setting resolution = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_INTERNAL_RES);
Setting fsaa = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_FSAA);
Setting anisotropic = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_ENHANCEMENTS).getSetting(SettingsFile.KEY_ANISOTROPY);
@ -283,6 +285,7 @@ public final class SettingsFragmentPresenter
Setting perPixel = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_PER_PIXEL);
Setting forceFilter = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_ENHANCEMENTS).getSetting(SettingsFile.KEY_FORCE_FILTERING);
Setting disableFog = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_DISABLE_FOG);
IntSetting uberShaderMode = new IntSetting(SettingsFile.KEY_UBERSHADER_MODE, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, uberShaderModeValue);
sl.add(new SingleChoiceSetting(SettingsFile.KEY_INTERNAL_RES, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.internal_resolution, R.string.internal_resolution_descrip, R.array.internalResolutionEntries, R.array.internalResolutionValues, 0, resolution));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_FSAA, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.FSAA, R.string.FSAA_descrip, R.array.FSAAEntries, R.array.FSAAValues, 0, fsaa));
@ -296,6 +299,7 @@ public final class SettingsFragmentPresenter
sl.add(new CheckBoxSetting(SettingsFile.KEY_PER_PIXEL, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.per_pixel_lighting, R.string.per_pixel_lighting_descrip, false, perPixel));
sl.add(new CheckBoxSetting(SettingsFile.KEY_FORCE_FILTERING, SettingsFile.SECTION_GFX_ENHANCEMENTS, SettingsFile.SETTINGS_GFX, R.string.force_texture_filtering, R.string.force_texture_filtering_descrip, false, forceFilter));
sl.add(new CheckBoxSetting(SettingsFile.KEY_DISABLE_FOG, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.disable_fog, R.string.disable_fog_descrip, false, disableFog));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_UBERSHADER_MODE, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.ubershader_mode, R.string.ubershader_mode_descrip, R.array.uberShaderModeEntries, R.array.uberShaderModeValues, 0, uberShaderMode));
/*
Check if we support stereo
@ -903,6 +907,29 @@ public final class SettingsFragmentPresenter
return xfbValue;
}
/**
 * Derives the ubershader mode index shown in the UI from the two stored graphics booleans.
 *
 * @return 2 (Exclusive) when specialized shaders are disabled, otherwise 1 (Hybrid) when
 *         background shader compiling is enabled, otherwise 0 (Disabled). 0 is also returned
 *         when either setting cannot be read.
 */
private int getUberShaderModeValue()
{
    try
    {
        boolean backgroundCompiling = ((BooleanSetting) mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_BACKGROUND_SHADER_COMPILING)).getValue();
        boolean specializedDisabled = ((BooleanSetting) mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_DISABLE_SPECIALIZED_SHADERS)).getValue();

        if (specializedDisabled)
        {
            return 2; // Exclusive
        }
        return backgroundCompiling ? 1 /* Hybrid */ : 0 /* Disabled */;
    }
    catch (NullPointerException ignored)
    {
        // A section or setting is absent; fall through to the "Disabled" default below.
    }

    return 0;
}
private int getExtensionValue(int wiimoteNumber)
{
int extensionValue;

View File

@ -79,6 +79,9 @@ public final class SettingsFile
public static final String KEY_XFB_REAL = "UseRealXFB";
public static final String KEY_FAST_DEPTH = "FastDepthCalc";
public static final String KEY_ASPECT_RATIO = "AspectRatio";
public static final String KEY_UBERSHADER_MODE = "UberShaderMode";
public static final String KEY_DISABLE_SPECIALIZED_SHADERS = "DisableSpecializedShaders";
public static final String KEY_BACKGROUND_SHADER_COMPILING = "BackgroundShaderCompiling";
public static final String KEY_GCPAD_TYPE = "SIDevice";

View File

@ -89,6 +89,18 @@
<item>2</item>
</integer-array>
<!-- Ubershader Mode Preference -->
<string-array name="uberShaderModeEntries" translatable="false">
<item>Disabled</item>
<item>Hybrid</item>
<item>Exclusive</item>
</string-array>
<integer-array name="uberShaderModeValues" translatable="false">
<item>0</item>
<item>1</item>
<item>2</item>
</integer-array>
<!-- Internal Resolution Preference -->
<string-array name="internalResolutionEntries" translatable="false">
<item>1x Native (640x528)</item>

View File

@ -179,6 +179,8 @@
<string name="fast_depth_calculation_descrip">Uses a less accurate algorithm to calculate depth values.</string>
<string name="aspect_ratio">Aspect Ratio</string>
<string name="aspect_ratio_descrip">Select what aspect ratio to use when rendering</string>
<string name="ubershader_mode">Ubershader Mode</string>
<string name="ubershader_mode_descrip">Specifies when to use Ubershaders. Disabled - Never, Hybrid - Use ubershaders while compiling specialized shaders. Exclusive - Use only ubershaders, largest performance impact.</string>
<!-- Miscellaneous -->
<string name="yes">Yes</string>

View File

@ -77,6 +77,20 @@ const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100};
const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true};
const ConfigInfo<bool> GFX_BACKGROUND_SHADER_COMPILING{
{System::GFX, "Settings", "BackgroundShaderCompiling"}, false};
const ConfigInfo<bool> GFX_DISABLE_SPECIALIZED_SHADERS{
{System::GFX, "Settings", "DisableSpecializedShaders"}, false};
const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS{
{System::GFX, "Settings", "PrecompileUberShaders"}, true};
const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS{
{System::GFX, "Settings", "ShaderCompilerThreads"}, 1};
const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS{
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1};
const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS{
{System::GFX, "Settings", "ForceVertexUberShaders"}, false};
const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS{
{System::GFX, "Settings", "ForcePixelUberShaders"}, false};
const ConfigInfo<bool> GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true};
const ConfigInfo<bool> GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true};

View File

@ -59,6 +59,13 @@ extern const ConfigInfo<bool> GFX_ENABLE_VALIDATION_LAYER;
extern const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING;
extern const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL;
extern const ConfigInfo<bool> GFX_SHADER_CACHE;
extern const ConfigInfo<bool> GFX_BACKGROUND_SHADER_COMPILING;
extern const ConfigInfo<bool> GFX_DISABLE_SPECIALIZED_SHADERS;
extern const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS;
extern const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS;
extern const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS;
extern const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS;
extern const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS;
extern const ConfigInfo<bool> GFX_SW_ZCOMPLOC;
extern const ConfigInfo<bool> GFX_SW_ZFREEZE;

View File

@ -43,6 +43,11 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
Config::GFX_DISABLE_FOG.location, Config::GFX_BORDERLESS_FULLSCREEN.location,
Config::GFX_ENABLE_VALIDATION_LAYER.location, Config::GFX_BACKEND_MULTITHREADING.location,
Config::GFX_COMMAND_BUFFER_EXECUTE_INTERVAL.location, Config::GFX_SHADER_CACHE.location,
Config::GFX_BACKGROUND_SHADER_COMPILING.location,
Config::GFX_DISABLE_SPECIALIZED_SHADERS.location,
Config::GFX_PRECOMPILE_UBER_SHADERS.location, Config::GFX_SHADER_COMPILER_THREADS.location,
Config::GFX_SHADER_PRECOMPILER_THREADS.location,
Config::GFX_FORCE_VERTEX_UBER_SHADERS.location, Config::GFX_FORCE_PIXEL_UBER_SHADERS.location,
Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location,
Config::GFX_SW_DUMP_OBJECTS.location, Config::GFX_SW_DUMP_TEV_STAGES.location,

View File

@ -341,6 +341,7 @@ static void CpuThread()
{
Common::SetCurrentThreadName("CPU-GPU thread");
g_video_backend->Video_Prepare();
Host_Message(WM_USER_CREATE);
}
// This needs to be delayed until after the video backend is ready.
@ -409,6 +410,7 @@ static void FifoPlayerThread()
else
{
g_video_backend->Video_Prepare();
Host_Message(WM_USER_CREATE);
Common::SetCurrentThreadName("FIFO-GPU thread");
}
@ -601,6 +603,7 @@ static void EmuThread(std::unique_ptr<BootParameters> boot)
Common::SetCurrentThreadName("Video thread");
g_video_backend->Video_Prepare();
Host_Message(WM_USER_CREATE);
// Spawn the CPU thread
s_cpu_thread = std::thread(cpuThreadFunc);

View File

@ -308,6 +308,14 @@ static wxString gpu_texture_decoding_desc =
wxTRANSLATE("Enables texture decoding using the GPU instead of the CPU. This may result in "
"performance gains in some scenarios, or on systems where the CPU is the "
"bottleneck.\n\nIf unsure, leave this unchecked.");
static wxString ubershader_desc =
wxTRANSLATE("Disabled: Ubershaders are never used. Stuttering will occur during shader "
"compilation, but GPU demands are low. Recommended for low-end hardware.\n\n"
"Hybrid: Ubershaders will be used to prevent stuttering during shader "
"compilation, but traditional shaders will be used when they will not cause "
"stuttering. Balances performance and smoothness.\n\n"
"Exclusive: Ubershaders will always be used. Only recommended for high-end "
"systems.");
VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
: wxDialog(parent, wxID_ANY, wxString::Format(_("Dolphin %s Graphics Configuration"),
@ -561,6 +569,29 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
row += 1;
}
// ubershaders
{
const std::array<wxString, 3> mode_choices = {{_("Disabled"), _("Hybrid"), _("Exclusive")}};
wxChoice* const choice_mode =
new wxChoice(page_enh, wxID_ANY, wxDefaultPosition, wxDefaultSize,
static_cast<int>(mode_choices.size()), mode_choices.data());
RegisterControl(choice_mode, wxGetTranslation(ubershader_desc));
szr_enh->Add(new wxStaticText(page_enh, wxID_ANY, _("Ubershaders:")), wxGBPosition(row, 0),
wxDefaultSpan, wxALIGN_CENTER_VERTICAL);
szr_enh->Add(choice_mode, wxGBPosition(row, 1), span2, wxALIGN_CENTER_VERTICAL);
row += 1;
// Determine ubershader mode
choice_mode->Bind(wxEVT_CHOICE, &VideoConfigDiag::OnUberShaderModeChanged, this);
if (Config::GetBase(Config::GFX_DISABLE_SPECIALIZED_SHADERS))
choice_mode->SetSelection(2);
else if (Config::GetBase(Config::GFX_BACKGROUND_SHADER_COMPILING))
choice_mode->SetSelection(1);
else
choice_mode->SetSelection(0);
}
// postproc shader
if (vconfig.backend_info.bSupportsPostProcessing)
{
@ -1326,3 +1357,13 @@ void VideoConfigDiag::OnAAChanged(wxCommandEvent& ev)
Config::SetBaseOrCurrent(Config::GFX_MSAA, vconfig.backend_info.AAModes[mode]);
}
// Translates the ubershader choice index into the two config booleans.
//   0: no ubershaders   -> both flags false
//   1: hybrid           -> background shader compiling only
//   2: ubershaders only -> specialized shaders disabled only
void VideoConfigDiag::OnUberShaderModeChanged(wxCommandEvent& ev)
{
  const int selection = ev.GetInt();
  const bool hybrid = (selection == 1);
  const bool exclusive = (selection == 2);

  Config::SetBaseOrCurrent(Config::GFX_BACKGROUND_SHADER_COMPILING, hybrid);
  Config::SetBaseOrCurrent(Config::GFX_DISABLE_SPECIALIZED_SHADERS, exclusive);
}

View File

@ -140,6 +140,7 @@ protected:
void PopulatePostProcessingShaders();
void PopulateAAList();
void OnAAChanged(wxCommandEvent& ev);
void OnUberShaderModeChanged(wxCommandEvent& ev);
wxChoice* choice_backend;
wxChoice* choice_adapter;

View File

@ -185,10 +185,9 @@ std::vector<DXGI_SAMPLE_DESC> EnumAAModes(IDXGIAdapter* adapter)
ID3D11Device* _device;
ID3D11DeviceContext* _context;
D3D_FEATURE_LEVEL feat_level;
HRESULT hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr,
D3D11_CREATE_DEVICE_SINGLETHREADED, supported_feature_levels,
NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION, &_device,
&feat_level, &_context);
HRESULT hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0,
supported_feature_levels, NUM_SUPPORTED_FEATURE_LEVELS,
D3D11_SDK_VERSION, &_device, &feat_level, &_context);
if (FAILED(hr) || feat_level == D3D_FEATURE_LEVEL_10_0)
{
DXGI_SAMPLE_DESC desc;
@ -221,9 +220,9 @@ std::vector<DXGI_SAMPLE_DESC> EnumAAModes(IDXGIAdapter* adapter)
D3D_FEATURE_LEVEL GetFeatureLevel(IDXGIAdapter* adapter)
{
D3D_FEATURE_LEVEL feat_level = D3D_FEATURE_LEVEL_9_1;
PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, D3D11_CREATE_DEVICE_SINGLETHREADED,
supported_feature_levels, NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION,
nullptr, &feat_level, nullptr);
PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0, supported_feature_levels,
NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION, nullptr, &feat_level,
nullptr);
return feat_level;
}
@ -311,8 +310,7 @@ HRESULT Create(HWND wnd)
// Creating debug devices can sometimes fail if the user doesn't have the correct
// version of the DirectX SDK. If it does, simply fallback to a non-debug device.
{
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr,
D3D11_CREATE_DEVICE_SINGLETHREADED | D3D11_CREATE_DEVICE_DEBUG,
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, D3D11_CREATE_DEVICE_DEBUG,
supported_feature_levels, NUM_SUPPORTED_FEATURE_LEVELS,
D3D11_SDK_VERSION, &device, &featlevel, &context);
@ -339,8 +337,7 @@ HRESULT Create(HWND wnd)
if (FAILED(hr))
#endif
{
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr,
D3D11_CREATE_DEVICE_SINGLETHREADED, supported_feature_levels,
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0, supported_feature_levels,
NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION, &device, &featlevel,
&context);
}

View File

@ -18,7 +18,7 @@ namespace DX11
namespace D3D
{
// bytecode->shader
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, unsigned int len)
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len)
{
ID3D11VertexShader* v_shader;
HRESULT hr = D3D::device->CreateVertexShader(bytecode, len, nullptr, &v_shader);
@ -73,7 +73,7 @@ bool CompileVertexShader(const std::string& code, D3DBlob** blob)
}
// bytecode->shader
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, unsigned int len)
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len)
{
ID3D11GeometryShader* g_shader;
HRESULT hr = D3D::device->CreateGeometryShader(bytecode, len, nullptr, &g_shader);
@ -131,7 +131,7 @@ bool CompileGeometryShader(const std::string& code, D3DBlob** blob,
}
// bytecode->shader
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned int len)
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len)
{
ID3D11PixelShader* p_shader;
HRESULT hr = D3D::device->CreatePixelShader(bytecode, len, nullptr, &p_shader);

View File

@ -16,9 +16,9 @@ namespace DX11
{
namespace D3D
{
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, unsigned int len);
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, unsigned int len);
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned int len);
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len);
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len);
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len);
// The returned bytecode buffers should be Release()d.
bool CompileVertexShader(const std::string& code, D3DBlob** blob);

View File

@ -136,7 +136,7 @@ void StateManager::Apply()
m_current.pixelConstants[1] != m_pending.pixelConstants[1])
{
D3D::context->PSSetConstantBuffers(0, m_pending.pixelConstants[1] ? 2 : 1,
m_pending.pixelConstants);
m_pending.pixelConstants.data());
m_current.pixelConstants[0] = m_pending.pixelConstants[0];
m_current.pixelConstants[1] = m_pending.pixelConstants[1];
}

View File

@ -4,6 +4,7 @@
#pragma once
#include <array>
#include <cstddef>
#include <stack>
#include <unordered_map>
@ -269,9 +270,9 @@ private:
struct Resources
{
ID3D11ShaderResourceView* textures[8];
ID3D11SamplerState* samplers[8];
ID3D11Buffer* pixelConstants[2];
std::array<ID3D11ShaderResourceView*, 8> textures;
std::array<ID3D11SamplerState*, 8> samplers;
std::array<ID3D11Buffer*, 2> pixelConstants;
ID3D11Buffer* vertexConstants;
ID3D11Buffer* geometryConstants;
ID3D11Buffer* vertexBuffer;

View File

@ -13,6 +13,7 @@
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DShader.h"
#include "VideoBackends/D3D/D3DState.h"
#include "VideoBackends/D3D/FramebufferManager.h"
#include "VideoBackends/D3D/GeometryShaderCache.h"
@ -159,6 +160,9 @@ void GeometryShaderCache::Init()
if (g_ActiveConfig.bShaderCache)
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileShaders();
}
void GeometryShaderCache::LoadShaderCache()
@ -175,6 +179,9 @@ void GeometryShaderCache::Reload()
if (g_ActiveConfig.bShaderCache)
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileShaders();
}
// ONLY to be used during shutdown.
@ -203,78 +210,74 @@ void GeometryShaderCache::Shutdown()
bool GeometryShaderCache::SetShader(u32 primitive_type)
{
GeometryShaderUid uid = GetGeometryShaderUid(primitive_type);
// Check if the shader is already set
if (last_entry)
if (last_entry && uid == last_uid)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return true;
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
D3D::stateman->SetGeometryShader(last_entry->shader);
return true;
}
last_uid = uid;
// Check if the shader is a pass-through shader
if (uid.GetUidData()->IsPassthrough())
{
// Return the default pass-through shader
last_uid = uid;
last_entry = &pass_entry;
D3D::stateman->SetGeometryShader(last_entry->shader);
return true;
}
// Check if the shader is already in the cache
GSCache::iterator iter;
iter = GeometryShaders.find(uid);
auto iter = GeometryShaders.find(uid);
if (iter != GeometryShaders.end())
{
const GSCacheEntry& entry = iter->second;
last_uid = uid;
last_entry = &entry;
D3D::stateman->SetGeometryShader(last_entry->shader);
return (entry.shader != nullptr);
}
// Need to compile a new shader
if (CompileShader(uid))
return SetShader(primitive_type);
else
return false;
}
bool GeometryShaderCache::CompileShader(const GeometryShaderUid& uid)
{
D3DBlob* bytecode;
ShaderCode code =
GenerateGeometryShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
D3DBlob* pbytecode;
if (!D3D::CompileGeometryShader(code.GetBuffer(), &pbytecode))
if (!D3D::CompileGeometryShader(code.GetBuffer(), &bytecode) ||
!InsertByteCode(uid, bytecode->Data(), bytecode->Size()))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
SAFE_RELEASE(bytecode);
return false;
}
// Insert the bytecode into the caches
g_gs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
pbytecode->Release();
return success;
}
bool GeometryShaderCache::InsertByteCode(const GeometryShaderUid& uid, const void* bytecode,
unsigned int bytecodelen)
{
ID3D11GeometryShader* shader = D3D::CreateGeometryShaderFromByteCode(bytecode, bytecodelen);
if (shader == nullptr)
return false;
// TODO: Somehow make the debug name a bit more specific
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of GeometryShaderCache");
// Make an entry in the table
GSCacheEntry newentry;
newentry.shader = shader;
GeometryShaders[uid] = newentry;
last_entry = &GeometryShaders[uid];
if (!shader)
return false;
g_gs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
return true;
}
bool GeometryShaderCache::InsertByteCode(const GeometryShaderUid& uid, const u8* bytecode,
size_t len)
{
GSCacheEntry& newentry = GeometryShaders[uid];
newentry.shader = bytecode ? D3D::CreateGeometryShaderFromByteCode(bytecode, len) : nullptr;
return newentry.shader != nullptr;
}
// Walks every geometry shader UID handed out by EnumerateGeometryShaderUids() and
// compiles those that do not yet have an entry in the cache.
void GeometryShaderCache::PrecompileShaders()
{
  EnumerateGeometryShaderUids([](const GeometryShaderUid& uid) {
    const bool already_cached = GeometryShaders.find(uid) != GeometryShaders.end();
    if (!already_cached)
      CompileShader(uid);
  });
}
} // DX11

View File

@ -18,14 +18,14 @@ public:
static void Reload();
static void Clear();
static void Shutdown();
static bool SetShader(u32 primitive_type); // TODO: Should be renamed to LoadShader
static bool InsertByteCode(const GeometryShaderUid& uid, const void* bytecode,
unsigned int bytecodelen);
static bool SetShader(u32 primitive_type);
static bool CompileShader(const GeometryShaderUid& uid);
static bool InsertByteCode(const GeometryShaderUid& uid, const u8* bytecode, size_t len);
static void PrecompileShaders();
static ID3D11GeometryShader* GetClearGeometryShader();
static ID3D11GeometryShader* GetCopyGeometryShader();
static ID3D11GeometryShader* GetActiveShader() { return last_entry->shader; }
static ID3D11Buffer*& GetConstantBuffer();
private:

View File

@ -13,20 +13,6 @@
namespace DX11
{
class D3DVertexFormat : public NativeVertexFormat
{
public:
D3DVertexFormat(const PortableVertexDeclaration& vtx_decl);
~D3DVertexFormat() { SAFE_RELEASE(m_layout); }
void SetupVertexPointers() override;
private:
std::array<D3D11_INPUT_ELEMENT_DESC, 32> m_elems{};
UINT m_num_elems = 0;
ID3D11InputLayout* m_layout = nullptr;
};
std::unique_ptr<NativeVertexFormat>
VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
{
@ -66,7 +52,6 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl)
this->vtx_decl = _vtx_decl;
const AttributeFormat* format = &_vtx_decl.position;
if (format->enable)
{
m_elems[m_num_elems].SemanticName = "POSITION";
@ -129,15 +114,18 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl)
}
}
void D3DVertexFormat::SetupVertexPointers()
D3DVertexFormat::~D3DVertexFormat()
{
SAFE_RELEASE(m_layout);
}
void D3DVertexFormat::SetInputLayout(D3DBlob* vs_bytecode)
{
if (!m_layout)
{
// CreateInputLayout requires a shader input, but it only looks at the
// signature of the shader, so we don't need to recompute it if the shader
// changes.
D3DBlob* vs_bytecode = DX11::VertexShaderCache::GetActiveShaderBytecode();
HRESULT hr = DX11::D3D::device->CreateInputLayout(
m_elems.data(), m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &m_layout);
if (FAILED(hr))

View File

@ -8,12 +8,15 @@
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "Core/ConfigManager.h"
#include "Core/Host.h"
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DShader.h"
#include "VideoBackends/D3D/D3DState.h"
#include "VideoBackends/D3D/PixelShaderCache.h"
#include "VideoCommon/Debugger.h"
@ -25,10 +28,15 @@
namespace DX11
{
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
PixelShaderCache::UberPSCache PixelShaderCache::UberPixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_uber_entry;
PixelShaderUid PixelShaderCache::last_uid;
UberShader::PixelShaderUid PixelShaderCache::last_uber_uid;
LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
LinearDiskCache<UberShader::PixelShaderUid, u8> g_uber_ps_disk_cache;
extern std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;
ID3D11PixelShader* s_ColorMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr};
@ -429,10 +437,8 @@ ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram()
return s_DepthResolveProgram;
}
ID3D11Buffer*& PixelShaderCache::GetConstantBuffer()
static void UpdateConstantBuffers()
{
// TODO: divide the global variables of the generated shaders into about 5 constant buffers to
// speed this up
if (PixelShaderManager::dirty)
{
D3D11_MAPPED_SUBRESOURCE map;
@ -443,14 +449,20 @@ ID3D11Buffer*& PixelShaderCache::GetConstantBuffer()
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));
}
}
// Runs UpdateConstantBuffers() and then returns the pixel shader constant buffer (pscbuf).
ID3D11Buffer* PixelShaderCache::GetConstantBuffer()
{
UpdateConstantBuffers();
return pscbuf;
}
// this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
template <typename UidType>
class PixelShaderCacheInserter : public LinearDiskCacheReader<UidType, u8>
{
public:
void Read(const PixelShaderUid& key, const u8* value, u32 value_size)
void Read(const UidType& key, const u8* value, u32 value_size)
{
PixelShaderCache::InsertByteCode(key, value, value_size);
}
@ -499,22 +511,34 @@ void PixelShaderCache::Init()
if (g_ActiveConfig.bShaderCache)
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
QueueUberShaderCompiles();
}
void PixelShaderCache::LoadShaderCache()
{
PixelShaderCacheInserter inserter;
PixelShaderCacheInserter<PixelShaderUid> inserter;
g_ps_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "PS", true, true), inserter);
PixelShaderCacheInserter<UberShader::PixelShaderUid> uber_inserter;
g_uber_ps_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "UberPS", false, true),
uber_inserter);
}
void PixelShaderCache::Reload()
{
g_ps_disk_cache.Sync();
g_ps_disk_cache.Close();
g_uber_ps_disk_cache.Sync();
g_uber_ps_disk_cache.Close();
Clear();
if (g_ActiveConfig.bShaderCache)
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
QueueUberShaderCompiles();
}
// ONLY to be used during shutdown.
@ -522,10 +546,15 @@ void PixelShaderCache::Clear()
{
for (auto& iter : PixelShaders)
iter.second.Destroy();
for (auto& iter : UberPixelShaders)
iter.second.Destroy();
PixelShaders.clear();
UberPixelShaders.clear();
last_entry = nullptr;
last_uber_entry = nullptr;
last_uid = {};
last_uber_uid = {};
}
// Used in Swap() when AA mode has changed
@ -558,82 +587,249 @@ void PixelShaderCache::Shutdown()
Clear();
g_ps_disk_cache.Sync();
g_ps_disk_cache.Close();
g_uber_ps_disk_cache.Sync();
g_uber_ps_disk_cache.Close();
}
// Selects the pixel shader for the current state, falling back to SetUberShader()
// when specialized shaders are disabled/forced off or the matching entry is pending.
// NOTE(review): this span comes from a rendered diff without +/- markers; lines of the
// previous implementation (e.g. the pbytecode-based compile path) appear interleaved
// with the new ones — confirm against the real file before editing the code.
bool PixelShaderCache::SetShader()
{
PixelShaderUid uid = GetPixelShaderUid();
if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForcePixelUberShaders)
return SetUberShader();
// Check if the shader is already set
if (last_entry)
PixelShaderUid uid = GetPixelShaderUid();
if (last_entry && uid == last_uid)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return (last_entry->shader != nullptr);
}
if (last_entry->pending)
return SetUberShader();
if (!last_entry->shader)
return false;
D3D::stateman->SetPixelShader(last_entry->shader);
return true;
}
last_uid = uid;
// Check if the shader is already in the cache
PSCache::iterator iter;
iter = PixelShaders.find(uid);
auto iter = PixelShaders.find(uid);
if (iter != PixelShaders.end())
{
const PSCacheEntry& entry = iter->second;
if (entry.pending)
return SetUberShader();
last_uid = uid;
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return (entry.shader != nullptr);
if (!last_entry->shader)
return false;
D3D::stateman->SetPixelShader(last_entry->shader);
return true;
}
// Background compiling?
if (g_ActiveConfig.CanBackgroundCompileShaders())
{
// Create a pending entry
PSCacheEntry entry;
entry.pending = true;
PixelShaders[uid] = entry;
// Queue normal shader compiling and use ubershader
g_async_compiler->QueueWorkItem(
g_async_compiler->CreateWorkItem<PixelShaderCompilerWorkItem>(uid));
return SetUberShader();
}
// Need to compile a new shader
D3DBlob* bytecode = nullptr;
ShaderCode code =
GeneratePixelShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))
// NOTE(review): the result of this CompilePixelShader() call is not checked and
// bytecode was initialised to nullptr; if compilation can fail without setting it,
// bytecode->Data()/Size() on the next line would dereference null — confirm and guard.
D3D::CompilePixelShader(code.GetBuffer(), &bytecode);
if (!InsertByteCode(uid, bytecode->Data(), bytecode->Size()))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
SAFE_RELEASE(bytecode);
return false;
}
// Insert the bytecode into the caches
g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
pbytecode->Release();
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success;
// NOTE(review): unlike SetUberShader(), bytecode is not Release()d here before
// re-entering SetShader() — possible leak of the blob, confirm.
g_ps_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
return SetShader();
}
bool PixelShaderCache::InsertByteCode(const PixelShaderUid& uid, const void* bytecode,
unsigned int bytecodelen)
// Binds the uber pixel shader for the current UberShader UID: reuses the last entry,
// then the UberPixelShaders map, and otherwise compiles, caches and retries.
// NOTE(review): this span comes from a rendered diff without +/- markers; lines that
// appear to belong to the removed InsertByteCode(uid, bytecode, bytecodelen) body are
// interleaved with the new function body — confirm against the real file.
bool PixelShaderCache::SetUberShader()
{
ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
if (shader == nullptr)
return false;
UberShader::PixelShaderUid uid = UberShader::GetPixelShaderUid();
// TODO: Somehow make the debug name a bit more specific
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of PixelShaderCache");
// Make an entry in the table
PSCacheEntry newentry;
newentry.shader = shader;
PixelShaders[uid] = newentry;
last_entry = &PixelShaders[uid];
if (!shader)
if (last_uber_entry && last_uber_uid == uid)
{
// INCSTAT(stats.numPixelShadersFailed);
if (!last_uber_entry->shader)
return false;
D3D::stateman->SetPixelShader(last_uber_entry->shader);
return true;
}
auto iter = UberPixelShaders.find(uid);
if (iter != UberPixelShaders.end())
{
const PSCacheEntry& entry = iter->second;
last_uber_uid = uid;
last_uber_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
if (!last_uber_entry->shader)
return false;
D3D::stateman->SetPixelShader(last_uber_entry->shader);
return true;
}
D3DBlob* bytecode = nullptr;
ShaderCode code =
UberShader::GenPixelShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
// NOTE(review): the result of CompilePixelShader() is not checked and bytecode starts
// as nullptr; if compilation can fail without setting it, bytecode->Data()/Size() below
// would dereference null — confirm and guard.
D3D::CompilePixelShader(code.GetBuffer(), &bytecode);
if (!InsertByteCode(uid, bytecode->Data(), bytecode->Size()))
{
SAFE_RELEASE(bytecode);
return false;
}
// Lookup map again.
g_uber_ps_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
bytecode->Release();
return SetUberShader();
}
// Creates a D3D11 pixel shader from compiled bytecode and registers it in the
// specialized pixel shader cache under `uid`.
//
// data/len: compiled shader bytecode. A null `data` skips shader creation and
//           passes a null shader to InsertShader().
// Returns true only when InsertShader() accepted a non-null shader. On failure
// any shader created here is released again.
bool PixelShaderCache::InsertByteCode(const PixelShaderUid& uid, const u8* data, size_t len)
{
  ID3D11PixelShader* shader = data ? D3D::CreatePixelShaderFromByteCode(data, len) : nullptr;
  if (!InsertShader(uid, shader))
  {
    SAFE_RELEASE(shader);
    return false;
  }

  // InsertShader() already updates numPixelShadersCreated/numPixelShadersAlive;
  // updating them here as well would count every shader twice.
  return true;
}
// Builds a pixel shader object from ubershader bytecode and stores it in the
// ubershader cache. Returns true when the shader was created and accepted by
// InsertShader(); otherwise the freshly created shader (if any) is released.
bool PixelShaderCache::InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data,
                                      size_t len)
{
  ID3D11PixelShader* created = nullptr;
  if (data)
    created = D3D::CreatePixelShaderFromByteCode(data, len);

  if (InsertShader(uid, created))
    return true;

  // The cache did not keep the shader, so drop the reference held here.
  SAFE_RELEASE(created);
  return false;
}
bool PixelShaderCache::InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader)
{
auto iter = PixelShaders.find(uid);
if (iter != PixelShaders.end() && !iter->second.pending)
return false;
PSCacheEntry& newentry = PixelShaders[uid];
newentry.pending = false;
newentry.shader = shader;
INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
return (shader != nullptr);
}
// Stores `shader` in the ubershader pixel cache under `uid`.
//
// A non-pending entry that already exists is left untouched (returns false).
// Otherwise the entry is created or overwritten, marked as no longer pending,
// and the result tells the caller whether a usable shader was stored.
bool PixelShaderCache::InsertShader(const UberShader::PixelShaderUid& uid,
                                    ID3D11PixelShader* shader)
{
  const auto existing = UberPixelShaders.find(uid);
  const bool already_resolved = existing != UberPixelShaders.end() && !existing->second.pending;
  if (already_resolved)
    return false;

  PSCacheEntry& slot = UberPixelShaders[uid];
  slot.shader = shader;
  slot.pending = false;
  return shader != nullptr;
}
// Queues an async compile for every enumerated pixel ubershader UID that is not
// in the cache yet, blocks until the compiler has finished while reporting
// progress to the host, then collects the results and resets the dialog.
void PixelShaderCache::QueueUberShaderCompiles()
{
  const auto queue_if_missing = [&](const UberShader::PixelShaderUid& uid) {
    if (UberPixelShaders.find(uid) == UberPixelShaders.end())
    {
      g_async_compiler->QueueWorkItem(
          g_async_compiler->CreateWorkItem<UberPixelShaderCompilerWorkItem>(uid));
    }
  };
  UberShader::EnumeratePixelShaderUids(queue_if_missing);

  const auto report_progress = [](size_t completed, size_t total) {
    Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
                              static_cast<int>(completed), static_cast<int>(total));
  };
  g_async_compiler->WaitUntilCompletion(report_progress);

  g_async_compiler->RetrieveWorkItems();
  Host_UpdateProgressDialog("", -1, -1);
}
// Records the UID of the specialized pixel shader that this work item compiles.
PixelShaderCache::PixelShaderCompilerWorkItem::PixelShaderCompilerWorkItem(
const PixelShaderUid& uid)
{
// Raw byte copy instead of assignment.
// NOTE(review): presumably this keeps every byte of the UID (including any padding)
// identical to the source so later comparisons match -- confirm against the UID type.
std::memcpy(&m_uid, &uid, sizeof(uid));
}
// Releases the compiled bytecode blob. m_shader is not released here: Retrieve()
// either passes it to InsertShader() or releases it itself.
PixelShaderCache::PixelShaderCompilerWorkItem::~PixelShaderCompilerWorkItem()
{
SAFE_RELEASE(m_bytecode);
}
bool PixelShaderCache::PixelShaderCompilerWorkItem::Compile()
{
ShaderCode code =
GeneratePixelShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
if (D3D::CompilePixelShader(code.GetBuffer(), &m_bytecode))
m_shader = D3D::CreatePixelShaderFromByteCode(m_bytecode);
return true;
}
// Hands the compiled shader to the specialized pixel shader cache. When the
// cache accepts it the bytecode is appended to the disk cache; otherwise the
// shader is released here.
void PixelShaderCache::PixelShaderCompilerWorkItem::Retrieve()
{
  const bool accepted = InsertShader(m_uid, m_shader);
  if (!accepted)
  {
    SAFE_RELEASE(m_shader);
    return;
  }

  g_ps_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}
// Records the UID of the pixel ubershader that this work item compiles.
PixelShaderCache::UberPixelShaderCompilerWorkItem::UberPixelShaderCompilerWorkItem(
const UberShader::PixelShaderUid& uid)
{
// Raw byte copy instead of assignment.
// NOTE(review): presumably done so the stored UID is byte-for-byte identical to the
// source (padding included) -- confirm against the UID type.
std::memcpy(&m_uid, &uid, sizeof(uid));
}
// Releases the compiled bytecode blob. m_shader is not released here: Retrieve()
// either passes it to InsertShader() or releases it itself.
PixelShaderCache::UberPixelShaderCompilerWorkItem::~UberPixelShaderCompilerWorkItem()
{
SAFE_RELEASE(m_bytecode);
}
bool PixelShaderCache::UberPixelShaderCompilerWorkItem::Compile()
{
ShaderCode code =
UberShader::GenPixelShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
if (D3D::CompilePixelShader(code.GetBuffer(), &m_bytecode))
m_shader = D3D::CreatePixelShaderFromByteCode(m_bytecode);
return true;
}
// Hands the compiled ubershader to the ubershader pixel cache. When the cache
// accepts it the bytecode is appended to the ubershader disk cache; otherwise
// the shader is released here.
void PixelShaderCache::UberPixelShaderCompilerWorkItem::Retrieve()
{
  const bool accepted = InsertShader(m_uid, m_shader);
  if (!accepted)
  {
    SAFE_RELEASE(m_shader);
    return;
  }

  g_uber_ps_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}
} // DX11

View File

@ -7,10 +7,14 @@
#include <d3d11.h>
#include <map>
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/UberShaderPixel.h"
namespace DX11
{
class D3DBlob;
class PixelShaderCache
{
public:
@ -18,12 +22,15 @@ public:
static void Reload();
static void Clear();
static void Shutdown();
static bool SetShader(); // TODO: Should be renamed to LoadShader
static bool InsertByteCode(const PixelShaderUid& uid, const void* bytecode,
unsigned int bytecodelen);
static bool SetShader();
static bool SetUberShader();
static bool InsertByteCode(const PixelShaderUid& uid, const u8* data, size_t len);
static bool InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data, size_t len);
static bool InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader);
static bool InsertShader(const UberShader::PixelShaderUid& uid, ID3D11PixelShader* shader);
static void QueueUberShaderCompiles();
static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; }
static ID3D11Buffer*& GetConstantBuffer();
static ID3D11Buffer* GetConstantBuffer();
static ID3D11PixelShader* GetColorMatrixProgram(bool multisampled);
static ID3D11PixelShader* GetColorCopyProgram(bool multisampled);
@ -40,18 +47,53 @@ private:
struct PSCacheEntry
{
ID3D11PixelShader* shader;
bool pending;
PSCacheEntry() : shader(nullptr) {}
PSCacheEntry() : shader(nullptr), pending(false) {}
void Destroy() { SAFE_RELEASE(shader); }
};
// Async compiler work item that builds one specialized pixel shader identified by a
// PixelShaderUid.
class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
PixelShaderCompilerWorkItem(const PixelShaderUid& uid);
~PixelShaderCompilerWorkItem() override;
// Generates and compiles the shader for m_uid, filling m_bytecode and m_shader.
bool Compile() override;
// Inserts the compiled shader into the cache.
// NOTE(review): presumably invoked from RetrieveWorkItems() on the calling thread -- confirm.
void Retrieve() override;
private:
PixelShaderUid m_uid;
// Both stay null until Compile() succeeds.
ID3D11PixelShader* m_shader = nullptr;
D3DBlob* m_bytecode = nullptr;
};
// Async compiler work item that builds one pixel ubershader identified by an
// UberShader::PixelShaderUid.
class UberPixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
UberPixelShaderCompilerWorkItem(const UberShader::PixelShaderUid& uid);
~UberPixelShaderCompilerWorkItem() override;
// Generates and compiles the ubershader for m_uid, filling m_bytecode and m_shader.
bool Compile() override;
// Inserts the compiled shader into the ubershader cache.
// NOTE(review): presumably invoked from RetrieveWorkItems() on the calling thread -- confirm.
void Retrieve() override;
private:
UberShader::PixelShaderUid m_uid;
// Both stay null until Compile() succeeds.
ID3D11PixelShader* m_shader = nullptr;
D3DBlob* m_bytecode = nullptr;
};
typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
typedef std::map<UberShader::PixelShaderUid, PSCacheEntry> UberPSCache;
static void LoadShaderCache();
static PSCache PixelShaders;
static UberPSCache UberPixelShaders;
static const PSCacheEntry* last_entry;
static const PSCacheEntry* last_uber_entry;
static PixelShaderUid last_uid;
static UberShader::PixelShaderUid last_uber_uid;
};
} // namespace DX11

View File

@ -837,6 +837,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight,
// Enable configuration changes
UpdateActiveConfig();
g_texture_cache->OnConfigChanged(g_ActiveConfig);
VertexShaderCache::RetreiveAsyncShaders();
SetWindowSize(fbStride, fbHeight);
@ -958,10 +959,6 @@ void Renderer::ApplyState()
g_ActiveConfig.bEnablePixelLighting ? vertexConstants : nullptr);
D3D::stateman->SetVertexConstants(vertexConstants);
D3D::stateman->SetGeometryConstants(GeometryShaderCache::GetConstantBuffer());
D3D::stateman->SetPixelShader(PixelShaderCache::GetActiveShader());
D3D::stateman->SetVertexShader(VertexShaderCache::GetActiveShader());
D3D::stateman->SetGeometryShader(GeometryShaderCache::GetActiveShader());
}
void Renderer::RestoreState()

View File

@ -159,7 +159,9 @@ void VertexManager::vFlush()
return;
}
if (!VertexShaderCache::SetShader())
D3DVertexFormat* vertex_format =
static_cast<D3DVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
if (!VertexShaderCache::SetShader(vertex_format))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR, true, { printf("Fail to set pixel shader\n"); });
return;
@ -182,7 +184,6 @@ void VertexManager::vFlush()
PrepareDrawBuffers(stride);
VertexLoaderManager::GetCurrentVertexFormat()->SetupVertexPointers();
g_renderer->ApplyState();
Draw(stride);

View File

@ -4,13 +4,30 @@
#pragma once
#include <d3d11.h>
#include <memory>
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VertexManagerBase.h"
struct ID3D11Buffer;
namespace DX11
{
class D3DBlob;
// Direct3D 11 implementation of NativeVertexFormat, constructed from a
// PortableVertexDeclaration.
class D3DVertexFormat : public NativeVertexFormat
{
public:
D3DVertexFormat(const PortableVertexDeclaration& vtx_decl);
~D3DVertexFormat();
// Takes the bytecode of the vertex shader that is about to be used.
// NOTE(review): presumably creates m_layout against this bytecode on first use and
// binds it -- confirm in the implementation file.
void SetInputLayout(D3DBlob* vs_bytecode);
private:
// Storage for up to 32 input element descriptions, value-initialized.
// NOTE(review): m_num_elems presumably counts the valid entries -- confirm.
std::array<D3D11_INPUT_ELEMENT_DESC, 32> m_elems{};
UINT m_num_elems = 0;
// Null until an input layout object has been assigned.
ID3D11InputLayout* m_layout = nullptr;
};
class VertexManager : public VertexManagerBase
{
public:

View File

@ -8,23 +8,32 @@
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/LinearDiskCache.h"
#include "Common/MsgHandler.h"
#include "Common/StringUtil.h"
#include "Core/ConfigManager.h"
#include "Core/Host.h"
#include "VideoBackends/D3D/D3DShader.h"
#include "VideoBackends/D3D/D3DState.h"
#include "VideoBackends/D3D/VertexManager.h"
#include "VideoBackends/D3D/VertexShaderCache.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h"
#include "VideoCommon/VertexShaderManager.h"
namespace DX11
{
VertexShaderCache::VSCache VertexShaderCache::vshaders;
VertexShaderCache::UberVSCache VertexShaderCache::ubervshaders;
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry;
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_uber_entry;
VertexShaderUid VertexShaderCache::last_uid;
UberShader::VertexShaderUid VertexShaderCache::last_uber_uid;
static ID3D11VertexShader* SimpleVertexShader = nullptr;
static ID3D11VertexShader* ClearVertexShader = nullptr;
@ -32,6 +41,8 @@ static ID3D11InputLayout* SimpleLayout = nullptr;
static ID3D11InputLayout* ClearLayout = nullptr;
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
LinearDiskCache<UberShader::VertexShaderUid, u8> g_uber_vs_disk_cache;
std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;
ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader()
{
@ -70,10 +81,11 @@ ID3D11Buffer*& VertexShaderCache::GetConstantBuffer()
}
// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
template <typename UidType>
class VertexShaderCacheInserter : public LinearDiskCacheReader<UidType, u8>
{
public:
void Read(const VertexShaderUid& key, const u8* value, u32 value_size)
void Read(const UidType& key, const u8* value, u32 value_size)
{
D3DBlob* blob = new D3DBlob(value_size, value);
VertexShaderCache::InsertByteCode(key, blob);
@ -160,36 +172,66 @@ void VertexShaderCache::Init()
if (g_ActiveConfig.bShaderCache)
LoadShaderCache();
g_async_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
g_ActiveConfig.GetShaderPrecompilerThreads() :
g_ActiveConfig.GetShaderCompilerThreads());
if (g_ActiveConfig.CanPrecompileUberShaders())
QueueUberShaderCompiles();
}
void VertexShaderCache::LoadShaderCache()
{
VertexShaderCacheInserter inserter;
VertexShaderCacheInserter<VertexShaderUid> inserter;
g_vs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "VS", true, true), inserter);
VertexShaderCacheInserter<UberShader::VertexShaderUid> uber_inserter;
g_uber_vs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "UberVS", false, true),
uber_inserter);
}
// Rebuilds the vertex shader caches: finishes outstanding async compiles, flushes and
// closes both disk caches, drops all in-memory shaders, then reloads from disk and
// re-queues ubershader compiles according to the active configuration.
void VertexShaderCache::Reload()
{
// Finish and collect in-flight work items before the maps they insert into are cleared.
g_async_compiler->WaitUntilCompletion();
g_async_compiler->RetrieveWorkItems();
g_vs_disk_cache.Sync();
g_vs_disk_cache.Close();
g_uber_vs_disk_cache.Sync();
g_uber_vs_disk_cache.Close();
Clear();
// Only repopulate from disk when the shader cache option is enabled.
if (g_ActiveConfig.bShaderCache)
LoadShaderCache();
if (g_ActiveConfig.CanPrecompileUberShaders())
QueueUberShaderCompiles();
}
// Destroys every cached specialized and uber vertex shader, empties both maps,
// and resets the "last used" UID/entry bookkeeping so no stale pointer into the
// cleared maps survives.
void VertexShaderCache::Clear()
{
  for (auto& iter : vshaders)
    iter.second.Destroy();
  for (auto& iter : ubervshaders)
    iter.second.Destroy();

  vshaders.clear();
  ubervshaders.clear();

  // Each piece of state is reset exactly once; the previous version repeated
  // several of these assignments.
  last_uid = {};
  last_uber_uid = {};
  last_entry = nullptr;
  last_uber_entry = nullptr;
}
void VertexShaderCache::Shutdown()
{
g_async_compiler->StopWorkerThreads();
g_async_compiler->RetrieveWorkItems();
SAFE_RELEASE(vscbuf);
SAFE_RELEASE(SimpleVertexShader);
@ -201,74 +243,267 @@ void VertexShaderCache::Shutdown()
Clear();
g_vs_disk_cache.Sync();
g_vs_disk_cache.Close();
g_uber_vs_disk_cache.Sync();
g_uber_vs_disk_cache.Close();
}
bool VertexShaderCache::SetShader()
bool VertexShaderCache::SetShader(D3DVertexFormat* vertex_format)
{
VertexShaderUid uid = GetVertexShaderUid();
if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForceVertexUberShaders)
return SetUberShader(vertex_format);
if (last_entry)
VertexShaderUid uid = GetVertexShaderUid();
if (last_entry && uid == last_uid)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return (last_entry->shader != nullptr);
}
if (last_entry->pending)
return SetUberShader(vertex_format);
if (!last_entry->shader)
return false;
vertex_format->SetInputLayout(last_entry->bytecode);
D3D::stateman->SetVertexShader(last_entry->shader);
return true;
}
last_uid = uid;
VSCache::iterator iter = vshaders.find(uid);
auto iter = vshaders.find(uid);
if (iter != vshaders.end())
{
const VSCacheEntry& entry = iter->second;
if (entry.pending)
return SetUberShader(vertex_format);
last_uid = uid;
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return (entry.shader != nullptr);
if (!last_entry->shader)
return false;
vertex_format->SetInputLayout(last_entry->bytecode);
D3D::stateman->SetVertexShader(last_entry->shader);
return true;
}
// Background compiling?
if (g_ActiveConfig.CanBackgroundCompileShaders())
{
// Create a pending entry
VSCacheEntry entry;
entry.pending = true;
vshaders[uid] = entry;
// Queue normal shader compiling and use ubershader
g_async_compiler->QueueWorkItem(
g_async_compiler->CreateWorkItem<VertexShaderCompilerWorkItem>(uid));
return SetUberShader(vertex_format);
}
// Need to compile a new shader
D3DBlob* bytecode = nullptr;
ShaderCode code =
GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
D3DBlob* pbytecode = nullptr;
D3D::CompileVertexShader(code.GetBuffer(), &pbytecode);
if (pbytecode == nullptr)
D3D::CompileVertexShader(code.GetBuffer(), &bytecode);
if (!InsertByteCode(uid, bytecode))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
SAFE_RELEASE(bytecode);
return false;
}
g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
bool success = InsertByteCode(uid, pbytecode);
pbytecode->Release();
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success;
g_vs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
bytecode->Release();
return SetShader(vertex_format);
}
bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* bcodeblob)
bool VertexShaderCache::SetUberShader(D3DVertexFormat* vertex_format)
{
ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
if (shader == nullptr)
D3DVertexFormat* uber_vertex_format = static_cast<D3DVertexFormat*>(
VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration()));
UberShader::VertexShaderUid uid = UberShader::GetVertexShaderUid();
if (last_uber_entry && last_uber_uid == uid)
{
if (!last_uber_entry->shader)
return false;
uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
D3D::stateman->SetVertexShader(last_uber_entry->shader);
return true;
}
auto iter = ubervshaders.find(uid);
if (iter != ubervshaders.end())
{
const VSCacheEntry& entry = iter->second;
last_uber_uid = uid;
last_uber_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
if (!last_uber_entry->shader)
return false;
uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
D3D::stateman->SetVertexShader(last_uber_entry->shader);
return true;
}
// Need to compile a new shader
D3DBlob* bytecode = nullptr;
ShaderCode code =
UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
D3D::CompileVertexShader(code.GetBuffer(), &bytecode);
if (!InsertByteCode(uid, bytecode))
{
SAFE_RELEASE(bytecode);
return false;
}
g_uber_vs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
bytecode->Release();
return SetUberShader(vertex_format);
}
bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* blob)
{
ID3D11VertexShader* shader =
blob ? D3D::CreateVertexShaderFromByteCode(blob->Data(), blob->Size()) : nullptr;
bool result = InsertShader(uid, shader, blob);
SAFE_RELEASE(shader);
return result;
}
// Creates a vertex shader from `blob` (when non-null) and offers it to the
// ubershader vertex cache. Returns whatever InsertShader() reports.
bool VertexShaderCache::InsertByteCode(const UberShader::VertexShaderUid& uid, D3DBlob* blob)
{
  ID3D11VertexShader* created = nullptr;
  if (blob)
    created = D3D::CreateVertexShaderFromByteCode(blob->Data(), blob->Size());

  const bool inserted = InsertShader(uid, created, blob);

  // InsertShader() takes its own reference when it keeps the shader, so the
  // local reference is dropped in every case.
  SAFE_RELEASE(created);
  return inserted;
}
bool VertexShaderCache::InsertShader(const VertexShaderUid& uid, ID3D11VertexShader* shader,
D3DBlob* blob)
{
auto iter = vshaders.find(uid);
if (iter != vshaders.end() && !iter->second.pending)
return false;
// TODO: Somehow make the debug name a bit more specific
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a vertex shader of VertexShaderCache");
VSCacheEntry& newentry = vshaders[uid];
newentry.pending = false;
if (!shader || !blob)
return false;
// Make an entry in the table
VSCacheEntry entry;
entry.shader = shader;
entry.SetByteCode(bcodeblob);
shader->AddRef();
newentry.SetByteCode(blob);
newentry.shader = shader;
vshaders[uid] = entry;
last_entry = &vshaders[uid];
INCSTAT(stats.numPixelShadersCreated);
SETSTAT(stats.numPixelShadersAlive, static_cast<int>(vshaders.size()));
return true;
}
INCSTAT(stats.numVertexShadersCreated);
SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());
// Stores a compiled vertex ubershader and its bytecode under `uid`.
//
// A finished (non-pending) entry is never overwritten. Otherwise the entry is
// created or reused and marked as no longer pending; it only receives the
// shader (with an added reference) and bytecode when both are non-null.
// Returns true only when the shader was actually stored.
bool VertexShaderCache::InsertShader(const UberShader::VertexShaderUid& uid,
                                     ID3D11VertexShader* shader, D3DBlob* blob)
{
  const auto existing = ubervshaders.find(uid);
  const bool already_resolved = existing != ubervshaders.end() && !existing->second.pending;
  if (already_resolved)
    return false;

  VSCacheEntry& slot = ubervshaders[uid];
  slot.pending = false;
  if (shader == nullptr || blob == nullptr)
    return false;

  // The cache keeps its own reference; callers still own theirs.
  shader->AddRef();
  slot.SetByteCode(blob);
  slot.shader = shader;
  return true;
}
// Forwards to the shared async compiler's RetrieveWorkItems().
// NOTE(review): presumably this runs Retrieve() on each finished work item -- confirm in
// AsyncShaderCompiler. The misspelled name ("Retreive") is part of the public interface.
void VertexShaderCache::RetreiveAsyncShaders()
{
g_async_compiler->RetrieveWorkItems();
}
// Queues an async compile for every enumerated vertex ubershader UID that has no
// cache entry yet. Does not wait for the compiles to finish.
void VertexShaderCache::QueueUberShaderCompiles()
{
  const auto queue_if_missing = [&](const UberShader::VertexShaderUid& uid) {
    if (ubervshaders.find(uid) == ubervshaders.end())
    {
      g_async_compiler->QueueWorkItem(
          g_async_compiler->CreateWorkItem<UberVertexShaderCompilerWorkItem>(uid));
    }
  };

  UberShader::EnumerateVertexShaderUids(queue_if_missing);
}
// Blocks until the async compiler has finished all queued work, reporting progress to the
// host UI, then collects the results and resizes the worker pool for runtime compiles.
void VertexShaderCache::WaitForBackgroundCompilesToComplete()
{
// The callback receives the completed and total item counts and forwards them to the
// host progress dialog.
g_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) {
Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
static_cast<int>(completed), static_cast<int>(total));
});
g_async_compiler->RetrieveWorkItems();
// NOTE(review): an empty title with -1/-1 presumably dismisses the dialog -- confirm in Host.
Host_UpdateProgressDialog("", -1, -1);
// Switch from precompile -> runtime compiler threads.
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
}
// Records the UID of the specialized vertex shader that this work item compiles.
VertexShaderCache::VertexShaderCompilerWorkItem::VertexShaderCompilerWorkItem(
const VertexShaderUid& uid)
{
// Raw byte copy instead of assignment.
// NOTE(review): presumably done so the stored UID is byte-for-byte identical to the
// source (padding included) -- confirm against the UID type.
std::memcpy(&m_uid, &uid, sizeof(uid));
}
// Drops this work item's references to the bytecode blob and the vertex shader.
// InsertShader() calls AddRef() on a shader it keeps, so releasing m_vs here does not
// take away the cache's reference.
VertexShaderCache::VertexShaderCompilerWorkItem::~VertexShaderCompilerWorkItem()
{
SAFE_RELEASE(m_bytecode);
SAFE_RELEASE(m_vs);
}
bool VertexShaderCache::VertexShaderCompilerWorkItem::Compile()
{
ShaderCode code =
GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
if (D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode))
m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode);
return true;
}
// Offers the compiled shader to the specialized vertex shader cache and, when it
// is accepted, appends its bytecode to the on-disk cache.
void VertexShaderCache::VertexShaderCompilerWorkItem::Retrieve()
{
  const bool accepted = InsertShader(m_uid, m_vs, m_bytecode);
  if (!accepted)
    return;

  g_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}
// Records the UID of the vertex ubershader that this work item compiles.
VertexShaderCache::UberVertexShaderCompilerWorkItem::UberVertexShaderCompilerWorkItem(
const UberShader::VertexShaderUid& uid)
{
// Raw byte copy instead of assignment.
// NOTE(review): presumably done so the stored UID is byte-for-byte identical to the
// source (padding included) -- confirm against the UID type.
std::memcpy(&m_uid, &uid, sizeof(uid));
}
// Drops this work item's references to the bytecode blob and the vertex shader.
// InsertShader() calls AddRef() on a shader it keeps, so releasing m_vs here does not
// take away the cache's reference.
VertexShaderCache::UberVertexShaderCompilerWorkItem::~UberVertexShaderCompilerWorkItem()
{
SAFE_RELEASE(m_bytecode);
SAFE_RELEASE(m_vs);
}
bool VertexShaderCache::UberVertexShaderCompilerWorkItem::Compile()
{
ShaderCode code =
UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
if (D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode))
m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode);
return true;
}
// Offers the compiled ubershader to the ubershader vertex cache and, when it is
// accepted, appends its bytecode to the ubershader disk cache.
void VertexShaderCache::UberVertexShaderCompilerWorkItem::Retrieve()
{
  const bool accepted = InsertShader(m_uid, m_vs, m_bytecode);
  if (!accepted)
    return;

  g_uber_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
}
} // namespace DX11

View File

@ -9,10 +9,14 @@
#include "VideoBackends/D3D/D3DBase.h"
#include "VideoBackends/D3D/D3DBlob.h"
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"
namespace DX11
{
class D3DVertexFormat;
class VertexShaderCache
{
public:
@ -20,10 +24,12 @@ public:
static void Reload();
static void Clear();
static void Shutdown();
static bool SetShader(); // TODO: Should be renamed to LoadShader
static bool SetShader(D3DVertexFormat* vertex_format);
static bool SetUberShader(D3DVertexFormat* vertex_format);
static void RetreiveAsyncShaders();
static void QueueUberShaderCompiles();
static void WaitForBackgroundCompilesToComplete();
static ID3D11VertexShader* GetActiveShader() { return last_entry->shader; }
static D3DBlob* GetActiveShaderBytecode() { return last_entry->bytecode; }
static ID3D11Buffer*& GetConstantBuffer();
static ID3D11VertexShader* GetSimpleVertexShader();
@ -31,15 +37,20 @@ public:
static ID3D11InputLayout* GetSimpleInputLayout();
static ID3D11InputLayout* GetClearInputLayout();
static bool InsertByteCode(const VertexShaderUid& uid, D3DBlob* bcodeblob);
static bool InsertByteCode(const VertexShaderUid& uid, D3DBlob* blob);
static bool InsertByteCode(const UberShader::VertexShaderUid& uid, D3DBlob* blob);
static bool InsertShader(const VertexShaderUid& uid, ID3D11VertexShader* shader, D3DBlob* blob);
static bool InsertShader(const UberShader::VertexShaderUid& uid, ID3D11VertexShader* shader,
D3DBlob* blob);
private:
struct VSCacheEntry
{
ID3D11VertexShader* shader;
D3DBlob* bytecode; // needed to initialize the input layout
bool pending;
VSCacheEntry() : shader(nullptr), bytecode(nullptr) {}
VSCacheEntry() : shader(nullptr), bytecode(nullptr), pending(false) {}
void SetByteCode(D3DBlob* blob)
{
SAFE_RELEASE(bytecode);
@ -52,13 +63,49 @@ private:
SAFE_RELEASE(bytecode);
}
};
// Async compiler work item that builds one specialized vertex shader identified by a
// VertexShaderUid.
class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
VertexShaderCompilerWorkItem(const VertexShaderUid& uid);
~VertexShaderCompilerWorkItem() override;
// Generates and compiles the shader for m_uid, filling m_bytecode and m_vs.
bool Compile() override;
// Inserts the compiled shader into the cache.
// NOTE(review): presumably invoked from RetrieveWorkItems() on the calling thread -- confirm.
void Retrieve() override;
private:
VertexShaderUid m_uid;
// Both stay null until Compile() succeeds.
D3DBlob* m_bytecode = nullptr;
ID3D11VertexShader* m_vs = nullptr;
};
// Async compiler work item that builds one vertex ubershader identified by an
// UberShader::VertexShaderUid.
class UberVertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
UberVertexShaderCompilerWorkItem(const UberShader::VertexShaderUid& uid);
~UberVertexShaderCompilerWorkItem() override;
// Generates and compiles the ubershader for m_uid, filling m_bytecode and m_vs.
bool Compile() override;
// Inserts the compiled shader into the ubershader cache.
// NOTE(review): presumably invoked from RetrieveWorkItems() on the calling thread -- confirm.
void Retrieve() override;
private:
UberShader::VertexShaderUid m_uid;
// Both stay null until Compile() succeeds.
D3DBlob* m_bytecode = nullptr;
ID3D11VertexShader* m_vs = nullptr;
};
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
typedef std::map<UberShader::VertexShaderUid, VSCacheEntry> UberVSCache;
static void LoadShaderCache();
static void SetInputLayout();
static VSCache vshaders;
static UberVSCache ubervshaders;
static const VSCacheEntry* last_entry;
static const VSCacheEntry* last_uber_entry;
static VertexShaderUid last_uid;
static UberShader::VertexShaderUid last_uber_uid;
};
} // namespace DX11

View File

@ -78,6 +78,8 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
g_Config.backend_info.bSupportsST3CTextures = false;
g_Config.backend_info.bSupportsBitfield = false;
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;
@ -159,6 +161,7 @@ void VideoBackend::Video_Prepare()
VertexShaderCache::Init();
PixelShaderCache::Init();
GeometryShaderCache::Init();
VertexShaderCache::WaitForBackgroundCompilesToComplete();
D3D::InitUtils();
BBox::Init();
}

View File

@ -16,7 +16,6 @@ class NullNativeVertexFormat : public NativeVertexFormat
{
public:
NullNativeVertexFormat() {}
void SetupVertexPointers() override {}
};
std::unique_ptr<NativeVertexFormat>

View File

@ -57,6 +57,7 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl)
glGenVertexArrays(1, &VAO);
glBindVertexArray(VAO);
ProgramShaderCache::BindVertexFormat(this);
// the element buffer is bound directly to the vao, so we must set it for every vao
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->m_index_buffers);
@ -74,16 +75,10 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl)
SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, _vtx_decl.texcoords[i]);
SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, _vtx_decl.posmtx);
vm->m_last_vao = VAO;
}
GLVertexFormat::~GLVertexFormat()
{
glDeleteVertexArrays(1, &VAO);
}
void GLVertexFormat::SetupVertexPointers()
{
}
}

File diff suppressed because it is too large Load Diff

View File

@ -4,17 +4,25 @@
#pragma once
#include <memory>
#include <tuple>
#include "Common/GL/GLUtil.h"
#include "Common/LinearDiskCache.h"
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"
class cInterfaceBase;
namespace OGL
{
class GLVertexFormat;
class SHADERUID
{
public:
@ -24,30 +32,53 @@ public:
bool operator<(const SHADERUID& r) const
{
return std::tie(puid, vuid, guid) < std::tie(r.puid, r.vuid, r.guid);
return std::tie(vuid, puid, guid) < std::tie(r.vuid, r.puid, r.guid);
}
bool operator==(const SHADERUID& r) const
{
return std::tie(puid, vuid, guid) == std::tie(r.puid, r.vuid, r.guid);
return std::tie(vuid, puid, guid) == std::tie(r.vuid, r.puid, r.guid);
}
};
// Key identifying one linked ubershader program: the vertex and pixel ubershader
// UIDs plus the geometry shader UID. Ordering and equality compare the three
// parts lexicographically in the order vertex, pixel, geometry.
class UBERSHADERUID
{
public:
  UberShader::VertexShaderUid vuid;
  UberShader::PixelShaderUid puid;
  GeometryShaderUid guid;

  bool operator<(const UBERSHADERUID& r) const
  {
    const auto lhs = std::tie(vuid, puid, guid);
    const auto rhs = std::tie(r.vuid, r.puid, r.guid);
    return lhs < rhs;
  }

  bool operator==(const UBERSHADERUID& r) const
  {
    const auto lhs = std::tie(vuid, puid, guid);
    const auto rhs = std::tie(r.vuid, r.puid, r.guid);
    return lhs == rhs;
  }
};
struct SHADER
{
SHADER() : glprogid(0) {}
void Destroy()
{
glDeleteProgram(glprogid);
glprogid = 0;
DestroyShaders();
if (glprogid)
{
glDeleteProgram(glprogid);
glprogid = 0;
}
}
GLuint glprogid; // OpenGL program id
std::string strvprog, strpprog, strgprog;
GLuint vsid = 0;
GLuint gsid = 0;
GLuint psid = 0;
GLuint glprogid = 0;
void SetProgramVariables();
void SetProgramBindings(bool is_compute);
void Bind() const;
void DestroyShaders();
};
class ProgramShaderCache
@ -57,43 +88,126 @@ public:
{
SHADER shader;
bool in_cache;
bool pending;
void Destroy() { shader.Destroy(); }
};
static PCacheEntry GetShaderProgram();
static SHADER* SetShader(u32 primitive_type);
static void GetShaderId(SHADERUID* uid, u32 primitive_type);
static SHADER* SetShader(u32 primitive_type, const GLVertexFormat* vertex_format);
static SHADER* SetUberShader(u32 primitive_type, const GLVertexFormat* vertex_format);
static void BindVertexFormat(const GLVertexFormat* vertex_format);
static void InvalidateVertexFormat();
static void BindLastVertexFormat();
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
const std::string& gcode = "");
static bool CompileComputeShader(SHADER& shader, const std::string& code);
static GLuint CompileSingleShader(GLuint type, const std::string& code);
static GLuint CompileSingleShader(GLenum type, const std::string& code);
static bool CheckShaderCompileResult(GLuint id, GLenum type, const std::string& code);
static bool CheckProgramLinkResult(GLuint id, const std::string& vcode, const std::string& pcode,
const std::string& gcode);
static void UploadConstants();
static void Init();
static void Reload();
static void Shutdown();
static void CreateHeader();
static void RetrieveAsyncShaders();
static void PrecompileUberShaders();
private:
class ProgramShaderCacheInserter : public LinearDiskCacheReader<SHADERUID, u8>
template <typename UIDType>
class ProgramShaderCacheInserter : public LinearDiskCacheReader<UIDType, u8>
{
public:
void Read(const SHADERUID& key, const u8* value, u32 value_size) override;
ProgramShaderCacheInserter(std::map<UIDType, PCacheEntry>& shader_map)
: m_shader_map(shader_map)
{
}
void Read(const UIDType& key, const u8* value, u32 value_size) override
{
if (m_shader_map.find(key) != m_shader_map.end())
return;
PCacheEntry& entry = m_shader_map[key];
if (!CreateCacheEntryFromBinary(&entry, value, value_size))
{
m_shader_map.erase(key);
return;
}
}
private:
std::map<UIDType, PCacheEntry>& m_shader_map;
};
// AsyncShaderCompiler specialization for the OpenGL backend that overrides the
// worker-thread lifecycle hooks.
// NOTE(review): presumably each worker gets its own shared GL context (see
// SharedContextData) -- confirm in the implementation file.
class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
{
protected:
  bool WorkerThreadInitMainThread(void** param) override;
  bool WorkerThreadInitWorkerThread(void* param) override;
  void WorkerThreadExit(void* param) override;
};
// Per-worker state for the shared-context shader compiler: the GL context
// interface plus the names of the vertex buffer, vertex array and index buffer
// objects used for "prerender" draws.
// NOTE(review): the exact purpose of the prerender draw (presumably forcing the
// driver to finish compiling a program) should be confirmed in the .cpp.
struct SharedContextData
{
  std::unique_ptr<cInterfaceBase> context;

  // Zero is never a valid generated GL object name, so a default-constructed
  // instance is in a well-defined "no objects" state instead of holding
  // indeterminate values (matches the zero-initialized handles in SHADER).
  GLuint prerender_VBO = 0;
  GLuint prerender_VAO = 0;
  GLuint prerender_IBO = 0;
};
// Async compiler work item for one specialized shader program, identified by a SHADERUID.
class ShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
ShaderCompileWorkItem(const SHADERUID& uid);
// NOTE(review): presumably Compile() builds m_program on a worker thread and Retrieve()
// publishes it to the program cache afterwards -- confirm in the implementation file.
bool Compile() override;
void Retrieve() override;
private:
SHADERUID m_uid;
SHADER m_program;
};
// Async compiler work item for one ubershader program, identified by a UBERSHADERUID.
class UberShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
UberShaderCompileWorkItem(const UBERSHADERUID& uid);
// NOTE(review): presumably Compile() builds m_program on a worker thread and Retrieve()
// publishes it to the ubershader program cache afterwards -- confirm in the implementation file.
bool Compile() override;
void Retrieve() override;
private:
UBERSHADERUID m_uid;
SHADER m_program;
};
typedef std::map<SHADERUID, PCacheEntry> PCache;
typedef std::map<UBERSHADERUID, PCacheEntry> UberPCache;
static GLuint CreateProgramFromBinary(const u8* value, u32 value_size);
static bool CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size);
static void LoadProgramBinaries();
static void SaveProgramBinaries();
static void DestroyShaders();
static void CreatePrerenderArrays(SharedContextData* data);
static void DestroyPrerenderArrays(SharedContextData* data);
static void DrawPrerenderArray(const SHADER& shader, u32 primitive_type);
typedef std::map<SHADERUID, PCacheEntry> PCache;
static PCache pshaders;
static UberPCache ubershaders;
static PCacheEntry* last_entry;
static PCacheEntry* last_uber_entry;
static SHADERUID last_uid;
static UBERSHADERUID last_uber_uid;
static std::unique_ptr<SharedContextAsyncShaderCompiler> s_async_compiler;
static u32 s_ubo_buffer_size;
static s32 s_ubo_align;
static u32 s_last_VAO;
};
} // namespace OGL

View File

@ -119,11 +119,11 @@ static const u8 rasters[CHARACTER_COUNT][CHARACTER_HEIGHT] = {
static const char* s_vertexShaderSrc = "uniform vec2 charSize;\n"
"uniform vec2 offset;"
"in vec2 rawpos;\n"
"in vec2 tex0;\n"
"in vec2 rawtex0;\n"
"out vec2 uv0;\n"
"void main(void) {\n"
" gl_Position = vec4(rawpos + offset,0,1);\n"
" uv0 = tex0 * charSize;\n"
" uv0 = rawtex0 * charSize;\n"
"}\n";
static const char* s_fragmentShaderSrc = "SAMPLER_BINDING(8) uniform sampler2D samp8;\n"

View File

@ -447,6 +447,12 @@ Renderer::Renderer()
// Clip distance support is useless without a method to clamp the depth range
g_Config.backend_info.bSupportsDepthClamp = GLExtensions::Supports("GL_ARB_depth_clamp");
// Desktop OpenGL supports bitfield manipulation and dynamic sampler indexing if it supports
// shader5. OpenGL ES 3.1 supports it implicitly without an extension
g_Config.backend_info.bSupportsBitfield = GLExtensions::Supports("GL_ARB_gpu_shader5");
g_Config.backend_info.bSupportsDynamicSamplerIndexing =
GLExtensions::Supports("GL_ARB_gpu_shader5");
g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary");
g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory");
g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync");
@ -515,6 +521,8 @@ Renderer::Renderer()
g_ogl_config.bSupportsMSAA = true;
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupports2DTextureStorageMultisample = true;
g_Config.backend_info.bSupportsBitfield = true;
g_Config.backend_info.bSupportsDynamicSamplerIndexing = g_ogl_config.bSupportsAEP;
if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 &&
!g_ogl_config.bSupports3DTextureStorageMultisample)
{
@ -542,6 +550,8 @@ Renderer::Renderer()
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupports2DTextureStorageMultisample = true;
g_ogl_config.bSupports3DTextureStorageMultisample = true;
g_Config.backend_info.bSupportsBitfield = true;
g_Config.backend_info.bSupportsDynamicSamplerIndexing = true;
}
}
else
@ -1462,6 +1472,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight,
// Clean out old stuff from caches. It's not worth it to clean out the shader caches.
g_texture_cache->Cleanup(frameCount);
ProgramShaderCache::RetrieveAsyncShaders();
// Render to the framebuffer.
FramebufferManager::SetFramebuffer(0);
@ -1758,10 +1769,9 @@ void Renderer::RestoreAPIState()
SetBlendMode(true);
SetViewport();
ProgramShaderCache::BindLastVertexFormat();
const VertexManager* const vm = static_cast<VertexManager*>(g_vertex_manager.get());
glBindBuffer(GL_ARRAY_BUFFER, vm->m_vertex_buffers);
if (vm->m_last_vao)
glBindVertexArray(vm->m_last_vao);
OGLTexture::SetStage();
}

View File

@ -58,6 +58,7 @@ struct VideoConfig
bool bSupportsConservativeDepth;
bool bSupportsImageLoadStore;
bool bSupportsAniso;
bool bSupportsBitfield;
const char* gl_vendor;
const char* gl_renderer;

View File

@ -53,8 +53,6 @@ void VertexManager::CreateDeviceObjects()
s_indexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE);
m_index_buffers = s_indexBuffer->m_buffer;
m_last_vao = 0;
}
void VertexManager::DestroyDeviceObjects()
@ -142,22 +140,13 @@ void VertexManager::vFlush()
GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
u32 stride = nativeVertexFmt->GetVertexStride();
if (m_last_vao != nativeVertexFmt->VAO)
{
glBindVertexArray(nativeVertexFmt->VAO);
m_last_vao = nativeVertexFmt->VAO;
}
ProgramShaderCache::SetShader(m_current_primitive_type, nativeVertexFmt);
PrepareDrawBuffers(stride);
ProgramShaderCache::SetShader(m_current_primitive_type);
// upload global constants
ProgramShaderCache::UploadConstants();
// setup the pointers
nativeVertexFmt->SetupVertexPointers();
if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation())
{
glEnable(GL_STENCIL_TEST);
@ -171,24 +160,6 @@ void VertexManager::vFlush()
glDisable(GL_STENCIL_TEST);
}
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
{
// save the shaders
ProgramShaderCache::PCacheEntry prog = ProgramShaderCache::GetShaderProgram();
std::string filename = StringFromFormat(
"%sps%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), g_ActiveConfig.iSaveTargetId);
std::ofstream fps;
File::OpenFStream(fps, filename, std::ios_base::out);
fps << prog.shader.strpprog;
filename = StringFromFormat("%svs%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(),
g_ActiveConfig.iSaveTargetId);
std::ofstream fvs;
File::OpenFStream(fvs, filename, std::ios_base::out);
fvs << prog.shader.strvprog;
}
#endif
g_Config.iSaveTargetId++;
ClearEFBCache();
}

View File

@ -20,8 +20,6 @@ public:
GLVertexFormat(const PortableVertexDeclaration& vtx_decl);
~GLVertexFormat();
void SetupVertexPointers() override;
GLuint VAO;
};
@ -42,7 +40,6 @@ public:
// NativeVertexFormat use this
GLuint m_vertex_buffers;
GLuint m_index_buffers;
GLuint m_last_vao;
protected:
void ResetBuffer(u32 stride) override;

View File

@ -30,7 +30,6 @@ class NullNativeVertexFormat : public NativeVertexFormat
{
public:
NullNativeVertexFormat(const PortableVertexDeclaration& _vtx_decl) { vtx_decl = _vtx_decl; }
void SetupVertexPointers() override {}
};
std::unique_ptr<NativeVertexFormat>

View File

@ -769,7 +769,7 @@ void Tev::Draw()
// - scaling of the "k" coefficient isn't clear either.
// First, calculate the offset from the viewport center (normalized to 0..1)
float offset = (Position[0] - (static_cast<s32>(bpmem.fogRange.Base.Center) - 342)) /
float offset = (Position[0] - (static_cast<s32>(bpmem.fogRange.Base.Center.Value()) - 342)) /
static_cast<float>(xfmem.viewport.wd);
// Based on that, choose the index such that points which are far away from the z-axis use the

View File

@ -443,7 +443,7 @@ void TransformTexCoord(const InputVertexData* src, OutputVertexData* dst, bool s
dst->texCoords[coordNum].z = 1.0f;
break;
default:
ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype);
ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype.Value());
}
}

View File

@ -16,6 +16,7 @@
#include "Core/ConfigManager.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/ShaderCompiler.h"
#include "VideoBackends/Vulkan/StreamBuffer.h"
#include "VideoBackends/Vulkan/Util.h"
@ -59,6 +60,19 @@ bool ObjectCache::Initialize()
if (!m_utility_shader_vertex_buffer || !m_utility_shader_uniform_buffer)
return false;
m_dummy_texture = Texture2D::Create(1, 1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_LINEAR,
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
VkClearColorValue clear_color = {};
VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
m_dummy_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
&clear_color, 1, &clear_range);
m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
return true;
}
@ -99,17 +113,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
VK_SHADER_STAGE_GEOMETRY_BIT}};
// Annoying these have to be split, apparently we can't partially update an array without the
// validation layers throwing a warning.
static const VkDescriptorSetLayoutBinding sampler_set_bindings[] = {
{0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{6, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
{7, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}};
{0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, static_cast<u32>(NUM_PIXEL_SHADER_SAMPLERS),
VK_SHADER_STAGE_FRAGMENT_BIT}};
static const VkDescriptorSetLayoutBinding ssbo_set_bindings[] = {
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}};

View File

@ -15,6 +15,7 @@
#include "Common/LinearDiskCache.h"
#include "VideoBackends/Vulkan/Constants.h"
#include "VideoBackends/Vulkan/Texture2D.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
@ -62,6 +63,9 @@ public:
VkSampler GetLinearSampler() const { return m_linear_sampler; }
VkSampler GetSampler(const SamplerState& info);
// Dummy image for samplers that are unbound
Texture2D* GetDummyImage() const { return m_dummy_texture.get(); }
VkImageView GetDummyImageView() const { return m_dummy_texture->GetView(); }
// Perform at startup, create descriptor layouts, compiles all static shaders.
bool Initialize();
@ -89,6 +93,9 @@ private:
VkSampler m_linear_sampler = VK_NULL_HANDLE;
std::map<SamplerState, VkSampler> m_sampler_cache;
// Dummy image for samplers that are unbound
std::unique_ptr<Texture2D> m_dummy_texture;
};
extern std::unique_ptr<ObjectCache> g_object_cache;

View File

@ -149,7 +149,7 @@ static const std::string DEFAULT_FRAGMENT_SHADER_SOURCE = R"(
static const std::string POSTPROCESSING_SHADER_HEADER = R"(
SAMPLER_BINDING(0) uniform sampler2DArray samp0;
SAMPLER_BINDING(1) uniform sampler2D samp1;
SAMPLER_BINDING(1) uniform sampler2DArray samp1;
layout(location = 0) in float3 uv0;
layout(location = 1) in float4 col0;
@ -176,7 +176,7 @@ static const std::string POSTPROCESSING_SHADER_HEADER = R"(
float4 SampleFontLocation(float2 location)
{
return texture(samp1, location);
return texture(samp1, float3(location, 0.0));
}
float2 GetResolution()

View File

@ -150,7 +150,7 @@ layout(std140, push_constant) uniform PCBlock {
vec4 color;
} PC;
layout(set = 1, binding = 0) uniform sampler2D samp0;
layout(set = 1, binding = 0) uniform sampler2DArray samp0;
layout(location = 0) in vec2 uv0;
@ -158,7 +158,7 @@ layout(location = 0) out vec4 ocol0;
void main()
{
ocol0 = texture(samp0, uv0) * PC.color;
ocol0 = texture(samp0, float3(uv0, 0.0)) * PC.color;
}
)";
@ -209,7 +209,7 @@ bool RasterFont::CreateTexture()
// create the actual texture object
m_texture = Texture2D::Create(CHARACTER_WIDTH * CHARACTER_COUNT, CHARACTER_HEIGHT, 1, 1,
VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
if (!m_texture)
return false;

View File

@ -113,9 +113,6 @@ bool Renderer::Initialize()
m_bounding_box->GetGPUBufferSize());
}
// Ensure all pipelines previously used by the game have been created.
StateTracker::GetInstance()->ReloadPipelineUIDCache();
// Initialize post processing.
m_post_processor = std::make_unique<VulkanPostProcessing>();
if (!static_cast<VulkanPostProcessing*>(m_post_processor.get())
@ -589,6 +586,9 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
// Clean up stale textures.
TextureCache::GetInstance()->Cleanup(frameCount);
// Pull in now-ready async shaders.
g_shader_cache->RetrieveAsyncShaders();
}
void Renderer::TransitionBuffersForSwap(const TargetRectangle& scaled_rect,
@ -1132,6 +1132,8 @@ void Renderer::CheckForConfigChanges()
bool old_force_filtering = g_ActiveConfig.bForceFiltering;
bool old_use_xfb = g_ActiveConfig.bUseXFB;
bool old_use_realxfb = g_ActiveConfig.bUseRealXFB;
bool old_vertex_ubershaders = g_ActiveConfig.bForceVertexUberShaders;
bool old_pixel_ubershaders = g_ActiveConfig.bForcePixelUberShaders;
// Copy g_Config to g_ActiveConfig.
// NOTE: This can potentially race with the UI thread, however if it does, the changes will be
@ -1145,6 +1147,8 @@ void Renderer::CheckForConfigChanges()
bool aspect_changed = old_aspect_ratio != g_ActiveConfig.iAspectRatio;
bool use_xfb_changed = old_use_xfb != g_ActiveConfig.bUseXFB;
bool use_realxfb_changed = old_use_realxfb != g_ActiveConfig.bUseRealXFB;
bool ubershaders_changed = old_vertex_ubershaders != g_ActiveConfig.bForceVertexUberShaders ||
old_pixel_ubershaders != g_ActiveConfig.bForcePixelUberShaders;
// Update texture cache settings with any changed options.
TextureCache::GetInstance()->OnConfigChanged(g_ActiveConfig);
@ -1190,6 +1194,10 @@ void Renderer::CheckForConfigChanges()
if (anisotropy_changed || force_texture_filtering_changed)
ResetSamplerStates();
// Clear UID state if ubershaders are toggled.
if (ubershaders_changed)
StateTracker::GetInstance()->ClearShaders();
// Check for a changed post-processing shader and recompile if needed.
static_cast<VulkanPostProcessing*>(m_post_processor.get())->UpdateConfig();
}

View File

@ -15,13 +15,20 @@
#include "Common/MsgHandler.h"
#include "Core/ConfigManager.h"
#include "Core/Host.h"
#include "VideoBackends/Vulkan/FramebufferManager.h"
#include "VideoBackends/Vulkan/ShaderCompiler.h"
#include "VideoBackends/Vulkan/StreamBuffer.h"
#include "VideoBackends/Vulkan/Util.h"
#include "VideoBackends/Vulkan/VertexFormat.h"
#include "VideoBackends/Vulkan/VulkanContext.h"
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexLoaderManager.h"
namespace Vulkan
{
@ -55,9 +62,22 @@ bool ShaderCache::Initialize()
if (!CompileSharedShaders())
return false;
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
g_ActiveConfig.GetShaderPrecompilerThreads() :
g_ActiveConfig.GetShaderCompilerThreads());
return true;
}
// Stops the async compiler's worker threads and then retrieves its outstanding work items.
// Does nothing if the async compiler was never created.
void ShaderCache::Shutdown()
{
  if (!m_async_shader_compiler)
    return;

  m_async_shader_compiler->StopWorkerThreads();
  m_async_shader_compiler->RetrieveWorkItems();
}
static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology)
{
return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP ||
@ -365,13 +385,34 @@ std::pair<VkPipeline, bool> ShaderCache::GetPipelineWithCacheResult(const Pipeli
{
auto iter = m_pipeline_objects.find(info);
if (iter != m_pipeline_objects.end())
return {iter->second, true};
{
// If it's background compiling, ignore it, and recompile it synchronously.
if (!iter->second.second)
return std::make_pair(iter->second.first, true);
else
m_pipeline_objects.erase(iter);
}
VkPipeline pipeline = CreatePipeline(info);
m_pipeline_objects.emplace(info, pipeline);
m_pipeline_objects.emplace(info, std::make_pair(pipeline, false));
_assert_(pipeline != VK_NULL_HANDLE);
return {pipeline, false};
}
std::pair<std::pair<VkPipeline, bool>, bool>
ShaderCache::GetPipelineWithCacheResultAsync(const PipelineInfo& info)
{
auto iter = m_pipeline_objects.find(info);
if (iter != m_pipeline_objects.end())
return std::make_pair(iter->second, true);
// Kick a job off.
m_async_shader_compiler->QueueWorkItem(
m_async_shader_compiler->CreateWorkItem<PipelineCompilerWorkItem>(info));
m_pipeline_objects.emplace(info, std::make_pair(static_cast<VkPipeline>(VK_NULL_HANDLE), true));
return std::make_pair(std::make_pair(static_cast<VkPipeline>(VK_NULL_HANDLE), true), false);
}
VkPipeline ShaderCache::CreateComputePipeline(const ComputePipelineInfo& info)
{
VkComputePipelineCreateInfo pipeline_info = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
@ -409,10 +450,11 @@ VkPipeline ShaderCache::GetComputePipeline(const ComputePipelineInfo& info)
void ShaderCache::ClearPipelineCache()
{
// TODO: Stop any async compiling happening.
for (const auto& it : m_pipeline_objects)
{
if (it.second != VK_NULL_HANDLE)
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
if (it.second.first != VK_NULL_HANDLE)
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second.first, nullptr);
}
m_pipeline_objects.clear();
@ -620,7 +662,10 @@ void ShaderCache::SavePipelineCache()
template <typename Uid>
struct ShaderCacheReader : public LinearDiskCacheReader<Uid, u32>
{
ShaderCacheReader(std::map<Uid, VkShaderModule>& shader_map) : m_shader_map(shader_map) {}
ShaderCacheReader(std::map<Uid, std::pair<VkShaderModule, bool>>& shader_map)
: m_shader_map(shader_map)
{
}
void Read(const Uid& key, const u32* value, u32 value_size) override
{
// We don't insert null modules into the shader map since creation could succeed later on.
@ -630,10 +675,10 @@ struct ShaderCacheReader : public LinearDiskCacheReader<Uid, u32>
if (module == VK_NULL_HANDLE)
return;
m_shader_map.emplace(key, module);
m_shader_map.emplace(key, std::make_pair(module, false));
}
std::map<Uid, VkShaderModule>& m_shader_map;
std::map<Uid, std::pair<VkShaderModule, bool>>& m_shader_map;
};
void ShaderCache::LoadShaderCaches()
@ -653,6 +698,13 @@ void ShaderCache::LoadShaderCaches()
gs_reader);
}
ShaderCacheReader<UberShader::VertexShaderUid> uber_vs_reader(m_uber_vs_cache.shader_map);
m_uber_vs_cache.disk_cache.OpenAndRead(
GetDiskShaderCacheFileName(APIType::Vulkan, "UberVS", false, true), uber_vs_reader);
ShaderCacheReader<UberShader::PixelShaderUid> uber_ps_reader(m_uber_ps_cache.shader_map);
m_uber_ps_cache.disk_cache.OpenAndRead(
GetDiskShaderCacheFileName(APIType::Vulkan, "UberPS", false, true), uber_ps_reader);
SETSTAT(stats.numPixelShadersCreated, static_cast<int>(m_ps_cache.shader_map.size()));
SETSTAT(stats.numPixelShadersAlive, static_cast<int>(m_ps_cache.shader_map.size()));
SETSTAT(stats.numVertexShadersCreated, static_cast<int>(m_vs_cache.shader_map.size()));
@ -666,8 +718,8 @@ static void DestroyShaderCache(T& cache)
cache.disk_cache.Close();
for (const auto& it : cache.shader_map)
{
if (it.second != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second, nullptr);
if (it.second.first != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.first, nullptr);
}
cache.shader_map.clear();
}
@ -680,6 +732,9 @@ void ShaderCache::DestroyShaderCaches()
if (g_vulkan_context->SupportsGeometryShaders())
DestroyShaderCache(m_gs_cache);
DestroyShaderCache(m_uber_vs_cache);
DestroyShaderCache(m_uber_ps_cache);
SETSTAT(stats.numPixelShadersCreated, 0);
SETSTAT(stats.numPixelShadersAlive, 0);
SETSTAT(stats.numVertexShadersCreated, 0);
@ -690,7 +745,13 @@ VkShaderModule ShaderCache::GetVertexShaderForUid(const VertexShaderUid& uid)
{
auto it = m_vs_cache.shader_map.find(uid);
if (it != m_vs_cache.shader_map.end())
return it->second;
{
// If it's pending, compile it synchronously.
if (!it->second.second)
return it->second.first;
else
m_vs_cache.shader_map.erase(it);
}
// Not in the cache, so compile the shader.
ShaderCompiler::SPIRVCodeVector spv;
@ -712,7 +773,7 @@ VkShaderModule ShaderCache::GetVertexShaderForUid(const VertexShaderUid& uid)
}
// We still insert null entries to prevent further compilation attempts.
m_vs_cache.shader_map.emplace(uid, module);
m_vs_cache.shader_map.emplace(uid, std::make_pair(module, false));
return module;
}
@ -721,7 +782,13 @@ VkShaderModule ShaderCache::GetGeometryShaderForUid(const GeometryShaderUid& uid
_assert_(g_vulkan_context->SupportsGeometryShaders());
auto it = m_gs_cache.shader_map.find(uid);
if (it != m_gs_cache.shader_map.end())
return it->second;
{
// If it's pending, compile it synchronously.
if (!it->second.second)
return it->second.first;
else
m_gs_cache.shader_map.erase(it);
}
// Not in the cache, so compile the shader.
ShaderCompiler::SPIRVCodeVector spv;
@ -739,7 +806,7 @@ VkShaderModule ShaderCache::GetGeometryShaderForUid(const GeometryShaderUid& uid
}
// We still insert null entries to prevent further compilation attempts.
m_gs_cache.shader_map.emplace(uid, module);
m_gs_cache.shader_map.emplace(uid, std::make_pair(module, false));
return module;
}
@ -747,7 +814,13 @@ VkShaderModule ShaderCache::GetPixelShaderForUid(const PixelShaderUid& uid)
{
auto it = m_ps_cache.shader_map.find(uid);
if (it != m_ps_cache.shader_map.end())
return it->second;
{
// If it's pending, compile it synchronously.
if (!it->second.second)
return it->second.first;
else
m_ps_cache.shader_map.erase(it);
}
// Not in the cache, so compile the shader.
ShaderCompiler::SPIRVCodeVector spv;
@ -769,7 +842,79 @@ VkShaderModule ShaderCache::GetPixelShaderForUid(const PixelShaderUid& uid)
}
// We still insert null entries to prevent further compilation attempts.
m_ps_cache.shader_map.emplace(uid, module);
m_ps_cache.shader_map.emplace(uid, std::make_pair(module, false));
return module;
}
// Returns the vertex ubershader module for `uid`, compiling it synchronously on a miss.
// A failed compile yields VK_NULL_HANDLE, which is still stored in the map so the
// compilation is not attempted again.
VkShaderModule ShaderCache::GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid)
{
  auto& shader_map = m_uber_vs_cache.shader_map;
  const auto cached = shader_map.find(uid);
  if (cached != shader_map.end())
  {
    const bool is_pending = cached->second.second;
    if (!is_pending)
      return cached->second.first;

    // A pending placeholder is dropped; the shader is compiled right here instead.
    shader_map.erase(cached);
  }

  // Generate the GLSL for this UID and compile it to SPIR-V.
  ShaderCode source_code = UberShader::GenVertexShader(
      APIType::Vulkan, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  const auto& glsl = source_code.GetBuffer();

  ShaderCompiler::SPIRVCodeVector spirv;
  VkShaderModule module = VK_NULL_HANDLE;
  if (ShaderCompiler::CompileVertexShader(&spirv, glsl.c_str(), glsl.length()))
    module = Util::CreateShaderModule(spirv.data(), spirv.size());

  // Only successfully created modules are written to disk and counted in the stats.
  if (module != VK_NULL_HANDLE)
  {
    m_uber_vs_cache.disk_cache.Append(uid, spirv.data(), static_cast<u32>(spirv.size()));
    INCSTAT(stats.numVertexShadersCreated);
    INCSTAT(stats.numVertexShadersAlive);
  }

  shader_map.emplace(uid, std::make_pair(module, false));
  return module;
}
// Returns the pixel ubershader module for `uid`, compiling it synchronously on a miss.
// A failed compile yields VK_NULL_HANDLE, which is still stored in the map so the
// compilation is not attempted again.
VkShaderModule ShaderCache::GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid)
{
  auto& shader_map = m_uber_ps_cache.shader_map;
  const auto cached = shader_map.find(uid);
  if (cached != shader_map.end())
  {
    const bool is_pending = cached->second.second;
    if (!is_pending)
      return cached->second.first;

    // A pending placeholder is dropped; the shader is compiled right here instead.
    shader_map.erase(cached);
  }

  // Generate the GLSL for this UID and compile it to SPIR-V.
  ShaderCode source_code =
      UberShader::GenPixelShader(APIType::Vulkan, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  const auto& glsl = source_code.GetBuffer();

  ShaderCompiler::SPIRVCodeVector spirv;
  VkShaderModule module = VK_NULL_HANDLE;
  if (ShaderCompiler::CompileFragmentShader(&spirv, glsl.c_str(), glsl.length()))
    module = Util::CreateShaderModule(spirv.data(), spirv.size());

  // Only successfully created modules are written to disk and counted in the stats.
  if (module != VK_NULL_HANDLE)
  {
    m_uber_ps_cache.disk_cache.Append(uid, spirv.data(), static_cast<u32>(spirv.size()));
    INCSTAT(stats.numPixelShadersCreated);
    INCSTAT(stats.numPixelShadersAlive);
  }

  shader_map.emplace(uid, std::make_pair(module, false));
  return module;
}
@ -782,6 +927,9 @@ void ShaderCache::RecompileSharedShaders()
void ShaderCache::ReloadShaderAndPipelineCaches()
{
m_async_shader_compiler->WaitUntilCompletion();
m_async_shader_compiler->RetrieveWorkItems();
SavePipelineCache();
DestroyShaderCaches();
DestroyPipelineCache();
@ -795,6 +943,9 @@ void ShaderCache::ReloadShaderAndPipelineCaches()
{
CreatePipelineCache();
}
if (g_ActiveConfig.CanPrecompileUberShaders())
PrecompileUberShaders();
}
std::string ShaderCache::GetUtilityShaderHeader() const
@ -1026,4 +1177,214 @@ void ShaderCache::DestroySharedShaders()
DestroyShader(m_screen_quad_geometry_shader);
DestroyShader(m_passthrough_geometry_shader);
}
// Queues an asynchronous build of a pipeline that combines the given vertex and pixel
// ubershaders (plus the geometry shader, when it is not passthrough and geometry shaders
// are supported) with a zeroed vertex declaration and the no-cull, no-depth-test and
// no-blending states. The result of the async lookup is intentionally discarded.
void ShaderCache::CreateDummyPipeline(const UberShader::VertexShaderUid& vuid,
                                      const GeometryShaderUid& guid,
                                      const UberShader::PixelShaderUid& puid)
{
  // The uber vertex format is looked up with an all-zero declaration.
  PortableVertexDeclaration zeroed_decl;
  std::memset(&zeroed_decl, 0, sizeof(zeroed_decl));

  PipelineInfo pinfo;
  pinfo.vertex_format =
      static_cast<const VertexFormat*>(VertexLoaderManager::GetUberVertexFormat(zeroed_decl));

  const bool use_bbox_layout =
      g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation();
  pinfo.pipeline_layout = g_object_cache->GetPipelineLayout(
      use_bbox_layout ? PIPELINE_LAYOUT_BBOX : PIPELINE_LAYOUT_STANDARD);

  pinfo.vs = GetVertexUberShaderForUid(vuid);

  const bool needs_geometry_shader =
      !guid.GetUidData()->IsPassthrough() && g_vulkan_context->SupportsGeometryShaders();
  if (needs_geometry_shader)
    pinfo.gs = GetGeometryShaderForUid(guid);
  else
    pinfo.gs = VK_NULL_HANDLE;

  pinfo.ps = GetPixelUberShaderForUid(puid);
  pinfo.render_pass = FramebufferManager::GetInstance()->GetEFBLoadRenderPass();

  // Fixed-function state: no culling, no depth testing, no blending.
  pinfo.rasterization_state.bits = Util::GetNoCullRasterizationState().bits;
  pinfo.depth_stencil_state.bits = Util::GetNoDepthTestingDepthStencilState().bits;
  pinfo.blend_state.hex = Util::GetNoBlendingBlendState().hex;

  // NOTE(review): there is no default case, so primitive_topology is not assigned here for
  // any primitive type other than the three below - confirm PipelineInfo initializes it.
  switch (guid.GetUidData()->primitive_type)
  {
  case PRIMITIVE_POINTS:
    pinfo.primitive_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
    break;

  case PRIMITIVE_LINES:
    pinfo.primitive_topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
    break;

  case PRIMITIVE_TRIANGLES:
    // Triangles use strips when primitive restart is available, lists otherwise.
    pinfo.primitive_topology = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
                                   VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP :
                                   VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
    break;
  }

  GetPipelineWithCacheResultAsync(pinfo);
}
// Queues a dummy pipeline for every vertex/pixel/geometry UID combination whose texgen
// counts agree, blocks until all queued compiles have finished, then resizes the worker
// pool to the runtime compiler thread count.
void ShaderCache::PrecompileUberShaders()
{
  UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) {
    UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& puid) {
      // Vertex and pixel UIDs must agree on the texgen count; a mismatching combination
      // will never be queried.
      if (vuid.GetUidData()->num_texgens == puid.GetUidData()->num_texgens)
      {
        EnumerateGeometryShaderUids([&](const GeometryShaderUid& guid) {
          // The geometry shader must agree on the texgen count as well.
          if (guid.GetUidData()->numTexGens == vuid.GetUidData()->num_texgens)
            CreateDummyPipeline(vuid, guid, puid);
        });
      }
    });
  });

  WaitForBackgroundCompilesToComplete();

  // Switch to the runtime/background thread config.
  m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
}
// Blocks until the async compiler has finished all queued work, reporting progress through
// the host progress dialog, then retrieves the finished work items and issues a final
// ("", -1, -1) progress update.
void ShaderCache::WaitForBackgroundCompilesToComplete()
{
  const auto report_progress = [](size_t completed, size_t total) {
    Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
                              static_cast<int>(completed), static_cast<int>(total));
  };

  m_async_shader_compiler->WaitUntilCompletion(report_progress);
  m_async_shader_compiler->RetrieveWorkItems();

  Host_UpdateProgressDialog("", -1, -1);
}
// Retrieves the async compiler's work items by forwarding to RetrieveWorkItems().
void ShaderCache::RetrieveAsyncShaders()
{
m_async_shader_compiler->RetrieveWorkItems();
}
std::pair<VkShaderModule, bool> ShaderCache::GetVertexShaderForUidAsync(const VertexShaderUid& uid)
{
auto it = m_vs_cache.shader_map.find(uid);
if (it != m_vs_cache.shader_map.end())
return it->second;
// Kick a compile job off.
m_async_shader_compiler->QueueWorkItem(
m_async_shader_compiler->CreateWorkItem<VertexShaderCompilerWorkItem>(uid));
m_vs_cache.shader_map.emplace(uid,
std::make_pair(static_cast<VkShaderModule>(VK_NULL_HANDLE), true));
return std::make_pair<VkShaderModule, bool>(VK_NULL_HANDLE, true);
}
std::pair<VkShaderModule, bool> ShaderCache::GetPixelShaderForUidAsync(const PixelShaderUid& uid)
{
auto it = m_ps_cache.shader_map.find(uid);
if (it != m_ps_cache.shader_map.end())
return it->second;
// Kick a compile job off.
m_async_shader_compiler->QueueWorkItem(
m_async_shader_compiler->CreateWorkItem<PixelShaderCompilerWorkItem>(uid));
m_ps_cache.shader_map.emplace(uid,
std::make_pair(static_cast<VkShaderModule>(VK_NULL_HANDLE), true));
return std::make_pair<VkShaderModule, bool>(VK_NULL_HANDLE, true);
}
// Generates the vertex shader source for m_uid, compiles it to SPIR-V and, if that
// succeeds, creates the shader module. Always returns true; when the SPIR-V compile fails
// m_module is simply not assigned.
bool ShaderCache::VertexShaderCompilerWorkItem::Compile()
{
  ShaderCode code =
      GenerateVertexShaderCode(APIType::Vulkan, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
  const auto& glsl = code.GetBuffer();

  if (ShaderCompiler::CompileVertexShader(&m_spirv, glsl.c_str(), glsl.length()))
    m_module = Util::CreateShaderModule(m_spirv.data(), m_spirv.size());

  return true;
}
// Publishes the result of the background compile into the vertex shader cache.
//
// - If the map already holds a non-pending entry for this UID (the main thread compiled it
//   synchronously in the meantime), our module is redundant and is destroyed.
// - Otherwise the result replaces the pending placeholder, or is inserted if no entry
//   exists. A null module is stored as well, which prevents further compilation attempts.
//
// The SPIR-V is appended to the disk cache only when a module was actually created. This
// matches the synchronous path and avoids writing empty/unusable records after a failure.
void ShaderCache::VertexShaderCompilerWorkItem::Retrieve()
{
  auto& cache = g_shader_cache->m_vs_cache;
  auto it = cache.shader_map.find(m_uid);

  // The main thread may have also compiled this shader.
  if (it != cache.shader_map.end() && !it->second.second)
  {
    if (m_module != VK_NULL_HANDLE)
      vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_module, nullptr);
    return;
  }

  if (it == cache.shader_map.end())
  {
    cache.shader_map.emplace(m_uid, std::make_pair(m_module, false));
  }
  else
  {
    // No longer pending.
    it->second.first = m_module;
    it->second.second = false;
  }

  // Only persist shaders that compiled and created successfully.
  if (m_module != VK_NULL_HANDLE)
    cache.disk_cache.Append(m_uid, m_spirv.data(), static_cast<u32>(m_spirv.size()));
}
// Generates the pixel shader source for m_uid, compiles it to SPIR-V and, if that
// succeeds, creates the shader module. Always returns true; when the SPIR-V compile fails
// m_module is simply not assigned.
bool ShaderCache::PixelShaderCompilerWorkItem::Compile()
{
  ShaderCode code =
      GeneratePixelShaderCode(APIType::Vulkan, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
  const auto& glsl = code.GetBuffer();

  if (ShaderCompiler::CompileFragmentShader(&m_spirv, glsl.c_str(), glsl.length()))
    m_module = Util::CreateShaderModule(m_spirv.data(), m_spirv.size());

  return true;
}
// Publishes the result of the background compile into the pixel shader cache.
//
// - If the map already holds a non-pending entry for this UID (the main thread compiled it
//   synchronously in the meantime), our module is redundant and is destroyed.
// - Otherwise the result replaces the pending placeholder, or is inserted if no entry
//   exists. A null module is stored as well, which prevents further compilation attempts.
//
// The SPIR-V is appended to the disk cache only when a module was actually created. This
// matches the synchronous path and avoids writing empty/unusable records after a failure.
void ShaderCache::PixelShaderCompilerWorkItem::Retrieve()
{
  auto& cache = g_shader_cache->m_ps_cache;
  auto it = cache.shader_map.find(m_uid);

  // The main thread may have also compiled this shader.
  if (it != cache.shader_map.end() && !it->second.second)
  {
    if (m_module != VK_NULL_HANDLE)
      vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_module, nullptr);
    return;
  }

  if (it == cache.shader_map.end())
  {
    cache.shader_map.emplace(m_uid, std::make_pair(m_module, false));
  }
  else
  {
    // No longer pending.
    it->second.first = m_module;
    it->second.second = false;
  }

  // Only persist shaders that compiled and created successfully.
  if (m_module != VK_NULL_HANDLE)
    cache.disk_cache.Append(m_uid, m_spirv.data(), static_cast<u32>(m_spirv.size()));
}
// Creates the pipeline described by m_info via ShaderCache::CreatePipeline and stores the
// handle in m_pipeline. Always returns true; the created handle is not checked here.
bool ShaderCache::PipelineCompilerWorkItem::Compile()
{
m_pipeline = g_shader_cache->CreatePipeline(m_info);
return true;
}
// Publishes the pipeline built by Compile() into the pipeline map.
//  - No entry for m_info: insert {m_pipeline, pending = false}.
//  - Entry still pending: fill in the handle and clear the pending flag.
//  - Entry already resolved: our pipeline is redundant and is destroyed if non-null.
void ShaderCache::PipelineCompilerWorkItem::Retrieve()
{
  auto& pipelines = g_shader_cache->m_pipeline_objects;
  const auto entry = pipelines.find(m_info);
  if (entry == pipelines.end())
  {
    pipelines.emplace(m_info, std::make_pair(m_pipeline, false));
    return;
  }

  auto& slot = entry->second;
  if (slot.second)
  {
    // No longer pending.
    slot.first = m_pipeline;
    slot.second = false;
    return;
  }

  // The main thread may have also compiled this pipeline; ours is no longer needed.
  if (m_pipeline != VK_NULL_HANDLE)
    vkDestroyPipeline(g_vulkan_context->GetDevice(), m_pipeline, nullptr);
}
}

View File

@ -10,16 +10,21 @@
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include "Common/CommonTypes.h"
#include "Common/LinearDiskCache.h"
#include "VideoBackends/Vulkan/Constants.h"
#include "VideoBackends/Vulkan/ObjectCache.h"
#include "VideoBackends/Vulkan/ShaderCompiler.h"
#include "VideoCommon/AsyncShaderCompiler.h"
#include "VideoCommon/GeometryShaderGen.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"
namespace Vulkan
@ -92,8 +97,17 @@ public:
VkShaderModule GetGeometryShaderForUid(const GeometryShaderUid& uid);
VkShaderModule GetPixelShaderForUid(const PixelShaderUid& uid);
// Ubershader caches
VkShaderModule GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid);
VkShaderModule GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid);
// Accesses ShaderGen shader caches asynchronously
std::pair<VkShaderModule, bool> GetVertexShaderForUidAsync(const VertexShaderUid& uid);
std::pair<VkShaderModule, bool> GetPixelShaderForUidAsync(const PixelShaderUid& uid);
// Perform at startup, create descriptor layouts, compiles all static shaders.
bool Initialize();
void Shutdown();
// Creates a pipeline for the specified description. The resulting pipeline, if successful
// is not stored anywhere, this is left up to the caller.
@ -106,6 +120,8 @@ public:
// resulted in a pipeline being created, the second field of the return value will be false,
// otherwise for a cache hit it will be true.
std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
std::pair<std::pair<VkPipeline, bool>, bool>
GetPipelineWithCacheResultAsync(const PipelineInfo& info);
// Creates a compute pipeline, and does not track the handle.
VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
@ -134,6 +150,10 @@ public:
VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; }
VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; }
VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; }
void PrecompileUberShaders();
void WaitForBackgroundCompilesToComplete();
void RetrieveAsyncShaders();
private:
bool CreatePipelineCache();
bool LoadPipelineCache();
@ -144,17 +164,26 @@ private:
bool CompileSharedShaders();
void DestroySharedShaders();
// We generate a dummy pipeline with some defaults in the blend/depth states,
// that way the driver is forced to compile something (looking at you, NVIDIA).
// It can then hopefully re-use part of this pipeline for others in the future.
void CreateDummyPipeline(const UberShader::VertexShaderUid& vuid, const GeometryShaderUid& guid,
const UberShader::PixelShaderUid& puid);
template <typename Uid>
struct ShaderModuleCache
{
std::map<Uid, VkShaderModule> shader_map;
std::map<Uid, std::pair<VkShaderModule, bool>> shader_map;
LinearDiskCache<Uid, u32> disk_cache;
};
ShaderModuleCache<VertexShaderUid> m_vs_cache;
ShaderModuleCache<GeometryShaderUid> m_gs_cache;
ShaderModuleCache<PixelShaderUid> m_ps_cache;
ShaderModuleCache<UberShader::VertexShaderUid> m_uber_vs_cache;
ShaderModuleCache<UberShader::PixelShaderUid> m_uber_ps_cache;
std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
std::unordered_map<PipelineInfo, std::pair<VkPipeline, bool>, PipelineInfoHash>
m_pipeline_objects;
std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
m_compute_pipeline_objects;
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
@ -165,6 +194,45 @@ private:
VkShaderModule m_passthrough_vertex_shader = VK_NULL_HANDLE;
VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE;
VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE;
std::unique_ptr<VideoCommon::AsyncShaderCompiler> m_async_shader_compiler;
// TODO: Use templates to reduce the number of these classes.
// Asynchronous work item for building the vertex shader identified by a VertexShaderUid.
// AsyncShaderCompiler invokes Compile() (on a worker thread when workers are running, otherwise
// on the queueing thread) and later invokes Retrieve() from RetrieveWorkItems().
class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
VertexShaderCompilerWorkItem(const VertexShaderUid& uid) : m_uid(uid) {}
bool Compile() override;
void Retrieve() override;
private:
// Copy of the UID taken at construction.
VertexShaderUid m_uid;
// NOTE(review): presumably produced by Compile() and consumed by Retrieve(); their
// definitions are not visible here - confirm.
ShaderCompiler::SPIRVCodeVector m_spirv;
VkShaderModule m_module = VK_NULL_HANDLE;
};
// Asynchronous work item for building the pixel shader identified by a PixelShaderUid.
// AsyncShaderCompiler invokes Compile() (on a worker thread when workers are running, otherwise
// on the queueing thread) and later invokes Retrieve() from RetrieveWorkItems().
class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
PixelShaderCompilerWorkItem(const PixelShaderUid& uid) : m_uid(uid) {}
bool Compile() override;
void Retrieve() override;
private:
// Copy of the UID taken at construction.
PixelShaderUid m_uid;
// NOTE(review): presumably produced by Compile() and consumed by Retrieve(); their
// definitions are not visible here - confirm.
ShaderCompiler::SPIRVCodeVector m_spirv;
VkShaderModule m_module = VK_NULL_HANDLE;
};
// Asynchronous work item for building the pipeline described by a PipelineInfo.
// AsyncShaderCompiler invokes Compile() (on a worker thread when workers are running, otherwise
// on the queueing thread) and later invokes Retrieve() from RetrieveWorkItems().
class PipelineCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
{
public:
  PipelineCompilerWorkItem(const PipelineInfo& info) : m_info(info) {}
  bool Compile() override;
  void Retrieve() override;

private:
  // Copy of the pipeline description taken at construction.
  PipelineInfo m_info;

  // Starts out as a null handle, like m_module in the shader work items, so the member never
  // holds an indeterminate value if it is read before Compile() has stored a result.
  VkPipeline m_pipeline = VK_NULL_HANDLE;
};
};
extern std::unique_ptr<ShaderCache> g_shader_cache;

View File

@ -22,6 +22,7 @@
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
@ -77,12 +78,13 @@ bool StateTracker::Initialize()
m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS;
m_bbox_enabled = false;
ClearShaders();
// Initialize all samplers to point by default
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
{
m_bindings.ps_samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
m_bindings.ps_samplers[i].imageView = VK_NULL_HANDLE;
m_bindings.ps_samplers[i].imageView = g_object_cache->GetDummyImageView();
m_bindings.ps_samplers[i].sampler = g_object_cache->GetPointSampler();
}
@ -154,6 +156,10 @@ void StateTracker::ReloadPipelineUIDCache()
PipelineInserter inserter(this);
m_uid_cache.OpenAndRead(filename, inserter);
}
// If we were using background compilation, ensure everything is ready before continuing.
if (g_ActiveConfig.bBackgroundShaderCompiling)
g_shader_cache->WaitForBackgroundCompilesToComplete();
}
void StateTracker::AppendToPipelineUIDCache(const PipelineInfo& info)
@ -178,7 +184,8 @@ bool StateTracker::PrecachePipelineUID(const SerializedPipelineUID& uid)
// Need to create the vertex declaration first, rather than deferring to when a game creates a
// vertex loader that uses this format, since we need it to create a pipeline.
pinfo.vertex_format = VertexFormat::GetOrCreateMatchingFormat(uid.vertex_decl);
pinfo.vertex_format =
static_cast<VertexFormat*>(VertexLoaderManager::GetOrCreateMatchingFormat(uid.vertex_decl));
pinfo.pipeline_layout = uid.ps_uid.GetUidData()->bounding_box ?
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX) :
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
@ -209,11 +216,19 @@ bool StateTracker::PrecachePipelineUID(const SerializedPipelineUID& uid)
pinfo.blend_state.hex = uid.blend_state_bits;
pinfo.primitive_topology = uid.primitive_topology;
VkPipeline pipeline = g_shader_cache->GetPipeline(pinfo);
if (pipeline == VK_NULL_HANDLE)
if (g_ActiveConfig.bBackgroundShaderCompiling)
{
WARN_LOG(VIDEO, "Failed to get pipeline from cached UID.");
return false;
// Use async for multithreaded compilation.
g_shader_cache->GetPipelineWithCacheResultAsync(pinfo);
}
else
{
VkPipeline pipeline = g_shader_cache->GetPipeline(pinfo);
if (pipeline == VK_NULL_HANDLE)
{
WARN_LOG(VIDEO, "Failed to get pipeline from cached UID.");
return false;
}
}
// We don't need to do anything with this pipeline, just make sure it exists.
@ -267,11 +282,11 @@ void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& ren
void StateTracker::SetVertexFormat(const VertexFormat* vertex_format)
{
if (m_pipeline_state.vertex_format == vertex_format)
if (m_vertex_format == vertex_format)
return;
m_pipeline_state.vertex_format = vertex_format;
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
m_vertex_format = vertex_format;
UpdatePipelineVertexFormat();
}
void StateTracker::SetPrimitiveTopology(VkPrimitiveTopology primitive_topology)
@ -323,14 +338,87 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type)
{
VertexShaderUid vs_uid = GetVertexShaderUid();
PixelShaderUid ps_uid = GetPixelShaderUid();
bool changed = false;
if (vs_uid != m_vs_uid)
bool use_ubershaders = g_ActiveConfig.bDisableSpecializedShaders;
if (g_ActiveConfig.CanBackgroundCompileShaders() && !g_ActiveConfig.bDisableSpecializedShaders)
{
m_pipeline_state.vs = g_shader_cache->GetVertexShaderForUid(vs_uid);
m_vs_uid = vs_uid;
changed = true;
// Look up both VS and PS, and check if we can compile it asynchronously.
auto vs = g_shader_cache->GetVertexShaderForUidAsync(vs_uid);
auto ps = g_shader_cache->GetPixelShaderForUidAsync(ps_uid);
if (vs.second || ps.second)
{
// One of the shaders is still pending. Use the ubershader for both.
use_ubershaders = true;
}
else
{
// Use the standard shaders for both.
if (m_pipeline_state.vs != vs.first)
{
m_pipeline_state.vs = vs.first;
m_vs_uid = vs_uid;
changed = true;
}
if (m_pipeline_state.ps != ps.first)
{
m_pipeline_state.ps = ps.first;
m_ps_uid = ps_uid;
changed = true;
}
}
}
else
{
// Normal shader path. No ubershaders.
if (vs_uid != m_vs_uid)
{
m_vs_uid = vs_uid;
m_pipeline_state.vs = g_shader_cache->GetVertexShaderForUid(vs_uid);
changed = true;
}
if (ps_uid != m_ps_uid)
{
m_ps_uid = ps_uid;
m_pipeline_state.ps = g_shader_cache->GetPixelShaderForUid(ps_uid);
changed = true;
}
}
// Ubershader fallback?
bool uber_vertex_shader = use_ubershaders || g_ActiveConfig.bForceVertexUberShaders;
bool uber_pixel_shader = use_ubershaders || g_ActiveConfig.bForcePixelUberShaders;
bool using_ubershaders = uber_vertex_shader || uber_pixel_shader;
// Switching to/from ubershaders? Have to adjust the vertex format and pipeline layout.
if (using_ubershaders != m_using_ubershaders)
{
m_using_ubershaders = using_ubershaders;
UpdatePipelineLayout();
UpdatePipelineVertexFormat();
}
if (uber_vertex_shader)
{
UberShader::VertexShaderUid uber_vs_uid = UberShader::GetVertexShaderUid();
VkShaderModule vs = g_shader_cache->GetVertexUberShaderForUid(uber_vs_uid);
if (vs != m_pipeline_state.vs)
{
m_uber_vs_uid = uber_vs_uid;
m_pipeline_state.vs = vs;
changed = true;
}
}
if (uber_pixel_shader)
{
UberShader::PixelShaderUid uber_ps_uid = UberShader::GetPixelShaderUid();
VkShaderModule ps = g_shader_cache->GetPixelUberShaderForUid(uber_ps_uid);
if (ps != m_pipeline_state.ps)
{
m_uber_ps_uid = uber_ps_uid;
m_pipeline_state.ps = ps;
changed = true;
}
}
if (g_vulkan_context->SupportsGeometryShaders())
@ -338,29 +426,39 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type)
GeometryShaderUid gs_uid = GetGeometryShaderUid(gx_primitive_type);
if (gs_uid != m_gs_uid)
{
m_gs_uid = gs_uid;
if (gs_uid.GetUidData()->IsPassthrough())
m_pipeline_state.gs = VK_NULL_HANDLE;
else
m_pipeline_state.gs = g_shader_cache->GetGeometryShaderForUid(gs_uid);
m_gs_uid = gs_uid;
changed = true;
}
}
if (ps_uid != m_ps_uid)
{
m_pipeline_state.ps = g_shader_cache->GetPixelShaderForUid(ps_uid);
m_ps_uid = ps_uid;
changed = true;
}
if (changed)
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
return changed;
}
void StateTracker::ClearShaders()
{
  // Drop the shader modules and vertex format currently referenced by the pipeline state.
  m_pipeline_state.vertex_format = nullptr;
  m_pipeline_state.ps = VK_NULL_HANDLE;
  m_pipeline_state.gs = VK_NULL_HANDLE;
  m_pipeline_state.vs = VK_NULL_HANDLE;

  // Overwrite every cached UID with 0xFF bytes. The intent is that no real UID compares equal
  // to this pattern, so the next CheckForShaderChanges() looks every shader up again.
  const auto invalidate_uid = [](auto& uid) { std::memset(&uid, 0xFF, sizeof(uid)); };
  invalidate_uid(m_vs_uid);
  invalidate_uid(m_gs_uid);
  invalidate_uid(m_ps_uid);
  invalidate_uid(m_uber_vs_uid);
  invalidate_uid(m_uber_ps_uid);

  // NOTE(review): m_vertex_format is not reset here, only m_pipeline_state.vertex_format -
  // confirm that a later SetVertexFormat() with the same format still restores it.
  m_dirty_flags |= DIRTY_FLAG_PIPELINE;
}
void StateTracker::UpdateVertexShaderConstants()
{
if (!VertexShaderManager::dirty || !ReserveConstantStorage())
@ -557,24 +655,8 @@ void StateTracker::SetBBoxEnable(bool enable)
if (m_bbox_enabled == enable)
return;
// Change the number of active descriptor sets, as well as the pipeline layout
if (enable)
{
m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX);
m_num_active_descriptor_sets = NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS;
// The bbox buffer never changes, so we defer descriptor updates until it is enabled.
if (m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)
m_dirty_flags |= DIRTY_FLAG_PS_SSBO;
}
else
{
m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS;
}
m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING;
m_bbox_enabled = enable;
UpdatePipelineLayout();
}
void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range)
@ -590,7 +672,7 @@ void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceS
m_bindings.ps_ssbo.range = range;
// Defer descriptor update until bbox is actually enabled.
if (m_bbox_enabled)
if (IsSSBODescriptorRequired())
m_dirty_flags |= DIRTY_FLAG_PS_SSBO;
}
@ -599,7 +681,7 @@ void StateTracker::UnbindTexture(VkImageView view)
for (VkDescriptorImageInfo& it : m_bindings.ps_samplers)
{
if (it.imageView == view)
it.imageView = VK_NULL_HANDLE;
it.imageView = g_object_cache->GetDummyImageView();
}
}
@ -609,7 +691,7 @@ void StateTracker::InvalidateDescriptorSets()
m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS;
// Defer SSBO descriptor update until bbox is actually enabled.
if (!m_bbox_enabled)
if (!IsSSBODescriptorRequired())
m_dirty_flags &= ~DIRTY_FLAG_PS_SSBO;
}
@ -886,15 +968,49 @@ void StateTracker::EndClearRenderPass()
EndRenderPass();
}
VkPipeline StateTracker::GetPipelineAndCacheUID(const PipelineInfo& info)
VkPipeline StateTracker::GetPipelineAndCacheUID()
{
auto result = g_shader_cache->GetPipelineWithCacheResult(info);
// We can't cache ubershader uids, only normal shader uids.
if (g_ActiveConfig.CanBackgroundCompileShaders() && !m_using_ubershaders)
{
// Append to UID cache if it is a new pipeline.
auto result = g_shader_cache->GetPipelineWithCacheResultAsync(m_pipeline_state);
if (!result.second && g_ActiveConfig.bShaderCache)
AppendToPipelineUIDCache(m_pipeline_state);
// Add to the UID cache if it is a new pipeline.
if (!result.second && g_ActiveConfig.bShaderCache)
AppendToPipelineUIDCache(info);
// Still waiting for the pipeline to compile?
if (!result.first.second)
return result.first.first;
return result.first;
// Use ubershader instead.
m_using_ubershaders = true;
UpdatePipelineLayout();
UpdatePipelineVertexFormat();
PipelineInfo uber_info = m_pipeline_state;
UberShader::VertexShaderUid uber_vuid = UberShader::GetVertexShaderUid();
UberShader::PixelShaderUid uber_puid = UberShader::GetPixelShaderUid();
uber_info.vs = g_shader_cache->GetVertexUberShaderForUid(uber_vuid);
uber_info.ps = g_shader_cache->GetPixelUberShaderForUid(uber_puid);
auto uber_result = g_shader_cache->GetPipelineWithCacheResult(uber_info);
return uber_result.first;
}
else
{
// Add to the UID cache if it is a new pipeline.
auto result = g_shader_cache->GetPipelineWithCacheResult(m_pipeline_state);
if (!result.second && !m_using_ubershaders && g_ActiveConfig.bShaderCache)
AppendToPipelineUIDCache(m_pipeline_state);
return result.first;
}
}
bool StateTracker::IsSSBODescriptorRequired() const
{
  // Bounding box explicitly enabled on the tracker: the SSBO is always needed.
  if (m_bbox_enabled)
    return true;

  // Otherwise it is only needed while ubershaders are in use...
  if (!m_using_ubershaders)
    return false;

  // ...and bounding box is enabled in the config with the fragment shader implementation.
  return g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation();
}
bool StateTracker::UpdatePipeline()
@ -904,16 +1020,56 @@ bool StateTracker::UpdatePipeline()
return false;
// Grab a new pipeline object, this can fail.
m_pipeline_object = GetPipelineAndCacheUID(m_pipeline_state);
m_pipeline_object = GetPipelineAndCacheUID();
m_dirty_flags |= DIRTY_FLAG_PIPELINE_BINDING;
return m_pipeline_object != VK_NULL_HANDLE;
}
void StateTracker::UpdatePipelineLayout()
{
const bool use_bbox_pipeline_layout = IsSSBODescriptorRequired();
VkPipelineLayout pipeline_layout =
use_bbox_pipeline_layout ? g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX) :
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
if (m_pipeline_state.pipeline_layout == pipeline_layout)
return;
// Change the number of active descriptor sets, as well as the pipeline layout
m_pipeline_state.pipeline_layout = pipeline_layout;
if (use_bbox_pipeline_layout)
{
m_num_active_descriptor_sets = NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS;
// The bbox buffer never changes, so we defer descriptor updates until it is enabled.
if (m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)
m_dirty_flags |= DIRTY_FLAG_PS_SSBO;
}
else
{
m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS;
}
m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING;
}
void StateTracker::UpdatePipelineVertexFormat()
{
const NativeVertexFormat* vertex_format =
m_using_ubershaders ?
VertexLoaderManager::GetUberVertexFormat(m_vertex_format->GetVertexDeclaration()) :
m_vertex_format;
if (m_pipeline_state.vertex_format == vertex_format)
return;
m_pipeline_state.vertex_format = static_cast<const VertexFormat*>(vertex_format);
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
}
bool StateTracker::UpdateDescriptorSet()
{
const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO
NUM_PIXEL_SHADER_SAMPLERS + // Samplers
1 + // Samplers
1; // SSBO
std::array<VkWriteDescriptorSet, MAX_DESCRIPTOR_WRITES> writes;
u32 num_writes = 0;
@ -954,30 +1110,22 @@ bool StateTracker::UpdateDescriptorSet()
if (set == VK_NULL_HANDLE)
return false;
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
{
const VkDescriptorImageInfo& info = m_bindings.ps_samplers[i];
if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
{
writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
static_cast<uint32_t>(i),
0,
1,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
&info,
nullptr,
nullptr};
}
}
writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
0,
0,
static_cast<u32>(NUM_PIXEL_SHADER_SAMPLERS),
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
m_bindings.ps_samplers.data(),
nullptr,
nullptr};
m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set;
m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING;
}
if (m_bbox_enabled &&
(m_dirty_flags & DIRTY_FLAG_PS_SSBO ||
if ((m_dirty_flags & DIRTY_FLAG_PS_SSBO ||
m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE))
{
VkDescriptorSetLayout layout =

View File

@ -16,6 +16,8 @@
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/PixelShaderGen.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/UberShaderPixel.h"
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/VertexShaderGen.h"
namespace Vulkan
@ -60,6 +62,7 @@ public:
void SetBlendState(const BlendingState& state);
bool CheckForShaderChanges(u32 gx_primitive_type);
void ClearShaders();
void UpdateVertexShaderConstants();
void UpdateGeometryShaderConstants();
@ -159,8 +162,8 @@ private:
DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11),
DIRTY_FLAG_PIPELINE_BINDING = (1 << 12),
DIRTY_FLAG_ALL_DESCRIPTOR_SETS =
DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO
DIRTY_FLAG_ALL_DESCRIPTOR_SETS = DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO |
DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO
};
bool Initialize();
@ -178,9 +181,15 @@ private:
// Obtains a Vulkan pipeline object for the specified pipeline configuration.
// Also adds this pipeline configuration to the UID cache if it is not present already.
VkPipeline GetPipelineAndCacheUID(const PipelineInfo& info);
VkPipeline GetPipelineAndCacheUID();
// Are bounding box ubershaders enabled? If so, we need to ensure the SSBO is set up,
// since the bbox writes are determined by a uniform.
bool IsSSBODescriptorRequired() const;
bool UpdatePipeline();
void UpdatePipelineLayout();
void UpdatePipelineVertexFormat();
bool UpdateDescriptorSet();
// Allocates storage in the uniform buffer of the specified size. If this storage cannot be
@ -203,10 +212,14 @@ private:
VertexShaderUid m_vs_uid = {};
GeometryShaderUid m_gs_uid = {};
PixelShaderUid m_ps_uid = {};
UberShader::VertexShaderUid m_uber_vs_uid = {};
UberShader::PixelShaderUid m_uber_ps_uid = {};
bool m_using_ubershaders = false;
// pipeline state
PipelineInfo m_pipeline_state = {};
VkPipeline m_pipeline_object = VK_NULL_HANDLE;
const VertexFormat* m_vertex_format = nullptr;
// shader bindings
std::array<VkDescriptorSet, NUM_DESCRIPTOR_SET_BIND_POINTS> m_descriptor_sets = {};

View File

@ -575,8 +575,7 @@ void UtilityShaderDraw::BindDescriptors()
{
// TODO: This method is a mess, clean it up
std::array<VkDescriptorSet, NUM_DESCRIPTOR_SET_BIND_POINTS> bind_descriptor_sets = {};
std::array<VkWriteDescriptorSet, NUM_UBO_DESCRIPTOR_SET_BINDINGS + NUM_PIXEL_SHADER_SAMPLERS>
set_writes = {};
std::array<VkWriteDescriptorSet, NUM_UBO_DESCRIPTOR_SET_BINDINGS + 1> set_writes = {};
uint32_t num_set_writes = 0;
VkDescriptorBufferInfo dummy_uniform_buffer = {
@ -633,29 +632,32 @@ void UtilityShaderDraw::BindDescriptors()
// Check if we have any at all, skip the binding process entirely if we don't
if (first_active_sampler != NUM_PIXEL_SHADER_SAMPLERS)
{
// We need to fill it with non-empty images.
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
{
if (m_ps_samplers[i].imageView == VK_NULL_HANDLE)
{
m_ps_samplers[i].imageView = g_object_cache->GetDummyImageView();
m_ps_samplers[i].sampler = g_object_cache->GetPointSampler();
}
}
// Allocate a new descriptor set
VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(
g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS));
if (set == VK_NULL_HANDLE)
PanicAlert("Failed to allocate descriptor set for utility draw");
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
{
const VkDescriptorImageInfo& info = m_ps_samplers[i];
if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
{
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
static_cast<uint32_t>(i),
0,
1,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
&info,
nullptr,
nullptr};
}
}
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
0,
0,
static_cast<u32>(NUM_PIXEL_SHADER_SAMPLERS),
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
m_ps_samplers.data(),
nullptr,
nullptr};
bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set;
}

View File

@ -53,17 +53,9 @@ VertexFormat::VertexFormat(const PortableVertexDeclaration& in_vtx_decl)
SetupInputState();
}
VertexFormat* VertexFormat::GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
const VkPipelineVertexInputStateCreateInfo& VertexFormat::GetVertexInputStateInfo() const
{
auto vertex_format_map = VertexLoaderManager::GetNativeVertexFormatMap();
auto iter = vertex_format_map->find(decl);
if (iter == vertex_format_map->end())
{
auto ipair = vertex_format_map->emplace(decl, std::make_unique<VertexFormat>(decl));
iter = ipair.first;
}
return static_cast<VertexFormat*>(iter->second.get());
return m_input_state_info;
}
void VertexFormat::MapAttributes()
@ -136,9 +128,4 @@ void VertexFormat::AddAttribute(uint32_t location, uint32_t binding, VkFormat fo
m_attribute_descriptions[m_num_attributes].offset = offset;
m_num_attributes++;
}
void VertexFormat::SetupVertexPointers()
{
}
} // namespace Vulkan

View File

@ -16,24 +16,13 @@ class VertexFormat : public ::NativeVertexFormat
public:
VertexFormat(const PortableVertexDeclaration& in_vtx_decl);
// Creates or obtains a pointer to a VertexFormat representing decl.
// If this results in a VertexFormat being created, if the game later uses a matching vertex
// declaration, the one that was previously created will be used.
static VertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl);
// Passed to pipeline state creation
const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const
{
return m_input_state_info;
}
const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const;
// Converting PortableVertexDeclaration -> Vulkan types
void MapAttributes();
void SetupInputState();
// Not used in the Vulkan backend.
void SetupVertexPointers() override;
private:
void AddAttribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset);

View File

@ -236,6 +236,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsMultithreading = true; // Assumed support.
config->backend_info.bSupportsComputeShaders = true; // Assumed support.
config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support.
config->backend_info.bSupportsBitfield = true; // Assumed support.
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
config->backend_info.bSupportsInternalResolutionFrameDumps = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = true; // Assumed support.
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.

View File

@ -253,6 +253,7 @@ bool VideoBackend::Initialize(void* window_handle)
g_renderer.reset();
StateTracker::DestroyInstance();
g_framebuffer_manager.reset();
g_shader_cache->Shutdown();
g_shader_cache.reset();
g_object_cache.reset();
g_command_buffer_mgr.reset();
@ -262,6 +263,14 @@ bool VideoBackend::Initialize(void* window_handle)
return false;
}
// Ensure all pipelines previously used by the game have been created.
StateTracker::GetInstance()->ReloadPipelineUIDCache();
// Lastly, precompile ubershaders, if requested.
// This has to be done after the texture cache and shader cache are initialized.
if (g_ActiveConfig.CanPrecompileUberShaders())
g_shader_cache->PrecompileUberShaders();
return true;
}
@ -293,6 +302,7 @@ void VideoBackend::Shutdown()
void VideoBackend::Video_Cleanup()
{
g_command_buffer_mgr->WaitForGPUIdle();
g_shader_cache->Shutdown();
// Save all cached pipelines out to disk for next time.
if (g_ActiveConfig.bShaderCache)

View File

@ -0,0 +1,233 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/AsyncShaderCompiler.h"
#include <thread>
#include "Common/Assert.h"
#include "Common/Logging/Log.h"
namespace VideoCommon
{
// Nothing to set up here: worker threads are only created by StartWorkerThreads().
AsyncShaderCompiler::AsyncShaderCompiler() = default;
// Destroys the compiler. Worker threads must already have been stopped and all completed work
// retrieved; both conditions are asserted below rather than handled here.
AsyncShaderCompiler::~AsyncShaderCompiler()
{
// Pending work can be left at shutdown.
// The work item classes are expected to clean up after themselves.
_assert_(!HasWorkerThreads());
_assert_(m_completed_work.empty());
}
// Submits a work item for compilation, taking ownership of it.
// With worker threads running, the item is appended to the pending queue and one worker is
// woken. Without worker threads, the item is compiled immediately on the calling thread.
void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item)
{
  if (!HasWorkerThreads())
  {
    // If no worker threads are available, compile synchronously.
    // Match the worker thread behaviour: an item whose Compile() reports failure is dropped
    // (and destroyed here) instead of being handed to Retrieve().
    if (item->Compile())
      m_completed_work.push_back(std::move(item));
    return;
  }

  std::lock_guard<std::mutex> guard(m_pending_work_lock);
  m_pending_work.push_back(std::move(item));
  m_worker_thread_wake.notify_one();
}
void AsyncShaderCompiler::RetrieveWorkItems()
{
std::deque<WorkItemPtr> completed_work;
{
std::lock_guard<std::mutex> guard(m_completed_work_lock);
m_completed_work.swap(completed_work);
}
while (!completed_work.empty())
{
completed_work.front()->Retrieve();
completed_work.pop_front();
}
}
// Returns true while any item is still queued or is currently being compiled by a worker.
bool AsyncShaderCompiler::HasPendingWork()
{
  std::lock_guard<std::mutex> guard(m_pending_work_lock);
  if (!m_pending_work.empty())
    return true;

  return m_busy_workers.load() != 0;
}
// Blocks the calling thread, polling once per millisecond, until no work is queued or in
// progress. Does not retrieve the completed items.
void AsyncShaderCompiler::WaitUntilCompletion()
{
  constexpr auto POLL_INTERVAL = std::chrono::milliseconds(1);
  for (;;)
  {
    if (!HasPendingWork())
      return;

    std::this_thread::sleep_for(POLL_INTERVAL);
  }
}
void AsyncShaderCompiler::WaitUntilCompletion(
const std::function<void(size_t, size_t)>& progress_callback)
{
if (!HasPendingWork())
return;
// Wait a second before opening a progress dialog.
// This way, if the operation completes quickly, we don't annoy the user.
constexpr u32 CHECK_INTERVAL_MS = 50;
constexpr auto CHECK_INTERVAL = std::chrono::milliseconds(CHECK_INTERVAL_MS);
for (u32 i = 0; i < (1000 / CHECK_INTERVAL_MS); i++)
{
std::this_thread::sleep_for(std::chrono::milliseconds(CHECK_INTERVAL));
if (!HasPendingWork())
return;
}
// Grab the number of pending items. We use this to work out how many are left.
size_t total_items = 0;
{
// Safe to hold both locks here, since nowhere else does.
std::lock_guard<std::mutex> pending_guard(m_pending_work_lock);
std::lock_guard<std::mutex> completed_guard(m_completed_work_lock);
total_items = m_completed_work.size() + m_pending_work.size() + m_busy_workers.load() + 1;
}
// Update progress while the compiles complete.
for (;;)
{
size_t remaining_items;
{
std::lock_guard<std::mutex> pending_guard(m_pending_work_lock);
if (m_pending_work.empty() && !m_busy_workers.load())
break;
remaining_items = m_pending_work.size();
}
progress_callback(total_items - remaining_items, total_items);
std::this_thread::sleep_for(CHECK_INTERVAL);
}
}
// Starts up to num_worker_threads worker threads, one at a time, stopping at the first one
// that fails to initialize. Returns true if zero threads were requested; otherwise returns
// whether at least one worker thread is running afterwards.
bool AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
{
  if (num_worker_threads == 0)
    return true;

  for (u32 index = 0; index != num_worker_threads; ++index)
  {
    // Main-thread half of the initialization; it may hand a parameter to the new thread.
    void* init_param = nullptr;
    if (!WorkerThreadInitMainThread(&init_param))
    {
      WARN_LOG(VIDEO, "Failed to initialize shader compiler worker thread.");
      break;
    }

    // Launch the thread, then wait for it to report the result of its own initialization.
    m_worker_thread_start_result.store(false);
    std::thread worker(&AsyncShaderCompiler::WorkerThreadEntryPoint, this, init_param);
    m_init_event.Wait();

    const bool worker_started = m_worker_thread_start_result.load();
    if (worker_started)
    {
      m_worker_threads.push_back(std::move(worker));
      continue;
    }

    // The entry point returns right after a failed init, so the join does not block for long.
    WARN_LOG(VIDEO, "Failed to start shader compiler worker thread.");
    worker.join();
    break;
  }

  return !m_worker_threads.empty();
}
// Changes the number of worker threads by stopping all of them and starting the requested
// count. Returns true without doing anything when the count already matches; otherwise returns
// the result of StartWorkerThreads().
bool AsyncShaderCompiler::ResizeWorkerThreads(u32 num_worker_threads)
{
  const bool count_changed = m_worker_threads.size() != num_worker_threads;
  if (count_changed)
  {
    StopWorkerThreads();
    return StartWorkerThreads(num_worker_threads);
  }

  return true;
}
// Returns true if at least one worker thread has been started and not yet stopped.
bool AsyncShaderCompiler::HasWorkerThreads() const
{
  return m_worker_threads.size() != 0;
}
// Asks every worker thread to exit and joins them all. Does nothing if none are running.
// Items still in the pending queue are left there.
void AsyncShaderCompiler::StopWorkerThreads()
{
  if (m_worker_threads.empty())
    return;

  {
    // Raise the exit flag under the pending-work lock and wake every worker so that sleeping
    // workers notice it.
    std::lock_guard<std::mutex> guard(m_pending_work_lock);
    m_exit_flag.Set();
    m_worker_thread_wake.notify_all();
  }

  // Wait for each worker to leave its loop.
  for (auto worker = m_worker_threads.begin(); worker != m_worker_threads.end(); ++worker)
    worker->join();

  // Reset the state so worker threads can be started again later.
  m_worker_threads.clear();
  m_exit_flag.Clear();
}
// Default main-thread initialization hook. StartWorkerThreads() calls it before creating each
// worker thread and passes the value stored in *param on to that thread.
// This base implementation does nothing, leaves *param untouched and reports success.
bool AsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
{
return true;
}
// Default worker-thread initialization hook. WorkerThreadEntryPoint() calls it first thing on
// the new thread; returning false makes that thread exit without processing any work.
// This base implementation does nothing and reports success.
bool AsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
{
return true;
}
// Default worker-thread cleanup hook. WorkerThreadEntryPoint() calls it on the worker thread
// after the work loop has returned. This base implementation does nothing.
void AsyncShaderCompiler::WorkerThreadExit(void* param)
{
}
// Body of each worker thread: runs the backend-specific initialization, reports the outcome to
// StartWorkerThreads() (which is blocked on m_init_event), then processes work until asked to
// exit.
void AsyncShaderCompiler::WorkerThreadEntryPoint(void* param)
{
  // Initialize worker thread with backend-specific method.
  const bool initialized = WorkerThreadInitWorkerThread(param);
  if (!initialized)
    WARN_LOG(VIDEO, "Failed to initialize shader compiler worker.");

  // Publish the result before signalling, so the starting thread reads the right value.
  m_worker_thread_start_result.store(initialized);
  m_init_event.Set();

  // A thread that failed to initialize exits immediately, without the exit hook.
  if (!initialized)
    return;

  WorkerThreadRun();
  WorkerThreadExit(param);
}
void AsyncShaderCompiler::WorkerThreadRun()
{
std::unique_lock<std::mutex> pending_lock(m_pending_work_lock);
while (!m_exit_flag.IsSet())
{
m_worker_thread_wake.wait(pending_lock);
while (!m_pending_work.empty() && !m_exit_flag.IsSet())
{
m_busy_workers++;
WorkItemPtr item(std::move(m_pending_work.front()));
m_pending_work.pop_front();
pending_lock.unlock();
if (item->Compile())
{
std::lock_guard<std::mutex> completed_guard(m_completed_work_lock);
m_completed_work.push_back(std::move(item));
}
pending_lock.lock();
m_busy_workers--;
}
}
}
} // namespace VideoCommon

View File

@ -0,0 +1,84 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <condition_variable>
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
#include "Common/CommonTypes.h"
#include "Common/Event.h"
#include "Common/Flag.h"
namespace VideoCommon
{
// Runs shader compilation work items either on a set of worker threads or, when no worker
// threads have been started, synchronously on the thread that queues them.
class AsyncShaderCompiler
{
public:
  // A unit of work submitted through QueueWorkItem().
  class WorkItem
  {
  public:
    virtual ~WorkItem() = default;

    // Does the actual compile. Runs on a worker thread, or on the queueing thread when there
    // are no workers. Worker threads drop items for which this returns false.
    virtual bool Compile() = 0;

    // Called from RetrieveWorkItems(), on the thread calling it, after Compile() has finished.
    virtual void Retrieve() = 0;
  };

  using WorkItemPtr = std::unique_ptr<WorkItem>;

  AsyncShaderCompiler();
  virtual ~AsyncShaderCompiler();

  // Creates a work item of type T, forwarding the arguments to T's constructor without making
  // intermediate copies of them.
  template <typename T, typename... Params>
  static WorkItemPtr CreateWorkItem(Params&&... params)
  {
    return std::unique_ptr<WorkItem>(new T(std::forward<Params>(params)...));
  }

  // Hands an item over for compilation; ownership moves to the compiler.
  void QueueWorkItem(WorkItemPtr item);

  // Calls Retrieve() on all items that have completed so far.
  void RetrieveWorkItems();

  // True while items are queued or being compiled.
  bool HasPendingWork();

  // Simpler version without progress updates.
  void WaitUntilCompletion();

  // Calls progress_callback periodically, with completed_items, and total_items.
  void WaitUntilCompletion(const std::function<void(size_t, size_t)>& progress_callback);

  // Worker threads are started and stopped explicitly instead of from the constructor and
  // destructor, because doing so calls the virtual WorkerThread* hooks below. The destructor
  // asserts that the workers have already been stopped.
  bool StartWorkerThreads(u32 num_worker_threads);
  bool ResizeWorkerThreads(u32 num_worker_threads);
  bool HasWorkerThreads() const;
  void StopWorkerThreads();

protected:
  // Backend hooks. InitMainThread runs on the thread calling StartWorkerThreads() before each
  // worker is created, and the value it stores in *param is passed to the other two, which run
  // on the worker thread itself at startup and after its work loop ends.
  virtual bool WorkerThreadInitMainThread(void** param);
  virtual bool WorkerThreadInitWorkerThread(void* param);
  virtual void WorkerThreadExit(void* param);

private:
  void WorkerThreadEntryPoint(void* param);
  void WorkerThreadRun();

  // Set to ask the worker threads to leave their work loop.
  Common::Flag m_exit_flag;

  // Signalled by a starting worker once m_worker_thread_start_result holds its init result.
  Common::Event m_init_event;

  std::vector<std::thread> m_worker_threads;
  std::atomic_bool m_worker_thread_start_result{false};

  // Items waiting to be compiled; guarded by m_pending_work_lock.
  std::deque<WorkItemPtr> m_pending_work;
  std::mutex m_pending_work_lock;
  std::condition_variable m_worker_thread_wake;

  // Number of workers currently compiling an item.
  std::atomic_size_t m_busy_workers{0};

  // Items compiled but not yet retrieved; guarded by m_completed_work_lock.
  std::deque<WorkItemPtr> m_completed_work;
  std::mutex m_completed_work_lock;
};
} // namespace VideoCommon

View File

@ -24,8 +24,7 @@ float FogParam0::GetA() const
float FogParam3::GetC() const
{
// scale mantissa from 11 to 23 bits
const u32 integral = (static_cast<u32>(c_sign) << 31) | (static_cast<u32>(c_exp) << 23) |
(static_cast<u32>(c_mant) << 12);
const u32 integral = (c_sign.Value() << 31) | (c_exp.Value() << 23) | (c_mant.Value() << 12);
float real;
std::memcpy(&real, &integral, sizeof(u32));

View File

@ -301,40 +301,37 @@ struct TevStageCombiner
{
union ColorCombiner
{
struct // abc=8bit,d=10bit
{
u32 d : 4; // TEVSELCC_X
u32 c : 4; // TEVSELCC_X
u32 b : 4; // TEVSELCC_X
u32 a : 4; // TEVSELCC_X
// abc=8bit,d=10bit
BitField<0, 4, u32> d; // TEVSELCC_X
BitField<4, 4, u32> c; // TEVSELCC_X
BitField<8, 4, u32> b; // TEVSELCC_X
BitField<12, 4, u32> a; // TEVSELCC_X
u32 bias : 2;
u32 op : 1;
u32 clamp : 1;
BitField<16, 2, u32> bias;
BitField<18, 1, u32> op;
BitField<19, 1, u32> clamp;
BitField<20, 2, u32> shift;
BitField<22, 2, u32> dest; // 1,2,3
u32 shift : 2;
u32 dest : 2; // 1,2,3
};
u32 hex;
};
union AlphaCombiner
{
struct
{
u32 rswap : 2;
u32 tswap : 2;
u32 d : 3; // TEVSELCA_
u32 c : 3; // TEVSELCA_
u32 b : 3; // TEVSELCA_
u32 a : 3; // TEVSELCA_
BitField<0, 2, u32> rswap;
BitField<2, 2, u32> tswap;
BitField<4, 3, u32> d; // TEVSELCA_
BitField<7, 3, u32> c; // TEVSELCA_
BitField<10, 3, u32> b; // TEVSELCA_
BitField<13, 3, u32> a; // TEVSELCA_
u32 bias : 2; // GXTevBias
u32 op : 1;
u32 clamp : 1;
BitField<16, 2, u32> bias; // GXTevBias
BitField<18, 1, u32> op;
BitField<19, 1, u32> clamp;
BitField<20, 2, u32> shift;
BitField<22, 2, u32> dest; // 1,2,3
u32 shift : 2;
u32 dest : 2; // 1,2,3
};
u32 hex;
};
@ -353,21 +350,18 @@ struct TevStageCombiner
union TevStageIndirect
{
struct
{
u32 bt : 2; // Indirect tex stage ID
u32 fmt : 2; // Format: ITF_X
u32 bias : 3; // ITB_X
u32 bs : 2; // ITBA_X, indicates which coordinate will become the 'bump alpha'
u32 mid : 4; // Matrix ID to multiply offsets with
u32 sw : 3; // ITW_X, wrapping factor for S of regular coord
u32 tw : 3; // ITW_X, wrapping factor for T of regular coord
u32 lb_utclod : 1; // Use modified or unmodified texture coordinates for LOD computation
u32 fb_addprev : 1; // 1 if the texture coordinate results from the previous TEV stage should
// be added
u32 pad0 : 3;
u32 rid : 8;
};
BitField<0, 2, u32> bt; // Indirect tex stage ID
BitField<2, 2, u32> fmt; // Format: ITF_X
BitField<4, 3, u32> bias; // ITB_X
BitField<7, 2, u32> bs; // ITBA_X, indicates which coordinate will become the 'bump alpha'
BitField<9, 4, u32> mid; // Matrix ID to multiply offsets with
BitField<13, 3, u32> sw; // ITW_X, wrapping factor for S of regular coord
BitField<16, 3, u32> tw; // ITW_X, wrapping factor for T of regular coord
BitField<19, 1, u32> lb_utclod; // Use modified or unmodified texture
// coordinates for LOD computation
BitField<20, 1, u32> fb_addprev; // 1 if the texture coordinate results from the previous TEV
// stage should be added
struct
{
u32 hex : 21;
@ -381,28 +375,23 @@ union TevStageIndirect
union TwoTevStageOrders
{
struct
{
u32 texmap0 : 3; // Indirect tex stage texmap
u32 texcoord0 : 3;
u32 enable0 : 1; // 1 if should read from texture
u32 colorchan0 : 3; // RAS1_CC_X
BitField<0, 3, u32> texmap0; // Indirect tex stage texmap
BitField<3, 3, u32> texcoord0;
BitField<6, 1, u32> enable0; // 1 if should read from texture
BitField<7, 3, u32> colorchan0; // RAS1_CC_X
u32 pad0 : 2;
BitField<12, 3, u32> texmap1;
BitField<15, 3, u32> texcoord1;
BitField<18, 1, u32> enable1; // 1 if should read from texture
BitField<19, 3, u32> colorchan1; // RAS1_CC_X
u32 texmap1 : 3;
u32 texcoord1 : 3;
u32 enable1 : 1; // 1 if should read from texture
u32 colorchan1 : 3; // RAS1_CC_X
BitField<24, 8, u32> rid;
u32 pad1 : 2;
u32 rid : 8;
};
u32 hex;
int getTexMap(int i) const { return i ? texmap1 : texmap0; }
int getTexCoord(int i) const { return i ? texcoord1 : texcoord0; }
int getEnable(int i) const { return i ? enable1 : enable0; }
int getColorChan(int i) const { return i ? colorchan1 : colorchan0; }
u32 getTexMap(int i) const { return i ? texmap1.Value() : texmap0.Value(); }
u32 getTexCoord(int i) const { return i ? texcoord1.Value() : texcoord0.Value(); }
u32 getEnable(int i) const { return i ? enable1.Value() : enable0.Value(); }
u32 getColorChan(int i) const { return i ? colorchan1.Value() : colorchan0.Value(); }
};
union TEXSCALE
@ -527,20 +516,14 @@ union TexTLUT
union ZTex1
{
struct
{
u32 bias : 24;
};
BitField<0, 24, u32> bias;
u32 hex;
};
union ZTex2
{
struct
{
u32 type : 2; // TEV_Z_TYPE_X
u32 op : 2; // GXZTexOp
};
BitField<0, 2, u32> type; // TEV_Z_TYPE_X
BitField<2, 2, u32> op; // GXZTexOp
u32 hex;
};
@ -681,14 +664,12 @@ union FogParam0
union FogParam3
{
struct
{
u32 c_mant : 11;
u32 c_exp : 8;
u32 c_sign : 1;
u32 proj : 1; // 0 - perspective, 1 - orthographic
u32 fsel : 3; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 - backward exp, 7 - backward exp2
};
BitField<0, 11, u32> c_mant;
BitField<11, 8, u32> c_exp;
BitField<19, 1, u32> c_sign;
BitField<20, 1, u32> proj; // 0 - perspective, 1 - orthographic
BitField<21, 3, u32> fsel; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 -
// backward exp, 7 - backward exp2
// amount to subtract from eyespacez after range adjustment
float GetC() const;
@ -698,15 +679,12 @@ union FogParam3
union FogRangeKElement
{
struct
{
u32 HI : 12;
u32 LO : 12;
u32 regid : 8;
};
BitField<0, 12, u32> HI;
BitField<12, 12, u32> LO;
BitField<24, 8, u32> regid;
// TODO: Which scaling coefficient should we use here? This is just a guess!
float GetValue(int i) const { return (i ? HI : LO) / 256.f; }
float GetValue(int i) const { return (i ? HI.Value() : LO.Value()) / 256.f; }
u32 HEX;
};
@ -714,13 +692,9 @@ struct FogRangeParams
{
union RangeBase
{
struct
{
u32 Center : 10; // viewport center + 342
u32 Enabled : 1;
u32 unused : 13;
u32 regid : 8;
};
BitField<0, 10, u32> Center; // viewport center + 342
BitField<10, 1, u32> Enabled;
BitField<24, 8, u32> regid;
u32 hex;
};
RangeBase Base;
@ -736,12 +710,9 @@ struct FogParams
union FogColor
{
struct
{
u32 b : 8;
u32 g : 8;
u32 r : 8;
};
BitField<0, 8, u32> b;
BitField<8, 8, u32> g;
BitField<16, 8, u32> r;
u32 hex;
};
@ -771,11 +742,8 @@ union ZMode
union ConstantAlpha
{
struct
{
u32 alpha : 8;
u32 enable : 1;
};
BitField<0, 8, u32> alpha;
BitField<8, 1, u32> enable;
u32 hex;
};
@ -881,19 +849,16 @@ union TevReg
union TevKSel
{
struct
{
u32 swap1 : 2;
u32 swap2 : 2;
u32 kcsel0 : 5;
u32 kasel0 : 5;
u32 kcsel1 : 5;
u32 kasel1 : 5;
};
BitField<0, 2, u32> swap1;
BitField<2, 2, u32> swap2;
BitField<4, 5, u32> kcsel0;
BitField<9, 5, u32> kasel0;
BitField<14, 5, u32> kcsel1;
BitField<19, 5, u32> kasel1;
u32 hex;
int getKC(int i) const { return i ? kcsel1 : kcsel0; }
int getKA(int i) const { return i ? kasel1 : kasel0; }
u32 getKC(int i) const { return i ? kcsel1.Value() : kcsel0.Value(); }
u32 getKA(int i) const { return i ? kasel1.Value() : kasel0.Value(); }
};
union AlphaTest

View File

@ -93,6 +93,9 @@ static void BPWritten(const BPCmd& bp)
(u32)bpmem.genMode.cullmode, (u32)bpmem.genMode.numindstages,
(u32)bpmem.genMode.zfreeze);
if (bp.changes)
PixelShaderManager::SetGenModeChanged();
// Only call SetGenerationMode when cull mode changes.
if (bp.changes & 0xC000)
SetGenerationMode();
@ -155,12 +158,20 @@ static void BPWritten(const BPCmd& bp)
// Set Color Mask
if (bp.changes & 0x18) // colorupdate | alphaupdate
SetColorMask();
// Dither
if (bp.changes & 0x04)
PixelShaderManager::SetBlendModeChanged();
}
return;
case BPMEM_CONSTANTALPHA: // Set Destination Alpha
PRIM_LOG("constalpha: alp=%d, en=%d", bpmem.dstalpha.alpha, bpmem.dstalpha.enable);
if (bp.changes & 0xFF)
PixelShaderManager::SetDestAlpha();
PRIM_LOG("constalpha: alp=%d, en=%d", bpmem.dstalpha.alpha.Value(),
bpmem.dstalpha.enable.Value());
if (bp.changes)
{
PixelShaderManager::SetAlpha();
PixelShaderManager::SetDestAlphaChanged();
}
if (bp.changes & 0x100)
SetBlendMode();
return;
@ -237,6 +248,7 @@ static void BPWritten(const BPCmd& bp)
// the number of lines copied is determined by the y scale * source efb height
BoundingBox::active = false;
PixelShaderManager::SetBoundingBoxActive(false);
float yScale;
if (PE_copy.scale_invert)
@ -317,12 +329,13 @@ static void BPWritten(const BPCmd& bp)
PixelShaderManager::SetAlpha();
if (bp.changes)
{
PixelShaderManager::SetAlphaTestChanged();
g_renderer->SetColorMask();
SetBlendMode();
}
return;
case BPMEM_BIAS: // BIAS
PRIM_LOG("ztex bias=0x%x", bpmem.ztex1.bias);
PRIM_LOG("ztex bias=0x%x", bpmem.ztex1.bias.Value());
if (bp.changes)
PixelShaderManager::SetZTextureBias();
return;
@ -331,7 +344,7 @@ static void BPWritten(const BPCmd& bp)
if (bp.changes & 3)
PixelShaderManager::SetZTextureTypeChanged();
if (bp.changes & 12)
VertexShaderManager::SetViewportChanged();
PixelShaderManager::SetZTextureOpChanged();
#if defined(_DEBUG) || defined(DEBUGFAST)
const char* pzop[] = {"DISABLE", "ADD", "REPLACE", "?"};
const char* pztype[] = {"Z8", "Z16", "Z24", "?"};
@ -389,6 +402,7 @@ static void BPWritten(const BPCmd& bp)
{
u8 offset = bp.address & 2;
BoundingBox::active = true;
PixelShaderManager::SetBoundingBoxActive(true);
if (g_ActiveConfig.backend_info.bSupportsBBox && g_ActiveConfig.bBBoxEnable)
{
@ -425,6 +439,11 @@ static void BPWritten(const BPCmd& bp)
* 3 BC0 - Ind. Tex Stage 0 NTexCoord
* 0 BI0 - Ind. Tex Stage 0 NTexMap */
case BPMEM_IREF:
{
if (bp.changes)
PixelShaderManager::SetTevIndirectChanged();
return;
}
case BPMEM_TEV_KSEL: // Texture Environment Swap Mode Table 0
case BPMEM_TEV_KSEL + 1: // Texture Environment Swap Mode Table 1
@ -434,6 +453,8 @@ static void BPWritten(const BPCmd& bp)
case BPMEM_TEV_KSEL + 5: // Texture Environment Swap Mode Table 5
case BPMEM_TEV_KSEL + 6: // Texture Environment Swap Mode Table 6
case BPMEM_TEV_KSEL + 7: // Texture Environment Swap Mode Table 7
PixelShaderManager::SetTevKSel(bp.address - BPMEM_TEV_KSEL, bp.newvalue);
return;
/* This Register can be used to limit to which bits of BP registers is
* actually written to. The mask is only valid for the next BP write,
@ -566,6 +587,7 @@ static void BPWritten(const BPCmd& bp)
// -------------------------
case BPMEM_TREF:
case BPMEM_TREF + 4:
PixelShaderManager::SetTevOrder(bp.address - BPMEM_TREF, bp.newvalue);
return;
// ----------------------
// Set wrap size
@ -629,15 +651,18 @@ static void BPWritten(const BPCmd& bp)
// --------------
// Indirect Tev
// --------------
case BPMEM_IND_CMD: // Indirect 0-15
case BPMEM_IND_CMD:
PixelShaderManager::SetTevIndirectChanged();
return;
// --------------------------------------------------
// Set Color/Alpha of a Tev
// BPMEM_TEV_COLOR_ENV - Dest, Shift, Clamp, Sub, Bias, Sel A, Sel B, Sel C, Sel D
// BPMEM_TEV_ALPHA_ENV - Dest, Shift, Clamp, Sub, Bias, Sel A, Sel B, Sel C, Sel D, T Swap, R Swap
// --------------------------------------------------
case BPMEM_TEV_COLOR_ENV: // Texture Environment Color/Alpha 0-7
case BPMEM_TEV_COLOR_ENV + 16: // Texture Environment Color/Alpha 8-15
case BPMEM_TEV_COLOR_ENV: // Texture Environment 1
case BPMEM_TEV_COLOR_ENV + 16:
PixelShaderManager::SetTevCombiner((bp.address - BPMEM_TEV_COLOR_ENV) >> 1,
(bp.address - BPMEM_TEV_COLOR_ENV) & 1, bp.newvalue);
return;
default:
break;
@ -1281,7 +1306,7 @@ void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
"Tex sel: %d\n",
(data[0] - BPMEM_TEV_ALPHA_ENV) / 2, tevin[ac.a], tevin[ac.b], tevin[ac.c],
tevin[ac.d], tevbias[ac.bias], tevop[ac.op], no_yes[ac.clamp],
tevscale[ac.shift], tevout[ac.dest], ac.rswap, ac.tswap);
tevscale[ac.shift], tevout[ac.dest], ac.rswap.Value(), ac.tswap.Value());
break;
}

View File

@ -1,6 +1,7 @@
set(SRCS
AbstractTexture.cpp
AsyncRequests.cpp
AsyncShaderCompiler.cpp
BoundingBox.cpp
BPFunctions.cpp
BPMemory.cpp
@ -31,6 +32,9 @@ set(SRCS
RenderState.cpp
ShaderGenCommon.cpp
Statistics.cpp
UberShaderCommon.cpp
UberShaderPixel.cpp
UberShaderVertex.cpp
TextureCacheBase.cpp
TextureConfig.cpp
TextureConversionShader.cpp

View File

@ -24,11 +24,31 @@ struct PixelShaderConstants
int4 fogi;
float4 fogf[2];
float4 zslope;
float4 efbscale;
float efbscale[2];
// Constants from here onwards are only used in ubershaders.
u32 genmode; // .z
u32 alphaTest; // .w
u32 fogParam3; // .x
u32 fogRangeBase; // .y
u32 dstalpha; // .z
u32 ztex_op; // .w
u32 early_ztest; // .x (bool)
u32 rgba6_format; // .y (bool)
u32 dither; // .z (bool)
u32 bounding_box; // .w (bool)
uint4 pack1[16]; // .xy - combiners, .z - tevind, .w - iref
uint4 pack2[8]; // .x - tevorder, .y - tevksel
int4 konst[32]; // .rgba
};
struct VertexShaderConstants
{
u32 components; // .x
u32 xfmem_dualTexInfo; // .y
u32 xfmem_numColorChans; // .z
u32 pad1; // .w
float4 posnormalmatrix[6];
float4 projection[4];
int4 materials[4];
@ -45,7 +65,10 @@ struct VertexShaderConstants
float4 normalmatrices[32];
float4 posttransformmatrices[64];
float4 pixelcentercorrection;
float4 viewport;
float viewport[2]; // .xy
float pad2[2]; // .zw
uint4 xfmem_pack1[8]; // .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha
};
struct GeometryShaderConstants

View File

@ -98,7 +98,10 @@ static BugInfo m_known_bugs[] = {
BUG_BROKEN_BITWISE_OP_NEGATION, -1.0, -1.0, true},
{API_VULKAN, OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_PRIMITIVE_RESTART, -1.0, -1.0,
true},
};
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN,
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN,
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}};
static std::map<Bug, BugInfo> m_bugs;

View File

@ -247,6 +247,12 @@ enum Bug
// fail compilation with no useful diagnostic log. This can be worked around by storing
// the negated value to a temporary variable then using that in the bitwise op.
BUG_BROKEN_BITWISE_OP_NEGATION,
// Bug: Shaders are recompiled on the main thread after being previously compiled on
// a worker thread on Mesa i965.
// Started version: -1
// Ended Version: -1
BUG_SHARED_CONTEXT_SHADER_COMPILATION,
};
// Initializes our internal vendor, device family, and driver version

View File

@ -364,3 +364,23 @@ static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config,
else
out.Write("\toutput.RestartStrip();\n");
}
void EnumerateGeometryShaderUids(const std::function<void(const GeometryShaderUid&)>& callback)
{
GeometryShaderUid uid;
std::memset(&uid, 0, sizeof(uid));
static constexpr std::array<u32, 3> primitive_lut = {
{PRIMITIVE_TRIANGLES, PRIMITIVE_LINES, PRIMITIVE_POINTS}};
for (u32 primitive : primitive_lut)
{
auto* guid = uid.GetUidData<geometry_shader_uid_data>();
guid->primitive_type = primitive;
for (u32 texgens = 0; texgens <= 8; texgens++)
{
guid->numTexGens = texgens;
callback(uid);
}
}
}

View File

@ -4,6 +4,7 @@
#pragma once
#include <functional>
#include "Common/CommonTypes.h"
#include "VideoCommon/ShaderGenCommon.h"
#include "VideoCommon/VertexManagerBase.h"
@ -28,3 +29,4 @@ typedef ShaderUid<geometry_shader_uid_data> GeometryShaderUid;
ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
const geometry_shader_uid_data* uid_data);
GeometryShaderUid GetGeometryShaderUid(u32 primitive_type);
void EnumerateGeometryShaderUids(const std::function<void(const GeometryShaderUid&)>& callback);

View File

@ -194,9 +194,6 @@ void VideoBackendBase::InitializeShared()
g_Config.UpdateProjectionHack();
g_Config.VerifyValidity();
UpdateActiveConfig();
// Notify the core that the video backend is ready
Host_Message(WM_USER_CREATE);
}
void VideoBackendBase::ShutdownShared()

View File

@ -106,8 +106,6 @@ class NativeVertexFormat : NonCopyable
{
public:
virtual ~NativeVertexFormat() {}
virtual void SetupVertexPointers() = 0;
u32 GetVertexStride() const { return vtx_decl.stride; }
const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
protected:

View File

@ -18,6 +18,7 @@
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
namespace PixelEngine
{
@ -231,6 +232,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{
mmio->Register(base | (PE_BBOX_LEFT + 2 * i), MMIO::ComplexRead<u16>([i](u32) {
BoundingBox::active = false;
PixelShaderManager::SetBoundingBoxActive(false);
return g_video_backend->Video_GetBoundingBox(i);
}),
MMIO::InvalidWrite<u16>());

View File

@ -179,7 +179,7 @@ PixelShaderUid GetPixelShaderUid()
u32 numStages = uid_data->genMode_numtevstages + 1;
const bool forced_early_z =
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() &&
bpmem.UseEarlyDepthTest() &&
(g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
@ -192,18 +192,6 @@ PixelShaderUid GetPixelShaderUid()
uid_data->per_pixel_depth = per_pixel_depth;
uid_data->forced_early_z = forced_early_z;
if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() &&
(!g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED))
{
static bool warn_once = true;
if (warn_once)
WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current "
"configuration. Make sure to enable fast depth calculations. If this message "
"still shows up your hardware isn't able to emulate the feature properly (a "
"GPU with D3D 11.0 / OGL 4.2 support is required).");
warn_once = false;
}
if (g_ActiveConfig.bEnablePixelLighting)
{
// The lighting shader only needs the two color bits of the 23bit component bit array.
@ -333,6 +321,110 @@ PixelShaderUid GetPixelShaderUid()
return out;
}
// Writes the pixel shader preamble that is shared between the ubershaders and the
// specialized pixel shader generator: integer helper functions, sampler/texture
// declarations, the PSBlock uniform block plus its accessor macros, the optional
// VSBlock (per-pixel lighting) and bounding box buffer, and the VS_OUTPUT struct.
//
// out                - shader source buffer that receives the text
// ApiType            - OpenGL and Vulkan share one set of declarations; any other
//                      value emits the D3D declarations
// num_texgens        - forwarded to GenerateVSOutputMembers
// per_pixel_lighting - also emits the lighting struct and the VSBlock uniforms
// bounding_box       - also emits the bounding box buffer declaration
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens,
                                  bool per_pixel_lighting, bool bounding_box)
{
  // Every API-dependent choice below splits the same way, so evaluate it once.
  const bool glsl_style = ApiType == APIType::OpenGL || ApiType == APIType::Vulkan;

  // Integer dot products (3- and 4-component).
  out.Write("int idot(int3 x, int3 y)\n"
            "{\n"
            "\tint3 tmp = x * y;\n"
            "\treturn tmp.x + tmp.y + tmp.z;\n"
            "}\n");
  out.Write("int idot(int4 x, int4 y)\n"
            "{\n"
            "\tint4 tmp = x * y;\n"
            "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
            "}\n\n");

  // Round-then-cast helpers for scalar and vector floats.
  out.Write("int iround(float x) { return int (round(x)); }\n"
            "int2 iround(float2 x) { return int2(round(x)); }\n"
            "int3 iround(float3 x) { return int3(round(x)); }\n"
            "int4 iround(float4 x) { return int4(round(x)); }\n\n");

  // Samplers (and, for D3D, the separate texture array declaration).
  if (!glsl_style)
  {
    out.Write("SamplerState samp[8] : register(s0);\n");
    out.Write("\n");
    out.Write("Texture2DArray Tex[8] : register(t0);\n");
  }
  else
  {
    out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
  }
  out.Write("\n");

  // Pixel shader uniform block header.
  if (!glsl_style)
    out.Write("cbuffer PSBlock : register(b0) {\n");
  else
    out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n");

  out.Write("\tint4 " I_COLORS "[4];\n"
            "\tint4 " I_KCOLORS "[4];\n"
            "\tint4 " I_ALPHA ";\n"
            "\tfloat4 " I_TEXDIMS "[8];\n"
            "\tint4 " I_ZBIAS "[2];\n"
            "\tint4 " I_INDTEXSCALE "[2];\n"
            "\tint4 " I_INDTEXMTX "[6];\n"
            "\tint4 " I_FOGCOLOR ";\n"
            "\tint4 " I_FOGI ";\n"
            "\tfloat4 " I_FOGF "[2];\n"
            "\tfloat4 " I_ZSLOPE ";\n"
            "\tfloat2 " I_EFBSCALE ";\n"
            "\tuint bpmem_genmode;\n"
            "\tuint bpmem_alphaTest;\n"
            "\tuint bpmem_fogParam3;\n"
            "\tuint bpmem_fogRangeBase;\n"
            "\tuint bpmem_dstalpha;\n"
            "\tuint bpmem_ztex_op;\n"
            "\tbool bpmem_early_ztest;\n"
            "\tbool bpmem_rgba6_format;\n"
            "\tbool bpmem_dither;\n"
            "\tbool bpmem_bounding_box;\n"
            "\tuint4 bpmem_pack1[16];\n"  // .xy - combiners, .z - tevind, .w - iref
            "\tuint4 bpmem_pack2[8];\n"   // .x - tevorder, .y - tevksel
            "\tint4 konstLookup[32];\n"
            "};\n\n");

  // Accessors that unpack the individual registers from the packed arrays above.
  out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
            "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
            "#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n"
            "#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n"
            "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n");

  // Per-pixel lighting additionally needs the lighting struct and the VSBlock uniforms.
  if (per_pixel_lighting)
  {
    out.Write("%s", s_lighting_struct);

    if (!glsl_style)
      out.Write("cbuffer VSBlock : register(b1) {\n");
    else
      out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");

    out.Write(s_shader_uniforms);
    out.Write("};\n");
  }

  // Buffer the shader writes bounding box data into.
  if (bounding_box)
  {
    if (!glsl_style)
    {
      out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n");
    }
    else
    {
      out.Write("SSBO_BINDING(0) buffer BBox {\n"
                "\tint4 bbox_data;\n"
                "};\n");
    }
  }

  // Vertex shader output struct consumed by the pixel shader.
  out.Write("struct VS_OUTPUT {\n");
  GenerateVSOutputMembers(out, ApiType, num_texgens, per_pixel_lighting, "");
  out.Write("};\n");
}
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
APIType ApiType, bool stereo);
static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp,
@ -360,100 +452,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens,
uid_data->genMode_numindstages);
// dot product for integer vectors
out.Write("int idot(int3 x, int3 y)\n"
"{\n"
"\tint3 tmp = x * y;\n"
"\treturn tmp.x + tmp.y + tmp.z;\n"
"}\n");
// Stuff that is shared between ubershaders and pixelgen.
WritePixelShaderCommonHeader(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting,
uid_data->bounding_box);
out.Write("int idot(int4 x, int4 y)\n"
"{\n"
"\tint4 tmp = x * y;\n"
"\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
"}\n\n");
// rounding + casting to integer at once in a single function
out.Write("int iround(float x) { return int (round(x)); }\n"
"int2 iround(float2 x) { return int2(round(x)); }\n"
"int3 iround(float3 x) { return int3(round(x)); }\n"
"int4 iround(float4 x) { return int4(round(x)); }\n\n");
if (ApiType == APIType::OpenGL)
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
}
else if (ApiType == APIType::Vulkan)
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
out.Write("SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n");
out.Write("SAMPLER_BINDING(2) uniform sampler2DArray samp2;\n");
out.Write("SAMPLER_BINDING(3) uniform sampler2DArray samp3;\n");
out.Write("SAMPLER_BINDING(4) uniform sampler2DArray samp4;\n");
out.Write("SAMPLER_BINDING(5) uniform sampler2DArray samp5;\n");
out.Write("SAMPLER_BINDING(6) uniform sampler2DArray samp6;\n");
out.Write("SAMPLER_BINDING(7) uniform sampler2DArray samp7;\n");
}
else // D3D
{
// Declare samplers
out.Write("SamplerState samp[8] : register(s0);\n");
out.Write("\n");
out.Write("Texture2DArray Tex[8] : register(t0);\n");
}
out.Write("\n");
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n");
else
out.Write("cbuffer PSBlock : register(b0) {\n");
out.Write("\tint4 " I_COLORS "[4];\n"
"\tint4 " I_KCOLORS "[4];\n"
"\tint4 " I_ALPHA ";\n"
"\tfloat4 " I_TEXDIMS "[8];\n"
"\tint4 " I_ZBIAS "[2];\n"
"\tint4 " I_INDTEXSCALE "[2];\n"
"\tint4 " I_INDTEXMTX "[6];\n"
"\tint4 " I_FOGCOLOR ";\n"
"\tint4 " I_FOGI ";\n"
"\tfloat4 " I_FOGF "[2];\n"
"\tfloat4 " I_ZSLOPE ";\n"
"\tfloat4 " I_EFBSCALE ";\n"
"};\n");
if (per_pixel_lighting)
{
out.Write("%s", s_lighting_struct);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
else
out.Write("cbuffer VSBlock : register(b1) {\n");
out.Write(s_shader_uniforms);
out.Write("};\n");
}
if (uid_data->bounding_box)
{
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
out.Write("SSBO_BINDING(0) buffer BBox {\n"
"\tint4 bbox_data;\n"
"};\n");
}
else
{
out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n");
}
}
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting, "");
out.Write("};\n");
if (uid_data->forced_early_z)
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
@ -549,7 +552,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
// Let's set up attributes
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.Write("%s in float3 uv%d;\n", GetInterpolationQualifier(msaa, ssaa), i);
out.Write("%s in float3 tex%d;\n", GetInterpolationQualifier(msaa, ssaa), i);
}
out.Write("%s in float4 clipPos;\n", GetInterpolationQualifier(msaa, ssaa));
if (per_pixel_lighting)
@ -560,13 +563,6 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
}
out.Write("void main()\n{\n");
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
{
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
out.Write("\tfloat3 uv%d = tex%d;\n", i, i);
}
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
}
else // D3D
@ -582,7 +578,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
// compute window position if needed because binding semantic WPOS is not widely supported
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
out.Write(",\n in %s float3 uv%d : TEXCOORD%d", GetInterpolationQualifier(msaa, ssaa), i, i);
out.Write(",\n in %s float3 tex%d : TEXCOORD%d", GetInterpolationQualifier(msaa, ssaa), i,
i);
out.Write(",\n in %s float4 clipPos : TEXCOORD%d", GetInterpolationQualifier(msaa, ssaa),
uid_data->genMode_numtexgens);
if (per_pixel_lighting)
@ -645,7 +642,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
{
out.Write("\tint2 fixpoint_uv%d = int2(", i);
out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i);
out.Write("(tex%d.z == 0.0 ? tex%d.xy : tex%d.xy / tex%d.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[%d].zw);\n", i);
// TODO: S24 overflows here?
}
@ -824,7 +821,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
const char* tevIndAlphaSel[] = {"", "x", "y", "z"};
const char* tevIndAlphaMask[] = {"248", "224", "240",
"248"}; // 0b11111000, 0b11100000, 0b11110000, 0b11111000
out.Write("alphabump = iindtex%d.%s & %s;\n", tevind.bt, tevIndAlphaSel[tevind.bs],
out.Write("alphabump = iindtex%d.%s & %s;\n", tevind.bt.Value(), tevIndAlphaSel[tevind.bs],
tevIndAlphaMask[tevind.fmt]);
}
else
@ -836,7 +833,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
{
// format
const char* tevIndFmtMask[] = {"255", "31", "15", "7"};
out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, tevind.bt, tevIndFmtMask[tevind.fmt]);
out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, tevind.bt.Value(),
tevIndFmtMask[tevind.fmt]);
// bias - TODO: Check if this needs to be this complicated..
const char* tevIndBiasField[] = {"", "x", "y", "xy",
@ -1166,11 +1164,6 @@ static void SampleTexture(ShaderCode& out, const char* texcoords, const char* te
"[%d].xy, %s))).%s;\n",
texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
}
else if (ApiType == APIType::Vulkan)
{
out.Write("iround(255.0 * texture(samp%d, float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n",
texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
}
else
{
out.Write("iround(255.0 * texture(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n",

View File

@ -159,4 +159,7 @@ typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
const pixel_shader_uid_data* uid_data);
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens,
bool per_pixel_lighting, bool bounding_box);
ShaderCode GeneratePixelShaderCode(APIType ApiType, const pixel_shader_uid_data* uid_data);
PixelShaderUid GetPixelShaderUid();

View File

@ -15,6 +15,8 @@
bool PixelShaderManager::s_bFogRangeAdjustChanged;
bool PixelShaderManager::s_bViewPortChanged;
bool PixelShaderManager::s_bIndirectDirty;
bool PixelShaderManager::s_bDestAlphaDirty;
PixelShaderConstants PixelShaderManager::constants;
bool PixelShaderManager::dirty;
@ -40,6 +42,38 @@ void PixelShaderManager::Init()
SetTexCoordChanged(6);
SetTexCoordChanged(7);
// fixed Konstants
for (int component = 0; component < 4; component++)
{
constants.konst[0][component] = 255; // 1
constants.konst[1][component] = 223; // 7/8
constants.konst[2][component] = 191; // 3/4
constants.konst[3][component] = 159; // 5/8
constants.konst[4][component] = 128; // 1/2
constants.konst[5][component] = 96; // 3/8
constants.konst[6][component] = 64; // 1/4
constants.konst[7][component] = 32; // 1/8
// Invalid Konstants (reads as zero on hardware)
constants.konst[8][component] = 0;
constants.konst[9][component] = 0;
constants.konst[10][component] = 0;
constants.konst[11][component] = 0;
// Annoyingly, alpha reads zero values for the .rgb colors (officially
// defined as invalid)
// If it wasn't for this, we could just use one of the first 3 columns
// instead of
// wasting an entire 4th column just for alpha.
if (component == 3)
{
constants.konst[12][component] = 0;
constants.konst[13][component] = 0;
constants.konst[14][component] = 0;
constants.konst[15][component] = 0;
}
}
dirty = true;
}
@ -99,6 +133,59 @@ void PixelShaderManager::SetConstants()
dirty = true;
s_bViewPortChanged = false;
}
if (s_bIndirectDirty)
{
for (int i = 0; i < 4; i++)
constants.pack1[i][3] = 0;
for (u32 i = 0; i < (bpmem.genMode.numtevstages + 1); ++i)
{
u32 stage = bpmem.tevind[i].bt;
if (stage < bpmem.genMode.numindstages)
{
// We set some extra bits so the ubershader can quickly check if these
// features are in use.
if (bpmem.tevind[i].IsActive())
constants.pack1[stage][3] =
bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16;
// Note: a tevind of zero just happens to be a passthrough, so no need
// to set an extra bit.
constants.pack1[i][2] =
bpmem.tevind[i].hex; // TODO: This match shadergen, but videosw will
// always wrap.
// The ubershader uses tevind != 0 as a condition whether to calculate texcoords,
// even when texture is disabled, instead of the stage < bpmem.genMode.numindstages.
// We set an unused bit here to indicate that the stage is active, even if it
// is just a pass-through.
constants.pack1[i][2] |= 0x80000000;
}
else
{
constants.pack1[i][2] = 0;
}
}
dirty = true;
s_bIndirectDirty = false;
}
if (s_bDestAlphaDirty)
{
// Destination alpha is only enabled if alpha writes are enabled. Force entire uniform to zero
// when disabled.
u32 dstalpha = bpmem.blendmode.alphaupdate && bpmem.dstalpha.enable &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 ?
bpmem.dstalpha.hex :
0;
if (constants.dstalpha != dstalpha)
{
constants.dstalpha = dstalpha;
dirty = true;
}
}
}
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
@ -116,20 +203,78 @@ void PixelShaderManager::SetTevKonstColor(int index, int component, s32 value)
c[component] = value;
dirty = true;
// Konst for ubershaders. We build the whole array on cpu so the gpu can do a single indirect
// access.
if (component != 3) // Alpha isn't included in the .rgb konsts
constants.konst[index + 12][component] = value;
// .rrrr .gggg .bbbb .aaaa konsts
constants.konst[index + 16 + component * 4][0] = value;
constants.konst[index + 16 + component * 4][1] = value;
constants.konst[index + 16 + component * 4][2] = value;
constants.konst[index + 16 + component * 4][3] = value;
PRIM_LOG("tev konst color%d: %d %d %d %d", index, c[0], c[1], c[2], c[3]);
}
// Stores a raw TEV order register value in component 0 of pack2[index].
// The constants are only flagged dirty when the stored value actually changes.
void PixelShaderManager::SetTevOrder(int index, u32 order)
{
  auto& stored = constants.pack2[index][0];
  if (stored == order)
    return;

  stored = order;
  dirty = true;
}
// Stores a raw TEV konst-selection register value in component 1 of pack2[index].
// The constants are only flagged dirty when the stored value actually changes.
void PixelShaderManager::SetTevKSel(int index, u32 ksel)
{
  auto& stored = constants.pack2[index][1];
  if (stored == ksel)
    return;

  stored = ksel;
  dirty = true;
}
// Stores a raw TEV combiner register value in pack1[index]; |alpha| selects the
// component within that entry. The constants are only flagged dirty when the
// stored value actually changes.
void PixelShaderManager::SetTevCombiner(int index, int alpha, u32 combiner)
{
  auto& stored = constants.pack1[index][alpha];
  if (stored == combiner)
    return;

  stored = combiner;
  dirty = true;
}
// Flags the indirect-texturing constants as stale. Nothing is recomputed here;
// only the s_bIndirectDirty flag is raised.
void PixelShaderManager::SetTevIndirectChanged()
{
s_bIndirectDirty = true;
}
// Copies the alpha-related values from BP memory into the alpha constant:
// components 0 and 1 take the two alpha test reference values, component 3 takes
// the destination alpha value. Always marks the constants dirty.
void PixelShaderManager::SetAlpha()
{
constants.alpha[0] = bpmem.alpha_test.ref0;
constants.alpha[1] = bpmem.alpha_test.ref1;
// Explicit cast from the bitfield wrapper to the signed type used for the store.
constants.alpha[3] = static_cast<s32>(bpmem.dstalpha.alpha);
dirty = true;
}
void PixelShaderManager::SetDestAlpha()
void PixelShaderManager::SetAlphaTestChanged()
{
constants.alpha[3] = bpmem.dstalpha.alpha;
dirty = true;
// Force alphaTest Uniform to zero if it will always pass.
// (set an extra bit to distinguish from "never && never")
// TODO: we could optimize this further and check the actual constants,
// i.e. "a <= 0" and "a >= 255" will always pass.
u32 alpha_test =
bpmem.alpha_test.TestResult() != AlphaTest::PASS ? bpmem.alpha_test.hex | 1 << 31 : 0;
if (constants.alphaTest != alpha_test)
{
constants.alphaTest = alpha_test;
dirty = true;
}
}
// Flags the destination-alpha constant as stale. Nothing is recomputed here;
// only the s_bDestAlphaDirty flag is raised.
void PixelShaderManager::SetDestAlphaChanged()
{
s_bDestAlphaDirty = true;
}
void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height)
@ -235,6 +380,12 @@ void PixelShaderManager::SetZTextureTypeChanged()
dirty = true;
}
// Copies the Z-texture op field from BP memory into the ztex_op constant and
// unconditionally marks the constants dirty.
void PixelShaderManager::SetZTextureOpChanged()
{
constants.ztex_op = bpmem.ztex2.op;
dirty = true;
}
void PixelShaderManager::SetTexCoordChanged(u8 texmapid)
{
TCoordInfo& tc = bpmem.texcoords[texmapid];
@ -262,6 +413,7 @@ void PixelShaderManager::SetFogParamChanged()
constants.fogi[1] = bpmem.fog.b_magnitude;
constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC();
constants.fogi[3] = bpmem.fog.b_shift;
constants.fogParam3 = bpmem.fog.c_proj_fsel.hex;
}
else
{
@ -269,6 +421,7 @@ void PixelShaderManager::SetFogParamChanged()
constants.fogi[1] = 1;
constants.fogf[1][2] = 0.f;
constants.fogi[3] = 1;
constants.fogParam3 = 0;
}
dirty = true;
}
@ -279,12 +432,68 @@ void PixelShaderManager::SetFogRangeAdjustChanged()
return;
s_bFogRangeAdjustChanged = true;
if (constants.fogRangeBase != bpmem.fogRange.Base.hex)
{
constants.fogRangeBase = bpmem.fogRange.Base.hex;
dirty = true;
}
}
// Copies the raw genMode register (bpmem.genMode.hex) into constants.genmode.
// Both s_bIndirectDirty and dirty are raised unconditionally.
void PixelShaderManager::SetGenModeChanged()
{
constants.genmode = bpmem.genMode.hex;
s_bIndirectDirty = true;
dirty = true;
}
void PixelShaderManager::SetZControlChanged()
{
u32 early_ztest = bpmem.zcontrol.early_ztest ? 1 : 0;
u32 rgba6_format =
(bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ? 1 :
0;
u32 dither = rgba6_format && bpmem.blendmode.dither;
if (constants.early_ztest != early_ztest || constants.rgba6_format != rgba6_format ||
constants.dither != dither)
{
constants.early_ztest = early_ztest;
constants.rgba6_format = rgba6_format;
constants.dither = dither;
dirty = true;
}
s_bDestAlphaDirty = true;
}
void PixelShaderManager::SetBlendModeChanged()
{
u32 dither = constants.rgba6_format && bpmem.blendmode.dither;
if (constants.dither != dither)
{
constants.dither = dither;
dirty = true;
}
s_bDestAlphaDirty = true;
}
// Updates constants.bounding_box to reflect whether the bounding box should be active.
//
// The effective state is `active` gated by the configuration: bounding box must be
// enabled (bBBoxEnable) and the fragment-shader implementation must be the one in use.
// The constants are only marked dirty when the effective state differs from what is
// currently stored.
void PixelShaderManager::SetBoundingBoxActive(bool active)
{
  const bool enable =
      active && g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation();

  if (enable == (constants.bounding_box != 0))
    return;

  // Store the gated value, not the raw request. Storing `active` here would disagree
  // with the comparison above: with active == true and the feature disabled by config,
  // a previously-set constant would never be cleared and every call would re-dirty the
  // constants.
  constants.bounding_box = enable;
  dirty = true;
}
void PixelShaderManager::DoState(PointerWrap& p)
{
p.Do(s_bFogRangeAdjustChanged);
p.Do(s_bViewPortChanged);
p.Do(s_bIndirectDirty);
p.Do(s_bDestAlphaDirty);
p.Do(constants);

View File

@ -24,24 +24,36 @@ public:
// so make sure to call them after memory is committed
static void SetTevColor(int index, int component, s32 value);
static void SetTevKonstColor(int index, int component, s32 value);
static void SetTevOrder(int index, u32 order);
static void SetTevKSel(int index, u32 ksel);
static void SetTevCombiner(int index, int alpha, u32 combiner);
static void SetAlpha();
static void SetDestAlpha();
static void SetAlphaTestChanged();
static void SetDestAlphaChanged();
static void SetTexDims(int texmapid, u32 width, u32 height);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetEfbScaleChanged(float scalex, float scaley);
static void SetZSlope(float dfdx, float dfdy, float f0);
static void SetIndMatrixChanged(int matrixidx);
static void SetTevIndirectChanged();
static void SetZTextureTypeChanged();
static void SetZTextureOpChanged();
static void SetIndTexScaleChanged(bool high);
static void SetTexCoordChanged(u8 texmapid);
static void SetFogColorChanged();
static void SetFogParamChanged();
static void SetFogRangeAdjustChanged();
static void SetGenModeChanged();
static void SetZControlChanged();
static void SetBlendModeChanged();
static void SetBoundingBoxActive(bool active);
static PixelShaderConstants constants;
static bool dirty;
static bool s_bFogRangeAdjustChanged;
static bool s_bViewPortChanged;
static bool s_bIndirectDirty;
static bool s_bDestAlphaDirty;
};

View File

@ -29,6 +29,9 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
bits.backend_atomics = g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics;
bits.backend_depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp;
bits.backend_reversed_depth_range = g_ActiveConfig.backend_info.bSupportsReversedDepthRange;
bits.backend_bitfield = g_ActiveConfig.backend_info.bSupportsBitfield;
bits.backend_dynamic_sampler_indexing =
g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing;
return bits;
}
@ -65,7 +68,7 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
if (include_host_config)
{
// We're using 18 bits, so 5 hex characters.
// We're using 20 bits, so 5 hex characters.
ShaderHostConfig host_config = ShaderHostConfig::GetCurrent();
filename += StringFromFormat("-%05X", host_config.bits);
}

View File

@ -176,7 +176,9 @@ union ShaderHostConfig
u32 backend_atomics : 1;
u32 backend_depth_clamp : 1;
u32 backend_reversed_depth_range : 1;
u32 pad : 14;
u32 backend_bitfield : 1;
u32 backend_dynamic_sampler_indexing : 1;
u32 pad : 12;
};
static ShaderHostConfig GetCurrent();
@ -316,7 +318,10 @@ inline const char* GetInterpolationQualifier(bool msaa, bool ssaa,
#define I_LINEPTPARAMS "clinept"
#define I_TEXOFFSET "ctexoffset"
static const char s_shader_uniforms[] = "\tfloat4 " I_POSNORMALMATRIX "[6];\n"
static const char s_shader_uniforms[] = "\tuint components;\n"
"\tuint xfmem_dualTexInfo;\n"
"\tuint xfmem_numColorChans;\n"
"\tfloat4 " I_POSNORMALMATRIX "[6];\n"
"\tfloat4 " I_PROJECTION "[4];\n"
"\tint4 " I_MATERIALS "[4];\n"
"\tLight " I_LIGHTS "[8];\n"
@ -325,4 +330,9 @@ static const char s_shader_uniforms[] = "\tfloat4 " I_POSNORMALMATRIX "[6];\n"
"\tfloat4 " I_NORMALMATRICES "[32];\n"
"\tfloat4 " I_POSTTRANSFORMMATRICES "[64];\n"
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
"\tfloat2 " I_VIEWPORT_SIZE ";\n";
"\tfloat2 " I_VIEWPORT_SIZE ";\n"
"\tuint4 xfmem_pack1[8];\n"
"\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n"
"\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
"\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"
"\t#define xfmem_alpha(i) (xfmem_pack1[(i)].w)\n";

View File

@ -0,0 +1,203 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/UberShaderCommon.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
namespace UberShader
{
// Writes helper code shared by all ubershaders into `out`.
//
// Currently the only helper is a software fallback for bitfieldExtract(), emitted when
// host_config.backend_bitfield is not set. api_type is accepted but not consulted.
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
                                 const ShaderHostConfig& host_config)
{
  // The backend reports bitfield support, so no fallback definition is written.
  if (host_config.backend_bitfield)
    return;

  // ==============================================
  // BitfieldExtract for APIs which don't have it
  // ==============================================
  out.Write("uint bitfieldExtract(uint val, int off, int size) {\n"
            " // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
            " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
            "instruction.\n"
            " uint mask = uint((1 << size) - 1);\n"
            " return uint(val >> off) & mask;\n"
            "}\n\n");
}
// Emits the shader-side helper `CalculateLighting`, which evaluates a single light for a
// channel and returns its contribution as an int4.
//
// The emitted function has two stages, both visible in the text written below:
//   1. a switch on `attnfunc` that computes the light direction `ldir` and the
//      attenuation factor `attn`;
//   2. a switch on `diffusefunc` that scales the light's colour by `attn` (and, for the
//      SIGN / CLAMP cases, by dot(ldir, normal)) and returns it rounded to integers.
// The numeric case labels are formatted in from the LIGHTATTN_* / LIGHTDIF_* constants
// rather than being hard-coded in the string.
void WriteLightingFunction(ShaderCode& out)
{
// ==============================================
// Lighting channel calculation helper
// ==============================================
// NOTE: `h` is declared in the emitted code but nothing written by this function reads it.
out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
"float3 normal) {\n"
" float3 ldir, h, cosAttn, distAttn;\n"
" float dist, dist2, attn;\n"
"\n"
" switch (attnfunc) {\n");
// The NONE and DIR labels are emitted back to back, so both select the body that follows.
out.Write(" case %uu: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
out.Write(" case %uu: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = 1.0;\n"
" if (length(ldir) == 0.0)\n"
" ldir = normal;\n"
" break;\n\n");
out.Write(" case %uu: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
" attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS
"[index].dir.xyz)) : 0.0;\n"
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
// In the SPEC case the distance-attenuation coefficients are used as-is when the diffuse
// function is NONE, and normalized otherwise.
out.Write(" if (diffusefunc == %uu) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
" else\n"
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
"float3(1.0, attn, attn*attn));\n"
" break;\n\n");
out.Write(" case %uu: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
" dist2 = dot(ldir, ldir);\n"
" dist = sqrt(dist2);\n"
" ldir = ldir / dist;\n"
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
"[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
" break;\n\n");
// Any other attnfunc value: no attenuation, and the normal stands in for the light direction.
out.Write(" default:\n"
" attn = 1.0;\n"
" ldir = normal;\n"
" break;\n"
" }\n"
"\n"
" switch (diffusefunc) {\n");
out.Write(" case %uu: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
out.Write(" case %uu: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
"[index].color)));\n\n");
out.Write(" case %uu: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
"[index].color)));\n\n");
// Any other diffusefunc value contributes nothing.
out.Write(" default:\n"
" return int4(0, 0, 0, 0);\n"
" }\n"
"}\n\n");
}
// Emits the per-channel vertex lighting loop into `out`.
//
// All `*_var` arguments are shader variable names / expressions that are spliced verbatim
// into the generated text:
//   world_pos_var, normal_var   - passed to CalculateLighting as position and normal
//   in_color_0_var/in_color_1_var   - read as the vertex colours
//   out_color_0_var/out_color_1_var - receive the lit result for channel 0 / channel 1
// api_type is only consulted to prefix the loop with "[loop] " on D3D.
//
// The generated code calls CalculateLighting, which this function does not emit itself
// (see WriteLightingFunction).
void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var,
const char* normal_var, const char* in_color_0_var,
const char* in_color_1_var, const char* out_color_0_var,
const char* out_color_1_var)
{
out.Write("// Lighting\n");
out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n",
api_type == APIType::D3D ? "[loop] " : "");
// Per channel: fetch the colour/alpha control words, start `mat` from the material
// register (index chan + 2) and the light accumulator `lacc` at full intensity.
out.Write(" uint colorreg = xfmem_color(chan);\n"
" uint alphareg = xfmem_alpha(chan);\n"
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
" int4 lacc = int4(255, 255, 255, 255);\n"
"\n");
// Material colour: when matsource is set, take RGB from the vertex colour for this
// channel, falling back to vertex colour 0 and then to white if it is absent.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().matsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.xyz = int3(255, 255, 255);\n"
" }\n"
"\n");
// Material alpha: same selection as above, driven by the alpha control word.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().matsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" mat.w = int(round(%s.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" mat.w = 255;\n"
" } else {\n"
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
" }\n"
"\n");
// Colour lighting: only when enablelighting is set in the colour control word.
// The ambient term comes from the vertex colour (ambsource set) or from the
// material register at index chan.
out.Write(" if (%s != 0u) {\n",
BitfieldExtract("colorreg", LitChannel().enablelighting).c_str());
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().ambsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n",
in_color_0_var, in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.xyz = int3(255, 255, 255);\n"
" } else {\n"
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
" }\n"
"\n");
// The 8-bit light mask is stored as two 4-bit fields; recombine them before testing
// each light's bit in the loop below.
out.Write(" uint light_mask = %s | (%s << 4u);\n",
BitfieldExtract("colorreg", LitChannel().lightMask0_3).c_str(),
BitfieldExtract("colorreg", LitChannel().lightMask4_7).c_str());
out.Write(" uint attnfunc = %s;\n",
BitfieldExtract("colorreg", LitChannel().attnfunc).c_str());
out.Write(" uint diffusefunc = %s;\n",
BitfieldExtract("colorreg", LitChannel().diffusefunc).c_str());
out.Write(
" for (uint light_index = 0u; light_index < 8u; light_index++) {\n"
" if ((light_mask & (1u << light_index)) != 0u)\n"
" lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).xyz;\n",
world_pos_var, normal_var);
out.Write(" }\n"
" }\n"
"\n");
// Alpha lighting: mirrors the colour path above but reads the alpha control word and
// accumulates only the .w component.
out.Write(" if (%s != 0u) {\n",
BitfieldExtract("alphareg", LitChannel().enablelighting).c_str());
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().ambsource).c_str());
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var,
in_color_1_var);
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
out.Write(" lacc.w = int(round(%s.w * 255.0));\n", in_color_0_var);
out.Write(" else\n"
" lacc.w = 255;\n"
" } else {\n"
" lacc.w = " I_MATERIALS " [chan].w;\n"
" }\n"
"\n");
out.Write(" uint light_mask = %s | (%s << 4u);\n",
BitfieldExtract("alphareg", LitChannel().lightMask0_3).c_str(),
BitfieldExtract("alphareg", LitChannel().lightMask4_7).c_str());
out.Write(" uint attnfunc = %s;\n",
BitfieldExtract("alphareg", LitChannel().attnfunc).c_str());
out.Write(" uint diffusefunc = %s;\n",
BitfieldExtract("alphareg", LitChannel().diffusefunc).c_str());
out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {\n\n"
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).w;\n",
world_pos_var, normal_var);
out.Write(" }\n"
" }\n"
"\n");
// Clamp the accumulator to 0..255, modulate the material by it using integer math, and
// route the result to the output variable for this channel via a switch on `chan`.
out.Write(" lacc = clamp(lacc, 0, 255);\n"
"\n"
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
" switch (chan) {\n"
" case 0u: %s = lit_color; break;\n",
out_color_0_var);
out.Write(" case 1u: %s = lit_color; break;\n", out_color_1_var);
out.Write(" }\n"
"}\n"
"\n");
// With fewer than two colour channels enabled and no second vertex colour present,
// output colour 1 mirrors output colour 0.
out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1);
out.Write(" %s = %s;\n\n", out_color_1_var, out_color_0_var);
}
}

View File

@ -0,0 +1,30 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "VideoCommon/ShaderGenCommon.h"
#include "VideoCommon/VideoCommon.h"
namespace UberShader
{
// Common functions across all ubershaders
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);
// Vertex lighting
void WriteLightingFunction(ShaderCode& out);
void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var,
const char* normal_var, const char* in_color_0_var,
const char* in_color_1_var, const char* out_color_0_var,
const char* out_color_1_var);
// bitfieldExtract generator for BitField types
template <typename T>
std::string BitfieldExtract(const std::string& source, T type)
{
return StringFromFormat("bitfieldExtract(%s, %u, %u)", source.c_str(),
static_cast<u32>(type.StartBit()), static_cast<u32>(type.NumBits()));
}
} // namespace UberShader

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "VideoCommon/PixelShaderGen.h"
namespace UberShader
{
#pragma pack(1)
// UID payload identifying one pixel ubershader variant. All fields are bit-fields.
struct pixel_ubershader_uid_data
{
// Texgen count, stored in 4 bits.
u32 num_texgens : 4;
// Single-bit flags.
// NOTE(review): presumably "early depth test" and "shader writes per-pixel depth";
// confirm against GetPixelShaderUid / GenPixelShader, which are not visible here.
u32 early_depth : 1;
u32 per_pixel_depth : 1;
// Returns the size of this struct in bytes (sizeof), not the number of fields.
u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); }
};
#pragma pack()
typedef ShaderUid<pixel_ubershader_uid_data> PixelShaderUid;
PixelShaderUid GetPixelShaderUid();
ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
const pixel_ubershader_uid_data* uid_data);
void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>& callback);
}

View File

@ -0,0 +1,467 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/UberShaderCommon.h"
#include "VideoCommon/VertexShaderGen.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
namespace UberShader
{
// Builds the UID for the vertex ubershader matching the current XF state.
// The payload is zeroed first, then the only field it carries, the texgen count, is
// filled in from xfmem.
VertexShaderUid GetVertexShaderUid()
{
  VertexShaderUid uid;

  auto* const payload = uid.GetUidData<vertex_ubershader_uid_data>();
  memset(payload, 0, sizeof(*payload));
  payload->num_texgens = xfmem.numTexGen.numTexGens;

  return uid;
}
static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out);
// Builds the complete source text of the vertex ubershader for the given API and host
// configuration and returns it as a ShaderCode.
//
// The only per-UID input read here is uid_data->num_texgens, which fixes the number of
// texture-coordinate outputs. The remaining per-draw variation (which vertex components
// are present, lighting setup, texgen configuration) is tested inside the emitted shader
// against values such as `components` and the xfmem_* uniforms.
ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
const vertex_ubershader_uid_data* uid_data)
{
const bool msaa = host_config.msaa;
const bool ssaa = host_config.ssaa;
const bool per_pixel_lighting = host_config.per_pixel_lighting;
const bool vertex_rounding = host_config.vertex_rounding;
const u32 numTexgen = uid_data->num_texgens;
ShaderCode out;
out.Write("// Vertex UberShader\n\n");
out.Write("%s", s_lighting_struct);
// uniforms
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
else
out.Write("cbuffer VSBlock {\n");
out.Write(s_shader_uniforms);
out.Write("};\n");
out.Write("struct VS_OUTPUT {\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting, "");
out.Write("};\n\n");
// Shared helpers (bitfieldExtract fallback, CalculateLighting) must be written before
// main() because the code emitted below calls them.
WriteUberShaderCommonHeader(out, ApiType, host_config);
WriteLightingFunction(out);
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
// Every possible vertex attribute is declared; the emitted code checks `components`
// before using the optional ones.
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i, i);
// We need to always use output blocks for Vulkan, but geometry shaders are also optional.
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
{
out.Write("VARYING_LOCATION(0) out VertexData {\n");
GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting,
GetInterpolationQualifier(msaa, ssaa, true, false));
out.Write("} vs;\n");
}
else
{
// Let's set up attributes
for (u32 i = 0; i < numTexgen; ++i)
out.Write("%s out float3 tex%u;\n", GetInterpolationQualifier(msaa, ssaa), i);
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier(msaa, ssaa));
if (per_pixel_lighting)
{
out.Write("%s out float3 Normal;\n", GetInterpolationQualifier(msaa, ssaa));
out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier(msaa, ssaa));
}
out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier(msaa, ssaa));
out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier(msaa, ssaa));
}
out.Write("void main()\n{\n");
}
else // D3D
{
out.Write("VS_OUTPUT main(\n");
// inputs
out.Write(" float3 rawnorm0 : NORMAL0,\n");
out.Write(" float3 rawnorm1 : NORMAL1,\n");
out.Write(" float3 rawnorm2 : NORMAL2,\n");
out.Write(" float4 rawcolor0 : COLOR0,\n");
out.Write(" float4 rawcolor1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
out.Write(" float3 rawtex%d : TEXCOORD%d,\n", i, i);
// NOTE(review): posmtx is declared as a scalar `uint` here but as `uint4` in the
// OpenGL/Vulkan path above; the shared code below reads `posmtx.r` in both cases.
// Confirm the scalar declaration is intentional.
out.Write(" uint posmtx : BLENDINDICES,\n");
out.Write(" float4 rawpos : POSITION) {\n");
}
out.Write("VS_OUTPUT o;\n"
"\n");
// Transforms
// The position/normal matrices are chosen in the emitted code: per-vertex matrix index
// when VB_HAS_POSMTXIDX is set in `components`, otherwise the single shared matrix.
out.Write("// Position matrix\n"
"float4 P0;\n"
"float4 P1;\n"
"float4 P2;\n"
"\n"
"// Normal matrix\n"
"float3 N0;\n"
"float3 N1;\n"
"float3 N2;\n"
"\n"
"if ((components & %uu) != 0u) {// VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX);
out.Write(" // Vertex format has a per-vertex matrix\n"
" int posidx = int(posmtx.r);\n"
" P0 = " I_TRANSFORMMATRICES "[posidx];\n"
" P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
" P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
"\n"
" int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
" N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
" N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
" N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
"} else {\n"
" // One shared matrix\n"
" P0 = " I_POSNORMALMATRIX "[0];\n"
" P1 = " I_POSNORMALMATRIX "[1];\n"
" P2 = " I_POSNORMALMATRIX "[2];\n"
" N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
" N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
" N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
"}\n"
"\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
"\n"
"// Only the first normal gets normalized (TODO: why?)\n"
"float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
"if ((components & %uu) != 0u) // VB_HAS_NRM0\n",
VB_HAS_NRM0);
out.Write(
" _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"
"\n"
"float3 _norm1 = float3(0.0, 0.0, 0.0);\n"
"if ((components & %uu) != 0u) // VB_HAS_NRM1\n",
VB_HAS_NRM1);
out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
"\n"
"float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
"if ((components & %uu) != 0u) // VB_HAS_NRM2\n",
VB_HAS_NRM2);
out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
"\n");
// Hardware Lighting
WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0",
"o.colors_1");
// Texture Coordinates
if (numTexgen > 0)
GenVertexShaderTexGens(ApiType, numTexgen, out);
// clipPos/w needs to be done in pixel shader, not here
out.Write("o.clipPos = o.pos;\n");
if (per_pixel_lighting)
{
// With per-pixel lighting the raw vertex colours (when present) overwrite the
// vertex-lit colours written above, and the normal/world position are passed on.
out.Write("o.Normal = _norm0;\n");
out.Write("o.WorldPos = pos.xyz;\n");
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
out.Write(" o.colors_0 = rawcolor0;\n");
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1);
out.Write(" o.colors_1 = rawcolor1;\n");
}
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
// our own depth clipping and calculate the depth range before the perspective divide if
// necessary.
if (host_config.backend_depth_clamp)
{
// Since we're adjusting z for the depth range before the perspective divide, we have to do our
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the
// software backend, which is a hack to fix Sonic Adventure and Unleashed games.
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n");
out.Write("o.clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w
out.Write("o.clipDist1 = -clipDepth;\n"); // Far: z > 0
}
// Write the true depth value. If the game uses depth textures, then the pixel shader will
// override it with the correct values if not then early z culling will improve speed.
// There are two different ways to do this, when the depth range is oversized, we process
// the depth range in the vertex shader, if not we let the host driver handle it.
//
// Adjust z for the depth range. We're using an equation which incorporates a depth inversion,
// so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
// We have to handle the depth range in the vertex shader instead of after the perspective
// divide, because some games will use a depth range larger than what is allowed by the
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
// games effectively add a depth bias to the values written to the depth buffer.
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
if (!host_config.backend_clip_control)
{
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
// operation that can introduce a round-trip error.
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
}
// Correct for negative viewports by mirroring all vertices. We need to negate the height here,
// since the viewport height is already negated by the render backend.
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
// The console GPU places the pixel center at 7/12 in screen space unless
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
// in some primitives being placed one pixel too far to the bottom-right,
// which in turn can be critical if it happens for clear quads.
// Hence, we compensate for this pixel center difference so that primitives
// get rasterized correctly.
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
if (vertex_rounding)
{
// By now our position is in clip space. However, higher resolutions than the Wii outputs
// cause an additional pixel offset. Due to a higher pixel density we need to correct this
// by converting our clip-space position into the Wii's screen-space.
// Acquire the right pixel and then convert it back.
// The emitted code only applies the rounding when o.pos.w is exactly 1.0.
out.Write("if (o.pos.w == 1.0f)\n");
out.Write("{\n");
out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n");
out.Write("\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
out.Write("\tss_pixel_x = round(ss_pixel_x);\n");
out.Write("\tss_pixel_y = round(ss_pixel_y);\n");
out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n");
out.Write("\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n");
out.Write("}\n");
}
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
{
// Copy the local VS_OUTPUT `o` to the real outputs: the `vs` interface block when one
// was declared above, otherwise the individual `out` variables.
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
{
AssignVSOutputMembers(out, "vs", "o", numTexgen, per_pixel_lighting);
}
else
{
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (u32 i = 0; i < numTexgen; ++i)
out.Write("tex%d.xyz = o.tex%d;\n", i, i);
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{
out.Write("Normal = o.Normal;\n");
out.Write("WorldPos = o.WorldPos;\n");
}
out.Write("colors_0 = o.colors_0;\n");
out.Write("colors_1 = o.colors_1;\n");
}
if (host_config.backend_depth_clamp)
{
out.Write("gl_ClipDistance[0] = o.clipDist0;\n");
out.Write("gl_ClipDistance[1] = o.clipDist1;\n");
}
// Vulkan NDC space has Y pointing down (right-handed NDC space).
if (ApiType == APIType::Vulkan)
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
else
out.Write("gl_Position = o.pos;\n");
}
else // D3D
{
out.Write("return o;\n");
}
out.Write("}\n");
return out;
}
// Emits the texture-coordinate generation code for `numTexgen` texgens into `out`.
//
// The emitted code runs once per texgen (a plain block when numTexgen == 1, a loop
// otherwise) and, for each one:
//   1. selects the source row for `coord` from texMtxInfo.sourcerow;
//   2. applies the first transform according to texMtxInfo.texgentype;
//   3. optionally applies the post-transform matrix when xfmem_dualTexInfo is non-zero;
//   4. stores the result into o.tex<N> through a switch on the texgen index.
// ApiType is only consulted to prefix the loop with "[loop] " on D3D.
// The emitted code references names declared by the caller's output, such as `o`,
// `pos`, `components`, `rawpos`, `rawnorm*`, `rawtex*`, `_norm1` and `_norm2`.
void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out)
{
// The HLSL compiler complains that the output texture coordinates are uninitialized when trying
// to dynamically index them.
for (u32 i = 0; i < numTexgen; i++)
out.Write("o.tex%u = float3(0.0, 0.0, 0.0);\n", i);
out.Write("// Texture coordinate generation\n");
// A single texgen gets a plain block with a constant index instead of a loop.
if (numTexgen == 1)
out.Write("{ const uint texgen = 0u;\n");
else
out.Write("%sfor (uint texgen = 0u; texgen < %uu; texgen++) {\n",
ApiType == APIType::D3D ? "[loop] " : "", numTexgen);
out.Write(" // Texcoord transforms\n");
out.Write(" float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
" uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");
out.Write(" switch (%s) {\n", BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow).c_str());
out.Write(" case %uu: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
out.Write(" coord.xyz = rawpos.xyz;\n");
out.Write(" break;\n\n");
// For the three normal-sourced rows, `coord` keeps its default when the vertex lacks the
// corresponding normal. These format strings have no trailing "\n", so the following
// `break;` is emitted on the same line as the assignment.
out.Write(" case %uu: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
out.Write(
" coord.xyz = ((components & %uu /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;",
VB_HAS_NRM0);
out.Write(" break;\n\n");
out.Write(" case %uu: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
out.Write(
" coord.xyz = ((components & %uu /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;",
VB_HAS_NRM1);
out.Write(" break;\n\n");
out.Write(" case %uu: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
out.Write(
" coord.xyz = ((components & %uu /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;",
VB_HAS_NRM2);
out.Write(" break;\n\n");
// One case per input texture coordinate; each is used only if the vertex provides it.
for (u32 i = 0; i < 8; i++)
{
out.Write(" case %uu: // XF_SRCTEX%u_INROW\n", XF_SRCTEX0_INROW + i, i);
out.Write(
" coord = ((components & %uu /* VB_HAS_UV%u */) != 0u) ? float4(rawtex%u.x, rawtex%u.y, "
"1.0, 1.0) : coord;\n",
VB_HAS_UV0 << i, i, i, i);
out.Write(" break;\n\n");
}
out.Write(" }\n");
out.Write("\n");
out.Write(" // Input form of AB11 sets z element to 1.0\n");
out.Write(" if (%s == %uu) // inputform == XF_TEXINPUT_AB11\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().inputform).c_str(), XF_TEXINPUT_AB11);
out.Write(" coord.z = 1.0f;\n");
out.Write("\n");
out.Write(" // first transformation\n");
out.Write(" uint texgentype = %s;\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype).c_str());
out.Write(" float3 output_tex;\n"
" switch (texgentype)\n"
" {\n");
out.Write(" case %uu: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
out.Write(" {\n");
out.Write(" uint light = %s;\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift).c_str());
out.Write(" uint source = %s;\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift).c_str());
// Emboss starts from a previously written texcoord, selected via a switch because the
// outputs are separate variables rather than an indexable array.
out.Write(" switch (source) {\n");
for (u32 i = 0; i < numTexgen; i++)
out.Write(" case %uu: output_tex.xyz = o.tex%u; break;\n", i, i);
out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
" }\n");
out.Write(" if ((components & %uu) != 0u) { // VB_HAS_NRM1 | VB_HAS_NRM2\n",
VB_HAS_NRM1 | VB_HAS_NRM2); // Should this be VB_HAS_NRM1 | VB_HAS_NRM2
out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
" output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
" }\n"
" }\n"
" break;\n\n");
out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
" break;\n\n");
out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
" break;\n\n");
// Regular texgen: the matrix comes from the per-vertex matrix index (stored in the .z
// of the matching raw texcoord) when present, otherwise from the per-texgen matrices.
// In both cases the third component is either projected (STQ) or fixed to 1.0.
out.Write(" default: // Also XF_TEXGEN_REGULAR\n"
" {\n");
out.Write(" if ((components & (%uu /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {\n",
VB_HAS_TEXMTXIDX0);
out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
" // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
" int tmp = 0;\n"
" switch (texgen) {\n");
for (u32 i = 0; i < numTexgen; i++)
out.Write(" case %uu: tmp = int(rawtex%u.z); break;\n", i, i);
out.Write(" }\n"
"\n");
out.Write(" if (%s == %uu) {\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
" } else {\n"
" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
" dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
" 1.0);\n"
" }\n"
" } else {\n");
out.Write(" if (%s == %uu) {\n",
BitfieldExtract("texMtxInfo", TexMtxInfo().projection).c_str(), XF_TEXPROJ_STQ);
out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
" } else {\n"
" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
" dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
" 1.0);\n"
" }\n"
" }\n"
" }\n"
" break;\n\n"
" }\n"
"\n");
// Optional post-transform stage. The matrix indices are masked with 0x3f in the emitted
// code. The first Write below has no trailing "\n", so the next declaration is emitted
// on the same line.
out.Write(" if (xfmem_dualTexInfo != 0u) {\n");
out.Write(" uint postMtxInfo = xfmem_postMtxInfo(texgen);");
out.Write(" uint base_index = %s;\n",
BitfieldExtract("postMtxInfo", PostMtxInfo().index).c_str());
out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
" float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
" float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
"\n");
out.Write(" if (%s != 0u)\n", BitfieldExtract("postMtxInfo", PostMtxInfo().normalize).c_str());
out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n"
"\n"
" // multiply by postmatrix\n"
" output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
" dot(P1.xyz, output_tex.xyz) + P1.w,\n"
" dot(P2.xyz, output_tex.xyz) + P2.w);\n"
" }\n\n");
// When q is 0, the GameCube appears to have a special case
// This can be seen in devkitPro's neheGX Lesson08 example for Wii
// Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling)
out.Write(" if (texgentype == %uu && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
XF_TEXGEN_REGULAR);
out.Write(
" output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
"\n");
out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n");
out.Write(" switch (texgen) {\n");
for (u32 i = 0; i < numTexgen; i++)
out.Write(" case %uu: o.tex%u = output_tex; break;\n", i, i);
out.Write(" }\n"
"}\n");
}
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)
{
VertexShaderUid uid;
std::memset(&uid, 0, sizeof(uid));
for (u32 texgens = 0; texgens <= 8; texgens++)
{
auto* vuid = uid.GetUidData<UberShader::vertex_ubershader_uid_data>();
vuid->num_texgens = texgens;
callback(uid);
}
}
}

View File

@ -0,0 +1,28 @@
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "VideoCommon/PixelShaderGen.h"
namespace UberShader
{
// Pack the UID payload to 1-byte alignment for the duration of this struct.
#pragma pack(1)
// Payload stored inside a vertex ubershader UID (see the VertexShaderUid typedef).
struct vertex_ubershader_uid_data
{
// Number of texgens the shader variant handles; a 4-bit field.
u32 num_texgens : 4;
// Returns the size of this struct in bytes.
u32 NumValues() const { return sizeof(vertex_ubershader_uid_data); }
};
// Restore the default packing.
#pragma pack()
// UID type for vertex ubershaders, wrapping vertex_ubershader_uid_data.
typedef ShaderUid<vertex_ubershader_uid_data> VertexShaderUid;
// NOTE(review): presumably builds the UID matching the current emulated state;
// the definition is not visible from this header - confirm.
VertexShaderUid GetVertexShaderUid();
// Returns the ShaderCode for the vertex ubershader described by uid_data, for
// the given API type and host configuration.
ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config,
const vertex_ubershader_uid_data* uid_data);
// Calls callback once for every vertex ubershader UID that can be generated.
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback);
}

View File

@ -44,13 +44,6 @@ static VertexLoaderMap s_vertex_loader_map;
u8* cached_arraybases[12];
// Used in the Vulkan backend.
// Hands out the address of s_native_vertex_map so callers can access the map
// of native vertex formats directly.
NativeVertexFormatMap* GetNativeVertexFormatMap()
{
return &s_native_vertex_map;
}
void Init()
{
MarkAllDirty();
@ -133,6 +126,75 @@ void MarkAllDirty()
g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8);
}
// Looks up the native vertex format cached for decl in s_native_vertex_map.
// On a miss, asks g_vertex_manager to create one, stores it in the map under
// decl, and returns it. The map owns the format; the returned pointer is the
// raw pointer held by the map entry.
NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
{
  const auto cached = s_native_vertex_map.find(decl);
  if (cached != s_native_vertex_map.end())
    return cached->second.get();

  // Not cached yet: create the format and hand ownership to the map.
  std::unique_ptr<NativeVertexFormat> created = g_vertex_manager->CreateNativeVertexFormat(decl);
  const auto inserted = s_native_vertex_map.emplace(decl, std::move(created));
  return inserted.first->second.get();
}
// Returns the vertex format to use with vertex ubershaders for decl.
// Every attribute slot (position, normals, colors, texcoords, posmtx) is
// enabled in the resulting declaration: slots enabled in decl are copied as-is,
// the others are filled with a placeholder at offset 0. The stride is taken
// from decl. The result is obtained through GetOrCreateMatchingFormat().
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
{
  // The padding in the structs can cause the memcmp() in the map to create duplicates.
  // Avoid this by zeroing the whole declaration before filling it in.
  PortableVertexDeclaration uber_decl;
  std::memset(&uber_decl, 0, sizeof(uber_decl));
  uber_decl.stride = decl.stride;

  // Mirrors src into dst when src is enabled; otherwise writes an enabled
  // placeholder at offset 0 using the supplied type/component count/integer flag.
  auto FillAttribute = [](AttributeFormat& dst, const AttributeFormat& src, VarType dummy_type,
                          int dummy_components, bool dummy_integer) {
    if (src.enable)
    {
      dst.type = src.type;
      dst.components = src.components;
      dst.offset = src.offset;
      dst.enable = src.enable;
      dst.integer = src.integer;
      return;
    }

    dst.type = dummy_type;
    dst.components = dummy_components;
    dst.offset = 0;
    dst.enable = true;
    dst.integer = dummy_integer;
  };

  FillAttribute(uber_decl.position, decl.position, VAR_FLOAT, 1, false);

  for (size_t n = 0; n < ArraySize(uber_decl.normals); n++)
    FillAttribute(uber_decl.normals[n], decl.normals[n], VAR_FLOAT, 1, false);

  for (size_t c = 0; c < ArraySize(uber_decl.colors); c++)
    FillAttribute(uber_decl.colors[c], decl.colors[c], VAR_UNSIGNED_BYTE, 4, false);

  for (size_t t = 0; t < ArraySize(uber_decl.texcoords); t++)
    FillAttribute(uber_decl.texcoords[t], decl.texcoords[t], VAR_FLOAT, 1, false);

  FillAttribute(uber_decl.posmtx, decl.posmtx, VAR_UNSIGNED_BYTE, 1, true);

  return GetOrCreateMatchingFormat(uber_decl);
}
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
{
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
@ -208,6 +270,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
}
s_current_vtx_fmt = loader->m_native_vertex_format;
g_current_components = loader->m_native_components;
VertexShaderManager::SetVertexFormat(loader->m_native_components);
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference

View File

@ -24,7 +24,15 @@ void Clear();
void MarkAllDirty();
NativeVertexFormatMap* GetNativeVertexFormatMap();
// Creates or obtains a pointer to a VertexFormat representing decl.
// If this call creates a new VertexFormat and the game later uses a matching vertex
// declaration, the previously created format is reused.
NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl);
// For vertex ubershaders, all attributes need to be present, even when the vertex
// format does not contain them. This function returns a vertex format with dummy
// offsets set to the unused attributes.
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);
// Returns -1 if buf_size is insufficient, else the amount of bytes consumed
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess);

View File

@ -193,22 +193,24 @@ void VertexManagerBase::Flush()
g_video_backend->CheckInvalidState();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d",
PRIM_LOG("frame%d:\n texgen=%u, numchan=%u, dualtex=%u, ztex=%u, cole=%u, alpe=%u, ze=%u",
g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
xfmem.dualTexTrans.enabled, bpmem.ztex2.op, (int)bpmem.blendmode.colorupdate,
(int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);
xfmem.dualTexTrans.enabled, bpmem.ztex2.op.Value(), bpmem.blendmode.colorupdate.Value(),
bpmem.blendmode.alphaupdate.Value(), bpmem.zmode.updateenable.Value());
for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
for (u32 i = 0; i < xfmem.numChan.numColorChans; ++i)
{
LitChannel* ch = &xfmem.color[i];
PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i,
ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
PRIM_LOG("colchan%u: matsrc=%u, light=0x%x, ambsrc=%u, diffunc=%u, attfunc=%u", i,
ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
ch->diffusefunc.Value(), ch->attnfunc.Value());
ch = &xfmem.alpha[i];
PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i,
ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
PRIM_LOG("alpchan%u: matsrc=%u, light=0x%x, ambsrc=%u, diffunc=%u, attfunc=%u", i,
ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
ch->diffusefunc.Value(), ch->attnfunc.Value());
}
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
for (u32 i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
@ -216,16 +218,17 @@ void VertexManagerBase::Flush()
if (tinfo.texgentype != XF_TEXGEN_REGULAR)
tinfo.projection = 0;
PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, "
"postmtx=%d, postnorm=%d",
i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow,
tinfo.embosssourceshift, tinfo.embosslightshift, xfmem.postMtxInfo[i].index,
xfmem.postMtxInfo[i].normalize);
PRIM_LOG("txgen%u: proj=%u, input=%u, gentype=%u, srcrow=%u, embsrc=%u, emblght=%u, "
"postmtx=%u, postnorm=%u",
i, tinfo.projection.Value(), tinfo.inputform.Value(), tinfo.texgentype.Value(),
tinfo.sourcerow.Value(), tinfo.embosssourceshift.Value(),
tinfo.embosslightshift.Value(), xfmem.postMtxInfo[i].index.Value(),
xfmem.postMtxInfo[i].normalize.Value());
}
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x",
(int)bpmem.genMode.numtevstages + 1, (int)bpmem.genMode.numindstages,
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable,
PRIM_LOG("pixel: tev=%u, ind=%u, texgen=%u, dstalpha=%u, alphatest=0x%x",
bpmem.genMode.numtevstages.Value() + 1, bpmem.genMode.numindstages.Value(),
bpmem.genMode.numtexgens.Value(), bpmem.dstalpha.enable.Value(),
(bpmem.alpha_test.hex >> 16) & 0xff);
#endif

View File

@ -114,16 +114,16 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
if (uid_data->components & VB_HAS_COL0)
out.Write("ATTRIBUTE_LOCATION(%d) in float4 color0;\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if (uid_data->components & VB_HAS_COL1)
out.Write("ATTRIBUTE_LOCATION(%d) in float4 color1;\n", SHADER_COLOR1_ATTRIB);
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx)
{
out.Write("ATTRIBUTE_LOCATION(%d) in float%d tex%d;\n", SHADER_TEXTURE0_ATTRIB + i,
out.Write("ATTRIBUTE_LOCATION(%d) in float%d rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i,
hastexmtx ? 3 : 2, i);
}
}
@ -143,7 +143,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
{
if (i < uid_data->numTexGens)
{
out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(msaa, ssaa), i);
out.Write("%s out float3 tex%u;\n", GetInterpolationQualifier(msaa, ssaa), i);
}
}
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier(msaa, ssaa));
@ -170,14 +170,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
if (uid_data->components & VB_HAS_NRM2)
out.Write(" float3 rawnorm2 : NORMAL2,\n");
if (uid_data->components & VB_HAS_COL0)
out.Write(" float4 color0 : COLOR0,\n");
out.Write(" float4 rawcolor0 : COLOR0,\n");
if (uid_data->components & VB_HAS_COL1)
out.Write(" float4 color1 : COLOR1,\n");
out.Write(" float4 rawcolor1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx)
out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
out.Write(" float%d rawtex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
}
if (uid_data->components & VB_HAS_POSMTXIDX)
out.Write(" uint4 posmtx : BLENDINDICES,\n");
@ -242,18 +242,18 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
if (uid_data->numColorChans == 0)
{
if (uid_data->components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
out.Write("o.colors_0 = rawcolor0;\n");
else
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, uid_data->numColorChans,
"color", "o.colors_");
"rawcolor", "o.colors_");
if (uid_data->numColorChans < 2)
{
if (uid_data->components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n");
out.Write("o.colors_1 = rawcolor1;\n");
else
out.Write("o.colors_1 = o.colors_0;\n");
}
@ -296,7 +296,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n",
out.Write("coord = float4(rawtex%d.x, rawtex%d.y, 1.0, 1.0);\n",
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break;
}
@ -338,7 +338,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
default:
if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i))
{
out.Write("int tmp = int(tex%d.z);\n", i);
out.Write("int tmp = int(rawtex%d.z);\n", i);
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
@ -407,10 +407,10 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("o.WorldPos = pos.xyz;\n");
if (uid_data->components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
out.Write("o.colors_0 = rawcolor0;\n");
if (uid_data->components & VB_HAS_COL1)
out.Write("o.colors_1 = color1;\n");
out.Write("o.colors_1 = rawcolor1;\n");
}
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
@ -495,7 +495,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
// TODO: Pass interface blocks between shader stages even if geometry shaders
// are not supported, however that will require at least OpenGL 3.2 support.
for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
out.Write("uv%d.xyz = o.tex%d;\n", i, i);
out.Write("tex%d.xyz = o.tex%d;\n", i, i);
out.Write("clipPos = o.clipPos;\n");
if (per_pixel_lighting)
{

View File

@ -30,6 +30,7 @@ alignas(16) static float g_fProjectionMatrix[16];
// track changes
static bool bTexMatricesChanged[2], bPosNormalMatrixChanged, bProjectionChanged, bViewportChanged;
static bool bTexMtxInfoChanged, bLightingConfigChanged;
static BitSet32 nMaterialsChanged;
static int nTransformMatricesChanged[2]; // min,max
static int nNormalMatricesChanged[2]; // min,max
@ -193,8 +194,10 @@ void VertexShaderManager::Init()
bPosNormalMatrixChanged = false;
bProjectionChanged = true;
bViewportChanged = false;
bTexMtxInfoChanged = false;
bLightingConfigChanged = false;
xfmem = {};
std::memset(&xfmem, 0, sizeof(xfmem));
constants = {};
ResetView();
@ -561,6 +564,32 @@ void VertexShaderManager::SetConstants()
dirty = true;
}
if (bTexMtxInfoChanged)
{
bTexMtxInfoChanged = false;
constants.xfmem_dualTexInfo = xfmem.dualTexTrans.enabled;
for (size_t i = 0; i < ArraySize(xfmem.texMtxInfo); i++)
constants.xfmem_pack1[i][0] = xfmem.texMtxInfo[i].hex;
for (size_t i = 0; i < ArraySize(xfmem.postMtxInfo); i++)
constants.xfmem_pack1[i][1] = xfmem.postMtxInfo[i].hex;
dirty = true;
}
if (bLightingConfigChanged)
{
bLightingConfigChanged = false;
for (size_t i = 0; i < 2; i++)
{
constants.xfmem_pack1[i][2] = xfmem.color[i].hex;
constants.xfmem_pack1[i][3] = xfmem.alpha[i].hex;
}
constants.xfmem_numColorChans = xfmem.numChan.numColorChans;
dirty = true;
}
}
void VertexShaderManager::InvalidateXFRange(int start, int end)
@ -758,6 +787,27 @@ void VertexShaderManager::ResetView()
bProjectionChanged = true;
}
// Records the component mask of the active vertex format in the vertex shader
// constants. The constants are only marked dirty when the mask actually changes.
void VertexShaderManager::SetVertexFormat(u32 components)
{
  // Same mask as before: leave the constants untouched.
  if (constants.components == components)
    return;

  constants.components = components;
  dirty = true;
}
// Flags the texture matrix info constants as stale. The flag is consumed by
// SetConstants(), which then re-copies dualTexTrans, texMtxInfo and postMtxInfo
// from xfmem into the constants.
// index is currently ignored; the change is tracked with a single flag.
void VertexShaderManager::SetTexMatrixInfoChanged(int index)
{
// TODO: Should we track this with more precision, like which indices changed?
// The whole vertex constants are probably going to be uploaded regardless.
bTexMtxInfoChanged = true;
}
// Flags the lighting configuration constants as stale. The flag is consumed by
// SetConstants(), which then re-copies the color/alpha channel registers and
// the number of color channels from xfmem into the constants.
void VertexShaderManager::SetLightingConfigChanged()
{
bLightingConfigChanged = true;
}
void VertexShaderManager::TransformToClipSpace(const float* data, float* out, u32 MtxIdx)
{
const float* world_matrix = &xfmem.posMatrices[(MtxIdx & 0x3f) * 4];
@ -800,6 +850,8 @@ void VertexShaderManager::DoState(PointerWrap& p)
p.Do(bPosNormalMatrixChanged);
p.Do(bProjectionChanged);
p.Do(bViewportChanged);
p.Do(bTexMtxInfoChanged);
p.Do(bLightingConfigChanged);
p.Do(constants);

View File

@ -36,6 +36,10 @@ public:
static void RotateView(float x, float y);
static void ResetView();
static void SetVertexFormat(u32 components);
static void SetTexMatrixInfoChanged(int index);
static void SetLightingConfigChanged();
// data: 3 floats representing the X, Y and Z vertex model coordinates and the posmatrix index.
// out: 4 floats which will be initialized with the corresponding clip space coordinates
// NOTE: g_fProjectionMatrix must be up to date when this is called

View File

@ -38,6 +38,7 @@
<ItemGroup>
<ClCompile Include="AbstractTexture.cpp" />
<ClCompile Include="AsyncRequests.cpp" />
<ClCompile Include="AsyncShaderCompiler.cpp" />
<ClCompile Include="AVIDump.cpp" />
<ClCompile Include="BoundingBox.cpp" />
<ClCompile Include="BPFunctions.cpp" />
@ -66,12 +67,15 @@
<ClCompile Include="RenderState.cpp" />
<ClCompile Include="LightingShaderGen.cpp" />
<ClCompile Include="ShaderGenCommon.cpp" />
<ClCompile Include="UberShaderCommon.cpp" />
<ClCompile Include="UberShaderPixel.cpp" />
<ClCompile Include="Statistics.cpp" />
<ClCompile Include="GeometryShaderGen.cpp" />
<ClCompile Include="GeometryShaderManager.cpp" />
<ClCompile Include="TextureCacheBase.cpp" />
<ClCompile Include="TextureConfig.cpp" />
<ClCompile Include="TextureConversionShader.cpp" />
<ClCompile Include="UberShaderVertex.cpp" />
<ClCompile Include="VertexLoader.cpp" />
<ClCompile Include="VertexLoaderBase.cpp" />
<ClCompile Include="VertexLoaderX64.cpp" />
@ -94,6 +98,7 @@
<ItemGroup>
<ClInclude Include="AbstractTexture.h" />
<ClInclude Include="AsyncRequests.h" />
<ClInclude Include="AsyncShaderCompiler.h" />
<ClInclude Include="AVIDump.h" />
<ClInclude Include="BoundingBox.h" />
<ClInclude Include="BPFunctions.h" />
@ -107,6 +112,8 @@
<ClInclude Include="Fifo.h" />
<ClInclude Include="FPSCounter.h" />
<ClInclude Include="FramebufferManagerBase.h" />
<ClInclude Include="UberShaderCommon.h" />
<ClInclude Include="UberShaderPixel.h" />
<ClInclude Include="HiresTextures.h" />
<ClInclude Include="ImageWrite.h" />
<ClInclude Include="IndexGenerator.h" />
@ -131,6 +138,7 @@
<ClInclude Include="TextureConfig.h" />
<ClInclude Include="TextureConversionShader.h" />
<ClInclude Include="TextureDecoder.h" />
<ClInclude Include="UberShaderVertex.h" />
<ClInclude Include="VertexLoader.h" />
<ClInclude Include="VertexLoaderBase.h" />
<ClInclude Include="VertexLoaderManager.h" />
@ -172,4 +180,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -176,6 +176,18 @@
<ClCompile Include="ShaderGenCommon.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="AsyncShaderCompiler.cpp">
<Filter>Util</Filter>
</ClCompile>
<ClCompile Include="UberShaderPixel.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="UberShaderCommon.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="UberShaderVertex.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="CommandProcessor.h" />
@ -332,8 +344,20 @@
<ClInclude Include="AbstractTexture.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="AsyncShaderCompiler.h">
<Filter>Util</Filter>
</ClInclude>
<ClInclude Include="UberShaderPixel.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="UberShaderCommon.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="UberShaderVertex.h">
<Filter>Shader Generators</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>

View File

@ -4,6 +4,7 @@
#include <algorithm>
#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Core/Config/GraphicsSettings.h"
@ -93,6 +94,13 @@ void VideoConfig::Refresh()
bBackendMultithreading = Config::Get(Config::GFX_BACKEND_MULTITHREADING);
iCommandBufferExecuteInterval = Config::Get(Config::GFX_COMMAND_BUFFER_EXECUTE_INTERVAL);
bShaderCache = Config::Get(Config::GFX_SHADER_CACHE);
bBackgroundShaderCompiling = Config::Get(Config::GFX_BACKGROUND_SHADER_COMPILING);
bDisableSpecializedShaders = Config::Get(Config::GFX_DISABLE_SPECIALIZED_SHADERS);
bPrecompileUberShaders = Config::Get(Config::GFX_PRECOMPILE_UBER_SHADERS);
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
bForceVertexUberShaders = Config::Get(Config::GFX_FORCE_VERTEX_UBER_SHADERS);
bForcePixelUberShaders = Config::Get(Config::GFX_FORCE_PIXEL_UBER_SHADERS);
bZComploc = Config::Get(Config::GFX_SW_ZCOMPLOC);
bZFreeze = Config::Get(Config::GFX_SW_ZFREEZE);
@ -188,3 +196,37 @@ bool VideoConfig::IsVSync()
{
return bVSync && !Core::GetIsThrottlerTempDisabled();
}
// Picks the thread count used when the user leaves the setting on "automatic":
// the core count minus three, limited to the range [1, 4].
static u32 GetNumAutoShaderCompilerThreads()
{
  // Automatic number. We use clamp(cpus - 3, 1, 4).
  const int spare_cores = cpu_info.num_cores - 3;
  const int at_least_one = std::max(spare_cores, 1);
  const int thread_count = std::min(at_least_one, 4);
  return static_cast<u32>(thread_count);
}
// Returns the number of shader compiler threads to use. A non-negative
// iShaderCompilerThreads is used verbatim; a negative value selects the
// automatic count.
u32 VideoConfig::GetShaderCompilerThreads() const
{
  if (iShaderCompilerThreads < 0)
    return GetNumAutoShaderCompilerThreads();

  return static_cast<u32>(iShaderCompilerThreads);
}
// Returns the number of shader precompiler threads to use. A non-negative
// iShaderPrecompilerThreads is used verbatim; a negative value selects the
// automatic count.
u32 VideoConfig::GetShaderPrecompilerThreads() const
{
  if (iShaderPrecompilerThreads < 0)
    return GetNumAutoShaderCompilerThreads();

  return static_cast<u32>(iShaderPrecompilerThreads);
}
// Reports whether ubershaders should be precompiled: precompilation must be
// enabled, and at least one of background shader compiling or "disable
// specialized shaders" must be on.
bool VideoConfig::CanPrecompileUberShaders() const
{
  if (!bPrecompileUberShaders)
    return false;

  // We don't want to precompile ubershaders if they're never going to be used.
  return bBackgroundShaderCompiling || bDisableSpecializedShaders;
}
// Reports whether specialized shaders may be compiled in the background:
// both background compiling and ubershader precompilation must be enabled.
bool VideoConfig::CanBackgroundCompileShaders() const
{
  if (!bBackgroundShaderCompiling)
    return false;

  // We require precompiled ubershaders to background compile shaders.
  return bPrecompileUberShaders;
}

View File

@ -168,6 +168,36 @@ struct VideoConfig final
// Currently only supported with Vulkan.
int iCommandBufferExecuteInterval;
// The following options determine the ubershader mode:
// No ubershaders:
// - bBackgroundShaderCompiling = false
// - bDisableSpecializedShaders = false
// Hybrid/background compiling:
// - bBackgroundShaderCompiling = true
// - bDisableSpecializedShaders = false
// Ubershaders only:
// - bBackgroundShaderCompiling = false
// - bDisableSpecializedShaders = true
// Enable background shader compiling, use ubershaders while waiting.
bool bBackgroundShaderCompiling;
// Use ubershaders only, don't compile specialized shaders.
bool bDisableSpecializedShaders;
// Precompile ubershader variants at boot/config reload time.
bool bPrecompileUberShaders;
// Number of shader compiler threads.
// 0 disables background compilation.
// -1 uses an automatic number based on the CPU threads.
int iShaderCompilerThreads;
int iShaderPrecompilerThreads;
// Temporary toggling of ubershaders, for debugging
bool bForceVertexUberShaders;
bool bForcePixelUberShaders;
// Static config per API
// TODO: Move this out of VideoConfig
struct
@ -204,6 +234,8 @@ struct VideoConfig final
bool bSupportsInternalResolutionFrameDumps;
bool bSupportsGPUTextureDecoding;
bool bSupportsST3CTextures;
bool bSupportsBitfield; // Needed by UberShaders, so must stay in VideoCommon
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
} backend_info;
// Utility
@ -224,6 +256,10 @@ struct VideoConfig final
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;
}
bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; }
u32 GetShaderCompilerThreads() const;
u32 GetShaderPrecompilerThreads() const;
bool CanPrecompileUberShaders() const;
bool CanBackgroundCompileShaders() const;
};
extern VideoConfig g_Config;

View File

@ -4,6 +4,7 @@
#pragma once
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
@ -132,27 +133,15 @@ enum
union LitChannel
{
struct
{
u32 matsource : 1;
u32 enablelighting : 1;
u32 lightMask0_3 : 4;
u32 ambsource : 1;
u32 diffusefunc : 2; // LIGHTDIF_X
u32 attnfunc : 2; // LIGHTATTN_X
u32 lightMask4_7 : 4;
};
struct
{
u32 hex : 15;
u32 unused : 17;
};
struct
{
u32 dummy0 : 7;
u32 lightparams : 4;
u32 dummy1 : 21;
};
BitField<0, 1, u32> matsource;
BitField<1, 1, u32> enablelighting;
BitField<2, 4, u32> lightMask0_3;
BitField<6, 1, u32> ambsource;
BitField<7, 2, u32> diffusefunc; // LIGHTDIF_X
BitField<9, 2, u32> attnfunc; // LIGHTATTN_X
BitField<11, 4, u32> lightMask4_7;
u32 hex;
unsigned int GetFullLightMask() const
{
return enablelighting ? (lightMask0_3 | (lightMask4_7 << 4)) : 0;
@ -173,28 +162,22 @@ union INVTXSPEC
union TexMtxInfo
{
struct
{
u32 unknown : 1;
u32 projection : 1; // XF_TEXPROJ_X
u32 inputform : 1; // XF_TEXINPUT_X
u32 unknown2 : 1;
u32 texgentype : 3; // XF_TEXGEN_X
u32 sourcerow : 5; // XF_SRCGEOM_X
u32 embosssourceshift : 3; // what generated texcoord to use
u32 embosslightshift : 3; // light index that is used
};
BitField<0, 1, u32> unknown; //
BitField<1, 1, u32> projection; // XF_TEXPROJ_X
BitField<2, 1, u32> inputform; // XF_TEXINPUT_X
BitField<3, 1, u32> unknown2; //
BitField<4, 3, u32> texgentype; // XF_TEXGEN_X
BitField<7, 5, u32> sourcerow; // XF_SRCGEOM_X
BitField<12, 3, u32> embosssourceshift; // what generated texcoord to use
BitField<15, 3, u32> embosslightshift; // light index that is used
u32 hex;
};
union PostMtxInfo
{
struct
{
u32 index : 6; // base row of dual transform matrix
u32 unused : 2;
u32 normalize : 1; // normalize before send operation
};
BitField<0, 6, u32> index; // base row of dual transform matrix
BitField<6, 2, u32> unused; //
BitField<8, 1, u32> normalize; // normalize before send operation
u32 hex;
};

View File

@ -56,6 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETNUMCHAN:
if (xfmem.numChan.numColorChans != (newValue & 3))
g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged();
break;
case XFMEM_SETCHAN0_AMBCOLOR: // Channel Ambient Color
@ -88,11 +89,13 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETCHAN1_ALPHA:
if (((u32*)&xfmem)[address] != (newValue & 0x7fff))
g_vertex_manager->Flush();
VertexShaderManager::SetLightingConfigChanged();
break;
case XFMEM_DUALTEX:
if (xfmem.dualTexTrans.enabled != (newValue & 1))
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(-1);
break;
case XFMEM_SETMATRIXINDA:
@ -146,6 +149,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETTEXMTXINFO + 6:
case XFMEM_SETTEXMTXINFO + 7:
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO);
nextAddress = XFMEM_SETTEXMTXINFO + 8;
break;
@ -159,6 +163,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
case XFMEM_SETPOSMTXINFO + 6:
case XFMEM_SETPOSMTXINFO + 7:
g_vertex_manager->Flush();
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSMTXINFO);
nextAddress = XFMEM_SETPOSMTXINFO + 8;
break;