commit
ba57605266
|
@ -274,6 +274,10 @@ public final class SettingsAdapter extends RecyclerView.Adapter<SettingViewHolde
|
|||
{
|
||||
putXfbSetting(which);
|
||||
}
|
||||
else if (scSetting.getKey().equals(SettingsFile.KEY_UBERSHADER_MODE))
|
||||
{
|
||||
putUberShaderModeSetting(which);
|
||||
}
|
||||
else if (scSetting.getKey().equals(SettingsFile.KEY_WIIMOTE_EXTENSION))
|
||||
{
|
||||
putExtensionSetting(which, Character.getNumericValue(scSetting.getSection().charAt(scSetting.getSection().length() - 1)));
|
||||
|
@ -437,6 +441,33 @@ public final class SettingsAdapter extends RecyclerView.Adapter<SettingViewHolde
|
|||
mView.putSetting(xfbReal);
|
||||
}
|
||||
|
||||
public void putUberShaderModeSetting(int which)
|
||||
{
|
||||
BooleanSetting disableSpecializedShaders = null;
|
||||
BooleanSetting backgroundShaderCompilation = null;
|
||||
|
||||
switch (which)
|
||||
{
|
||||
case 0:
|
||||
disableSpecializedShaders = new BooleanSetting(SettingsFile.KEY_DISABLE_SPECIALIZED_SHADERS, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, false);
|
||||
backgroundShaderCompilation = new BooleanSetting(SettingsFile.KEY_BACKGROUND_SHADER_COMPILING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, false);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
disableSpecializedShaders = new BooleanSetting(SettingsFile.KEY_DISABLE_SPECIALIZED_SHADERS, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, true);
|
||||
backgroundShaderCompilation = new BooleanSetting(SettingsFile.KEY_BACKGROUND_SHADER_COMPILING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, false);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
disableSpecializedShaders = new BooleanSetting(SettingsFile.KEY_DISABLE_SPECIALIZED_SHADERS, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, true);
|
||||
backgroundShaderCompilation = new BooleanSetting(SettingsFile.KEY_BACKGROUND_SHADER_COMPILING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, true);
|
||||
break;
|
||||
}
|
||||
|
||||
mView.putSetting(disableSpecializedShaders);
|
||||
mView.putSetting(backgroundShaderCompilation);
|
||||
}
|
||||
|
||||
public void putExtensionSetting(int which, int wiimoteNumber)
|
||||
{
|
||||
StringSetting extension = new StringSetting(SettingsFile.KEY_WIIMOTE_EXTENSION, SettingsFile.SECTION_WIIMOTE + wiimoteNumber,
|
||||
|
|
|
@ -276,6 +276,8 @@ public final class SettingsFragmentPresenter
|
|||
|
||||
private void addEnhanceSettings(ArrayList<SettingsItem> sl)
|
||||
{
|
||||
int uberShaderModeValue = getUberShaderModeValue();
|
||||
|
||||
Setting resolution = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_INTERNAL_RES);
|
||||
Setting fsaa = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_FSAA);
|
||||
Setting anisotropic = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_ENHANCEMENTS).getSetting(SettingsFile.KEY_ANISOTROPY);
|
||||
|
@ -283,6 +285,7 @@ public final class SettingsFragmentPresenter
|
|||
Setting perPixel = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_PER_PIXEL);
|
||||
Setting forceFilter = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_ENHANCEMENTS).getSetting(SettingsFile.KEY_FORCE_FILTERING);
|
||||
Setting disableFog = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_DISABLE_FOG);
|
||||
IntSetting uberShaderMode = new IntSetting(SettingsFile.KEY_UBERSHADER_MODE, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, uberShaderModeValue);
|
||||
|
||||
sl.add(new SingleChoiceSetting(SettingsFile.KEY_INTERNAL_RES, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.internal_resolution, R.string.internal_resolution_descrip, R.array.internalResolutionEntries, R.array.internalResolutionValues, 0, resolution));
|
||||
sl.add(new SingleChoiceSetting(SettingsFile.KEY_FSAA, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.FSAA, R.string.FSAA_descrip, R.array.FSAAEntries, R.array.FSAAValues, 0, fsaa));
|
||||
|
@ -296,6 +299,7 @@ public final class SettingsFragmentPresenter
|
|||
sl.add(new CheckBoxSetting(SettingsFile.KEY_PER_PIXEL, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.per_pixel_lighting, R.string.per_pixel_lighting_descrip, false, perPixel));
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_FORCE_FILTERING, SettingsFile.SECTION_GFX_ENHANCEMENTS, SettingsFile.SETTINGS_GFX, R.string.force_texture_filtering, R.string.force_texture_filtering_descrip, false, forceFilter));
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_DISABLE_FOG, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.disable_fog, R.string.disable_fog_descrip, false, disableFog));
|
||||
sl.add(new SingleChoiceSetting(SettingsFile.KEY_UBERSHADER_MODE, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.ubershader_mode, R.string.ubershader_mode_descrip, R.array.uberShaderModeEntries, R.array.uberShaderModeValues, 0, uberShaderMode));
|
||||
|
||||
/*
|
||||
Check if we support stereo
|
||||
|
@ -903,6 +907,29 @@ public final class SettingsFragmentPresenter
|
|||
return xfbValue;
|
||||
}
|
||||
|
||||
private int getUberShaderModeValue()
|
||||
{
|
||||
int uberShaderModeValue = 0;
|
||||
|
||||
try
|
||||
{
|
||||
boolean backgroundShaderCompiling = ((BooleanSetting) mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_BACKGROUND_SHADER_COMPILING)).getValue();
|
||||
boolean disableSpecializedShaders = ((BooleanSetting) mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_DISABLE_SPECIALIZED_SHADERS)).getValue();
|
||||
|
||||
if (disableSpecializedShaders)
|
||||
uberShaderModeValue = 2; // Exclusive
|
||||
else if (backgroundShaderCompiling)
|
||||
uberShaderModeValue = 1; // Hybrid
|
||||
else
|
||||
uberShaderModeValue = 0; // Disabled
|
||||
}
|
||||
catch (NullPointerException ex)
|
||||
{
|
||||
}
|
||||
|
||||
return uberShaderModeValue;
|
||||
}
|
||||
|
||||
private int getExtensionValue(int wiimoteNumber)
|
||||
{
|
||||
int extensionValue;
|
||||
|
|
|
@ -79,6 +79,9 @@ public final class SettingsFile
|
|||
public static final String KEY_XFB_REAL = "UseRealXFB";
|
||||
public static final String KEY_FAST_DEPTH = "FastDepthCalc";
|
||||
public static final String KEY_ASPECT_RATIO = "AspectRatio";
|
||||
public static final String KEY_UBERSHADER_MODE = "UberShaderMode";
|
||||
public static final String KEY_DISABLE_SPECIALIZED_SHADERS = "DisableSpecializedShaders";
|
||||
public static final String KEY_BACKGROUND_SHADER_COMPILING = "BackgroundShaderCompiling";
|
||||
|
||||
public static final String KEY_GCPAD_TYPE = "SIDevice";
|
||||
|
||||
|
|
|
@ -89,6 +89,18 @@
|
|||
<item>2</item>
|
||||
</integer-array>
|
||||
|
||||
<!-- Ubershader Mode Preference -->
|
||||
<string-array name="uberShaderModeEntries" translatable="false">
|
||||
<item>Disabled</item>
|
||||
<item>Hybrid</item>
|
||||
<item>Exclusive</item>
|
||||
</string-array>
|
||||
<integer-array name="uberShaderModeValues" translatable="false">
|
||||
<item>0</item>
|
||||
<item>1</item>
|
||||
<item>2</item>
|
||||
</integer-array>
|
||||
|
||||
<!-- Internal Resolution Preference -->
|
||||
<string-array name="internalResolutionEntries" translatable="false">
|
||||
<item>1x Native (640x528)</item>
|
||||
|
|
|
@ -179,6 +179,8 @@
|
|||
<string name="fast_depth_calculation_descrip">Uses a less accurate algorithm to calculate depth values.</string>
|
||||
<string name="aspect_ratio">Aspect Ratio</string>
|
||||
<string name="aspect_ratio_descrip">Select what aspect ratio to use when rendering</string>
|
||||
<string name="ubershader_mode">Ubershader Mode</string>
|
||||
<string name="ubershader_mode_descrip">Specifies when to use Ubershaders. Disabled - Never, Hybrid - Use ubershaders while compiling specialized shaders. Exclusive - Use only ubershaders, largest performance impact.</string>
|
||||
|
||||
<!-- Miscellaneous -->
|
||||
<string name="yes">Yes</string>
|
||||
|
|
|
@ -77,6 +77,20 @@ const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING{
|
|||
const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL{
|
||||
{System::GFX, "Settings", "CommandBufferExecuteInterval"}, 100};
|
||||
const ConfigInfo<bool> GFX_SHADER_CACHE{{System::GFX, "Settings", "ShaderCache"}, true};
|
||||
const ConfigInfo<bool> GFX_BACKGROUND_SHADER_COMPILING{
|
||||
{System::GFX, "Settings", "BackgroundShaderCompiling"}, false};
|
||||
const ConfigInfo<bool> GFX_DISABLE_SPECIALIZED_SHADERS{
|
||||
{System::GFX, "Settings", "DisableSpecializedShaders"}, false};
|
||||
const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS{
|
||||
{System::GFX, "Settings", "PrecompileUberShaders"}, true};
|
||||
const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS{
|
||||
{System::GFX, "Settings", "ShaderCompilerThreads"}, 1};
|
||||
const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS{
|
||||
{System::GFX, "Settings", "ShaderPrecompilerThreads"}, 1};
|
||||
const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS{
|
||||
{System::GFX, "Settings", "ForceVertexUberShaders"}, false};
|
||||
const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS{
|
||||
{System::GFX, "Settings", "ForcePixelUberShaders"}, false};
|
||||
|
||||
const ConfigInfo<bool> GFX_SW_ZCOMPLOC{{System::GFX, "Settings", "SWZComploc"}, true};
|
||||
const ConfigInfo<bool> GFX_SW_ZFREEZE{{System::GFX, "Settings", "SWZFreeze"}, true};
|
||||
|
|
|
@ -59,6 +59,13 @@ extern const ConfigInfo<bool> GFX_ENABLE_VALIDATION_LAYER;
|
|||
extern const ConfigInfo<bool> GFX_BACKEND_MULTITHREADING;
|
||||
extern const ConfigInfo<int> GFX_COMMAND_BUFFER_EXECUTE_INTERVAL;
|
||||
extern const ConfigInfo<bool> GFX_SHADER_CACHE;
|
||||
extern const ConfigInfo<bool> GFX_BACKGROUND_SHADER_COMPILING;
|
||||
extern const ConfigInfo<bool> GFX_DISABLE_SPECIALIZED_SHADERS;
|
||||
extern const ConfigInfo<bool> GFX_PRECOMPILE_UBER_SHADERS;
|
||||
extern const ConfigInfo<int> GFX_SHADER_COMPILER_THREADS;
|
||||
extern const ConfigInfo<int> GFX_SHADER_PRECOMPILER_THREADS;
|
||||
extern const ConfigInfo<bool> GFX_FORCE_VERTEX_UBER_SHADERS;
|
||||
extern const ConfigInfo<bool> GFX_FORCE_PIXEL_UBER_SHADERS;
|
||||
|
||||
extern const ConfigInfo<bool> GFX_SW_ZCOMPLOC;
|
||||
extern const ConfigInfo<bool> GFX_SW_ZFREEZE;
|
||||
|
|
|
@ -43,6 +43,11 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location)
|
|||
Config::GFX_DISABLE_FOG.location, Config::GFX_BORDERLESS_FULLSCREEN.location,
|
||||
Config::GFX_ENABLE_VALIDATION_LAYER.location, Config::GFX_BACKEND_MULTITHREADING.location,
|
||||
Config::GFX_COMMAND_BUFFER_EXECUTE_INTERVAL.location, Config::GFX_SHADER_CACHE.location,
|
||||
Config::GFX_BACKGROUND_SHADER_COMPILING.location,
|
||||
Config::GFX_DISABLE_SPECIALIZED_SHADERS.location,
|
||||
Config::GFX_PRECOMPILE_UBER_SHADERS.location, Config::GFX_SHADER_COMPILER_THREADS.location,
|
||||
Config::GFX_SHADER_PRECOMPILER_THREADS.location,
|
||||
Config::GFX_FORCE_VERTEX_UBER_SHADERS.location, Config::GFX_FORCE_PIXEL_UBER_SHADERS.location,
|
||||
|
||||
Config::GFX_SW_ZCOMPLOC.location, Config::GFX_SW_ZFREEZE.location,
|
||||
Config::GFX_SW_DUMP_OBJECTS.location, Config::GFX_SW_DUMP_TEV_STAGES.location,
|
||||
|
|
|
@ -341,6 +341,7 @@ static void CpuThread()
|
|||
{
|
||||
Common::SetCurrentThreadName("CPU-GPU thread");
|
||||
g_video_backend->Video_Prepare();
|
||||
Host_Message(WM_USER_CREATE);
|
||||
}
|
||||
|
||||
// This needs to be delayed until after the video backend is ready.
|
||||
|
@ -409,6 +410,7 @@ static void FifoPlayerThread()
|
|||
else
|
||||
{
|
||||
g_video_backend->Video_Prepare();
|
||||
Host_Message(WM_USER_CREATE);
|
||||
Common::SetCurrentThreadName("FIFO-GPU thread");
|
||||
}
|
||||
|
||||
|
@ -601,6 +603,7 @@ static void EmuThread(std::unique_ptr<BootParameters> boot)
|
|||
Common::SetCurrentThreadName("Video thread");
|
||||
|
||||
g_video_backend->Video_Prepare();
|
||||
Host_Message(WM_USER_CREATE);
|
||||
|
||||
// Spawn the CPU thread
|
||||
s_cpu_thread = std::thread(cpuThreadFunc);
|
||||
|
|
|
@ -308,6 +308,14 @@ static wxString gpu_texture_decoding_desc =
|
|||
wxTRANSLATE("Enables texture decoding using the GPU instead of the CPU. This may result in "
|
||||
"performance gains in some scenarios, or on systems where the CPU is the "
|
||||
"bottleneck.\n\nIf unsure, leave this unchecked.");
|
||||
static wxString ubershader_desc =
|
||||
wxTRANSLATE("Disabled: Ubershaders are never used. Stuttering will occur during shader "
|
||||
"compilation, but GPU demands are low. Recommended for low-end hardware.\n\n"
|
||||
"Hybrid: Ubershaders will be used to prevent stuttering during shader "
|
||||
"compilation, but traditional shaders will be used when they will not cause "
|
||||
"stuttering. Balances performance and smoothness.\n\n"
|
||||
"Exclusive: Ubershaders will always be used. Only recommended for high-end "
|
||||
"systems.");
|
||||
|
||||
VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
|
||||
: wxDialog(parent, wxID_ANY, wxString::Format(_("Dolphin %s Graphics Configuration"),
|
||||
|
@ -561,6 +569,29 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
|
|||
row += 1;
|
||||
}
|
||||
|
||||
// ubershaders
|
||||
{
|
||||
const std::array<wxString, 3> mode_choices = {{_("Disabled"), _("Hybrid"), _("Exclusive")}};
|
||||
|
||||
wxChoice* const choice_mode =
|
||||
new wxChoice(page_enh, wxID_ANY, wxDefaultPosition, wxDefaultSize,
|
||||
static_cast<int>(mode_choices.size()), mode_choices.data());
|
||||
RegisterControl(choice_mode, wxGetTranslation(ubershader_desc));
|
||||
szr_enh->Add(new wxStaticText(page_enh, wxID_ANY, _("Ubershaders:")), wxGBPosition(row, 0),
|
||||
wxDefaultSpan, wxALIGN_CENTER_VERTICAL);
|
||||
szr_enh->Add(choice_mode, wxGBPosition(row, 1), span2, wxALIGN_CENTER_VERTICAL);
|
||||
row += 1;
|
||||
|
||||
// Determine ubershader mode
|
||||
choice_mode->Bind(wxEVT_CHOICE, &VideoConfigDiag::OnUberShaderModeChanged, this);
|
||||
if (Config::GetBase(Config::GFX_DISABLE_SPECIALIZED_SHADERS))
|
||||
choice_mode->SetSelection(2);
|
||||
else if (Config::GetBase(Config::GFX_BACKGROUND_SHADER_COMPILING))
|
||||
choice_mode->SetSelection(1);
|
||||
else
|
||||
choice_mode->SetSelection(0);
|
||||
}
|
||||
|
||||
// postproc shader
|
||||
if (vconfig.backend_info.bSupportsPostProcessing)
|
||||
{
|
||||
|
@ -1326,3 +1357,13 @@ void VideoConfigDiag::OnAAChanged(wxCommandEvent& ev)
|
|||
|
||||
Config::SetBaseOrCurrent(Config::GFX_MSAA, vconfig.backend_info.AAModes[mode]);
|
||||
}
|
||||
|
||||
// Handles a change of the "Ubershaders" choice control by translating the selected
// index into the two boolean config values that represent the mode:
//   0 -> no ubershaders      (both flags false)
//   1 -> hybrid ubershaders  (background shader compiling on)
//   2 -> only ubershaders    (specialized shaders disabled)
void VideoConfigDiag::OnUberShaderModeChanged(wxCommandEvent& ev)
{
  const int selection = ev.GetInt();
  const bool hybrid_selected = (selection == 1);
  const bool exclusive_selected = (selection == 2);

  Config::SetBaseOrCurrent(Config::GFX_BACKGROUND_SHADER_COMPILING, hybrid_selected);
  Config::SetBaseOrCurrent(Config::GFX_DISABLE_SPECIALIZED_SHADERS, exclusive_selected);
}
|
||||
|
|
|
@ -140,6 +140,7 @@ protected:
|
|||
void PopulatePostProcessingShaders();
|
||||
void PopulateAAList();
|
||||
void OnAAChanged(wxCommandEvent& ev);
|
||||
void OnUberShaderModeChanged(wxCommandEvent& ev);
|
||||
|
||||
wxChoice* choice_backend;
|
||||
wxChoice* choice_adapter;
|
||||
|
|
|
@ -185,10 +185,9 @@ std::vector<DXGI_SAMPLE_DESC> EnumAAModes(IDXGIAdapter* adapter)
|
|||
ID3D11Device* _device;
|
||||
ID3D11DeviceContext* _context;
|
||||
D3D_FEATURE_LEVEL feat_level;
|
||||
HRESULT hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr,
|
||||
D3D11_CREATE_DEVICE_SINGLETHREADED, supported_feature_levels,
|
||||
NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION, &_device,
|
||||
&feat_level, &_context);
|
||||
HRESULT hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0,
|
||||
supported_feature_levels, NUM_SUPPORTED_FEATURE_LEVELS,
|
||||
D3D11_SDK_VERSION, &_device, &feat_level, &_context);
|
||||
if (FAILED(hr) || feat_level == D3D_FEATURE_LEVEL_10_0)
|
||||
{
|
||||
DXGI_SAMPLE_DESC desc;
|
||||
|
@ -221,9 +220,9 @@ std::vector<DXGI_SAMPLE_DESC> EnumAAModes(IDXGIAdapter* adapter)
|
|||
D3D_FEATURE_LEVEL GetFeatureLevel(IDXGIAdapter* adapter)
|
||||
{
|
||||
D3D_FEATURE_LEVEL feat_level = D3D_FEATURE_LEVEL_9_1;
|
||||
PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, D3D11_CREATE_DEVICE_SINGLETHREADED,
|
||||
supported_feature_levels, NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION,
|
||||
nullptr, &feat_level, nullptr);
|
||||
PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0, supported_feature_levels,
|
||||
NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION, nullptr, &feat_level,
|
||||
nullptr);
|
||||
return feat_level;
|
||||
}
|
||||
|
||||
|
@ -311,8 +310,7 @@ HRESULT Create(HWND wnd)
|
|||
// Creating debug devices can sometimes fail if the user doesn't have the correct
|
||||
// version of the DirectX SDK. If it does, simply fallback to a non-debug device.
|
||||
{
|
||||
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr,
|
||||
D3D11_CREATE_DEVICE_SINGLETHREADED | D3D11_CREATE_DEVICE_DEBUG,
|
||||
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, D3D11_CREATE_DEVICE_DEBUG,
|
||||
supported_feature_levels, NUM_SUPPORTED_FEATURE_LEVELS,
|
||||
D3D11_SDK_VERSION, &device, &featlevel, &context);
|
||||
|
||||
|
@ -339,8 +337,7 @@ HRESULT Create(HWND wnd)
|
|||
if (FAILED(hr))
|
||||
#endif
|
||||
{
|
||||
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr,
|
||||
D3D11_CREATE_DEVICE_SINGLETHREADED, supported_feature_levels,
|
||||
hr = PD3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0, supported_feature_levels,
|
||||
NUM_SUPPORTED_FEATURE_LEVELS, D3D11_SDK_VERSION, &device, &featlevel,
|
||||
&context);
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ namespace DX11
|
|||
namespace D3D
|
||||
{
|
||||
// bytecode->shader
|
||||
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, unsigned int len)
|
||||
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len)
|
||||
{
|
||||
ID3D11VertexShader* v_shader;
|
||||
HRESULT hr = D3D::device->CreateVertexShader(bytecode, len, nullptr, &v_shader);
|
||||
|
@ -73,7 +73,7 @@ bool CompileVertexShader(const std::string& code, D3DBlob** blob)
|
|||
}
|
||||
|
||||
// bytecode->shader
|
||||
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, unsigned int len)
|
||||
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len)
|
||||
{
|
||||
ID3D11GeometryShader* g_shader;
|
||||
HRESULT hr = D3D::device->CreateGeometryShader(bytecode, len, nullptr, &g_shader);
|
||||
|
@ -131,7 +131,7 @@ bool CompileGeometryShader(const std::string& code, D3DBlob** blob,
|
|||
}
|
||||
|
||||
// bytecode->shader
|
||||
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned int len)
|
||||
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len)
|
||||
{
|
||||
ID3D11PixelShader* p_shader;
|
||||
HRESULT hr = D3D::device->CreatePixelShader(bytecode, len, nullptr, &p_shader);
|
||||
|
|
|
@ -16,9 +16,9 @@ namespace DX11
|
|||
{
|
||||
namespace D3D
|
||||
{
|
||||
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, unsigned int len);
|
||||
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, unsigned int len);
|
||||
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, unsigned int len);
|
||||
ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len);
|
||||
ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len);
|
||||
ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len);
|
||||
|
||||
// The returned bytecode buffers should be Release()d.
|
||||
bool CompileVertexShader(const std::string& code, D3DBlob** blob);
|
||||
|
|
|
@ -136,7 +136,7 @@ void StateManager::Apply()
|
|||
m_current.pixelConstants[1] != m_pending.pixelConstants[1])
|
||||
{
|
||||
D3D::context->PSSetConstantBuffers(0, m_pending.pixelConstants[1] ? 2 : 1,
|
||||
m_pending.pixelConstants);
|
||||
m_pending.pixelConstants.data());
|
||||
m_current.pixelConstants[0] = m_pending.pixelConstants[0];
|
||||
m_current.pixelConstants[1] = m_pending.pixelConstants[1];
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <stack>
|
||||
#include <unordered_map>
|
||||
|
@ -269,9 +270,9 @@ private:
|
|||
|
||||
struct Resources
|
||||
{
|
||||
ID3D11ShaderResourceView* textures[8];
|
||||
ID3D11SamplerState* samplers[8];
|
||||
ID3D11Buffer* pixelConstants[2];
|
||||
std::array<ID3D11ShaderResourceView*, 8> textures;
|
||||
std::array<ID3D11SamplerState*, 8> samplers;
|
||||
std::array<ID3D11Buffer*, 2> pixelConstants;
|
||||
ID3D11Buffer* vertexConstants;
|
||||
ID3D11Buffer* geometryConstants;
|
||||
ID3D11Buffer* vertexBuffer;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "VideoBackends/D3D/D3DBase.h"
|
||||
#include "VideoBackends/D3D/D3DShader.h"
|
||||
#include "VideoBackends/D3D/D3DState.h"
|
||||
#include "VideoBackends/D3D/FramebufferManager.h"
|
||||
#include "VideoBackends/D3D/GeometryShaderCache.h"
|
||||
|
||||
|
@ -159,6 +160,9 @@ void GeometryShaderCache::Init()
|
|||
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileShaders();
|
||||
}
|
||||
|
||||
void GeometryShaderCache::LoadShaderCache()
|
||||
|
@ -175,6 +179,9 @@ void GeometryShaderCache::Reload()
|
|||
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileShaders();
|
||||
}
|
||||
|
||||
// ONLY to be used during shutdown.
|
||||
|
@ -203,78 +210,74 @@ void GeometryShaderCache::Shutdown()
|
|||
bool GeometryShaderCache::SetShader(u32 primitive_type)
|
||||
{
|
||||
GeometryShaderUid uid = GetGeometryShaderUid(primitive_type);
|
||||
|
||||
// Check if the shader is already set
|
||||
if (last_entry)
|
||||
if (last_entry && uid == last_uid)
|
||||
{
|
||||
if (uid == last_uid)
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
|
||||
return true;
|
||||
}
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
|
||||
D3D::stateman->SetGeometryShader(last_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
last_uid = uid;
|
||||
|
||||
// Check if the shader is a pass-through shader
|
||||
if (uid.GetUidData()->IsPassthrough())
|
||||
{
|
||||
// Return the default pass-through shader
|
||||
last_uid = uid;
|
||||
last_entry = &pass_entry;
|
||||
D3D::stateman->SetGeometryShader(last_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the shader is already in the cache
|
||||
GSCache::iterator iter;
|
||||
iter = GeometryShaders.find(uid);
|
||||
auto iter = GeometryShaders.find(uid);
|
||||
if (iter != GeometryShaders.end())
|
||||
{
|
||||
const GSCacheEntry& entry = iter->second;
|
||||
last_uid = uid;
|
||||
last_entry = &entry;
|
||||
|
||||
D3D::stateman->SetGeometryShader(last_entry->shader);
|
||||
return (entry.shader != nullptr);
|
||||
}
|
||||
|
||||
// Need to compile a new shader
|
||||
if (CompileShader(uid))
|
||||
return SetShader(primitive_type);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool GeometryShaderCache::CompileShader(const GeometryShaderUid& uid)
|
||||
{
|
||||
D3DBlob* bytecode;
|
||||
ShaderCode code =
|
||||
GenerateGeometryShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
|
||||
|
||||
D3DBlob* pbytecode;
|
||||
if (!D3D::CompileGeometryShader(code.GetBuffer(), &pbytecode))
|
||||
if (!D3D::CompileGeometryShader(code.GetBuffer(), &bytecode) ||
|
||||
!InsertByteCode(uid, bytecode->Data(), bytecode->Size()))
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
|
||||
SAFE_RELEASE(bytecode);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Insert the bytecode into the caches
|
||||
g_gs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
|
||||
|
||||
bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
|
||||
pbytecode->Release();
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
bool GeometryShaderCache::InsertByteCode(const GeometryShaderUid& uid, const void* bytecode,
|
||||
unsigned int bytecodelen)
|
||||
{
|
||||
ID3D11GeometryShader* shader = D3D::CreateGeometryShaderFromByteCode(bytecode, bytecodelen);
|
||||
if (shader == nullptr)
|
||||
return false;
|
||||
|
||||
// TODO: Somehow make the debug name a bit more specific
|
||||
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of GeometryShaderCache");
|
||||
|
||||
// Make an entry in the table
|
||||
GSCacheEntry newentry;
|
||||
newentry.shader = shader;
|
||||
GeometryShaders[uid] = newentry;
|
||||
last_entry = &GeometryShaders[uid];
|
||||
|
||||
if (!shader)
|
||||
return false;
|
||||
|
||||
g_gs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GeometryShaderCache::InsertByteCode(const GeometryShaderUid& uid, const u8* bytecode,
|
||||
size_t len)
|
||||
{
|
||||
GSCacheEntry& newentry = GeometryShaders[uid];
|
||||
newentry.shader = bytecode ? D3D::CreateGeometryShaderFromByteCode(bytecode, len) : nullptr;
|
||||
return newentry.shader != nullptr;
|
||||
}
|
||||
|
||||
void GeometryShaderCache::PrecompileShaders()
|
||||
{
|
||||
EnumerateGeometryShaderUids([](const GeometryShaderUid& uid) {
|
||||
if (GeometryShaders.find(uid) != GeometryShaders.end())
|
||||
return;
|
||||
|
||||
CompileShader(uid);
|
||||
});
|
||||
}
|
||||
|
||||
} // DX11
|
||||
|
|
|
@ -18,14 +18,14 @@ public:
|
|||
static void Reload();
|
||||
static void Clear();
|
||||
static void Shutdown();
|
||||
static bool SetShader(u32 primitive_type); // TODO: Should be renamed to LoadShader
|
||||
static bool InsertByteCode(const GeometryShaderUid& uid, const void* bytecode,
|
||||
unsigned int bytecodelen);
|
||||
static bool SetShader(u32 primitive_type);
|
||||
static bool CompileShader(const GeometryShaderUid& uid);
|
||||
static bool InsertByteCode(const GeometryShaderUid& uid, const u8* bytecode, size_t len);
|
||||
static void PrecompileShaders();
|
||||
|
||||
static ID3D11GeometryShader* GetClearGeometryShader();
|
||||
static ID3D11GeometryShader* GetCopyGeometryShader();
|
||||
|
||||
static ID3D11GeometryShader* GetActiveShader() { return last_entry->shader; }
|
||||
static ID3D11Buffer*& GetConstantBuffer();
|
||||
|
||||
private:
|
||||
|
|
|
@ -13,20 +13,6 @@
|
|||
|
||||
namespace DX11
|
||||
{
|
||||
class D3DVertexFormat : public NativeVertexFormat
|
||||
{
|
||||
public:
|
||||
D3DVertexFormat(const PortableVertexDeclaration& vtx_decl);
|
||||
~D3DVertexFormat() { SAFE_RELEASE(m_layout); }
|
||||
void SetupVertexPointers() override;
|
||||
|
||||
private:
|
||||
std::array<D3D11_INPUT_ELEMENT_DESC, 32> m_elems{};
|
||||
UINT m_num_elems = 0;
|
||||
|
||||
ID3D11InputLayout* m_layout = nullptr;
|
||||
};
|
||||
|
||||
std::unique_ptr<NativeVertexFormat>
|
||||
VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl)
|
||||
{
|
||||
|
@ -66,7 +52,6 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl)
|
|||
this->vtx_decl = _vtx_decl;
|
||||
|
||||
const AttributeFormat* format = &_vtx_decl.position;
|
||||
|
||||
if (format->enable)
|
||||
{
|
||||
m_elems[m_num_elems].SemanticName = "POSITION";
|
||||
|
@ -129,15 +114,18 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl)
|
|||
}
|
||||
}
|
||||
|
||||
void D3DVertexFormat::SetupVertexPointers()
|
||||
// Destructor: hands m_layout to SAFE_RELEASE so the input layout object held by this
// vertex format is released (presumably a no-op while m_layout is still null - confirm
// against the SAFE_RELEASE macro definition).
D3DVertexFormat::~D3DVertexFormat()
|
||||
{
|
||||
SAFE_RELEASE(m_layout);
|
||||
}
|
||||
|
||||
void D3DVertexFormat::SetInputLayout(D3DBlob* vs_bytecode)
|
||||
{
|
||||
if (!m_layout)
|
||||
{
|
||||
// CreateInputLayout requires a shader input, but it only looks at the
|
||||
// signature of the shader, so we don't need to recompute it if the shader
|
||||
// changes.
|
||||
D3DBlob* vs_bytecode = DX11::VertexShaderCache::GetActiveShaderBytecode();
|
||||
|
||||
HRESULT hr = DX11::D3D::device->CreateInputLayout(
|
||||
m_elems.data(), m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &m_layout);
|
||||
if (FAILED(hr))
|
||||
|
|
|
@ -8,12 +8,15 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FileUtil.h"
|
||||
#include "Common/LinearDiskCache.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/StringUtil.h"
|
||||
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Host.h"
|
||||
|
||||
#include "VideoBackends/D3D/D3DBase.h"
|
||||
#include "VideoBackends/D3D/D3DShader.h"
|
||||
#include "VideoBackends/D3D/D3DState.h"
|
||||
#include "VideoBackends/D3D/PixelShaderCache.h"
|
||||
|
||||
#include "VideoCommon/Debugger.h"
|
||||
|
@ -25,10 +28,15 @@
|
|||
namespace DX11
|
||||
{
|
||||
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
|
||||
PixelShaderCache::UberPSCache PixelShaderCache::UberPixelShaders;
|
||||
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
|
||||
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_uber_entry;
|
||||
PixelShaderUid PixelShaderCache::last_uid;
|
||||
UberShader::PixelShaderUid PixelShaderCache::last_uber_uid;
|
||||
|
||||
LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
|
||||
LinearDiskCache<UberShader::PixelShaderUid, u8> g_uber_ps_disk_cache;
|
||||
extern std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;
|
||||
|
||||
ID3D11PixelShader* s_ColorMatrixProgram[2] = {nullptr};
|
||||
ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr};
|
||||
|
@ -429,10 +437,8 @@ ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram()
|
|||
return s_DepthResolveProgram;
|
||||
}
|
||||
|
||||
ID3D11Buffer*& PixelShaderCache::GetConstantBuffer()
|
||||
static void UpdateConstantBuffers()
|
||||
{
|
||||
// TODO: divide the global variables of the generated shaders into about 5 constant buffers to
|
||||
// speed this up
|
||||
if (PixelShaderManager::dirty)
|
||||
{
|
||||
D3D11_MAPPED_SUBRESOURCE map;
|
||||
|
@ -443,14 +449,20 @@ ID3D11Buffer*& PixelShaderCache::GetConstantBuffer()
|
|||
|
||||
ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants));
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the pixel shader constant buffer (pscbuf) after giving
// UpdateConstantBuffers() a chance to refresh it, so callers get the buffer in its
// up-to-date state.
ID3D11Buffer* PixelShaderCache::GetConstantBuffer()
|
||||
{
|
||||
// Must run before the buffer is handed out.
UpdateConstantBuffers();
|
||||
return pscbuf;
|
||||
}
|
||||
|
||||
// this class will load the precompiled shaders into our cache
|
||||
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
|
||||
template <typename UidType>
|
||||
class PixelShaderCacheInserter : public LinearDiskCacheReader<UidType, u8>
|
||||
{
|
||||
public:
|
||||
void Read(const PixelShaderUid& key, const u8* value, u32 value_size)
|
||||
void Read(const UidType& key, const u8* value, u32 value_size)
|
||||
{
|
||||
PixelShaderCache::InsertByteCode(key, value, value_size);
|
||||
}
|
||||
|
@ -499,22 +511,34 @@ void PixelShaderCache::Init()
|
|||
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
void PixelShaderCache::LoadShaderCache()
|
||||
{
|
||||
PixelShaderCacheInserter inserter;
|
||||
PixelShaderCacheInserter<PixelShaderUid> inserter;
|
||||
g_ps_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "PS", true, true), inserter);
|
||||
|
||||
PixelShaderCacheInserter<UberShader::PixelShaderUid> uber_inserter;
|
||||
g_uber_ps_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "UberPS", false, true),
|
||||
uber_inserter);
|
||||
}
|
||||
|
||||
void PixelShaderCache::Reload()
|
||||
{
|
||||
g_ps_disk_cache.Sync();
|
||||
g_ps_disk_cache.Close();
|
||||
g_uber_ps_disk_cache.Sync();
|
||||
g_uber_ps_disk_cache.Close();
|
||||
Clear();
|
||||
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
// ONLY to be used during shutdown.
|
||||
|
@ -522,10 +546,15 @@ void PixelShaderCache::Clear()
|
|||
{
|
||||
for (auto& iter : PixelShaders)
|
||||
iter.second.Destroy();
|
||||
for (auto& iter : UberPixelShaders)
|
||||
iter.second.Destroy();
|
||||
PixelShaders.clear();
|
||||
UberPixelShaders.clear();
|
||||
|
||||
last_entry = nullptr;
|
||||
last_uber_entry = nullptr;
|
||||
last_uid = {};
|
||||
last_uber_uid = {};
|
||||
}
|
||||
|
||||
// Used in Swap() when AA mode has changed
|
||||
|
@ -558,82 +587,249 @@ void PixelShaderCache::Shutdown()
|
|||
Clear();
|
||||
g_ps_disk_cache.Sync();
|
||||
g_ps_disk_cache.Close();
|
||||
g_uber_ps_disk_cache.Sync();
|
||||
g_uber_ps_disk_cache.Close();
|
||||
}
|
||||
|
||||
bool PixelShaderCache::SetShader()
|
||||
{
|
||||
PixelShaderUid uid = GetPixelShaderUid();
|
||||
if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForcePixelUberShaders)
|
||||
return SetUberShader();
|
||||
|
||||
// Check if the shader is already set
|
||||
if (last_entry)
|
||||
PixelShaderUid uid = GetPixelShaderUid();
|
||||
if (last_entry && uid == last_uid)
|
||||
{
|
||||
if (uid == last_uid)
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
|
||||
return (last_entry->shader != nullptr);
|
||||
}
|
||||
if (last_entry->pending)
|
||||
return SetUberShader();
|
||||
|
||||
if (!last_entry->shader)
|
||||
return false;
|
||||
|
||||
D3D::stateman->SetPixelShader(last_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
last_uid = uid;
|
||||
|
||||
// Check if the shader is already in the cache
|
||||
PSCache::iterator iter;
|
||||
iter = PixelShaders.find(uid);
|
||||
auto iter = PixelShaders.find(uid);
|
||||
if (iter != PixelShaders.end())
|
||||
{
|
||||
const PSCacheEntry& entry = iter->second;
|
||||
if (entry.pending)
|
||||
return SetUberShader();
|
||||
|
||||
last_uid = uid;
|
||||
last_entry = &entry;
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
|
||||
return (entry.shader != nullptr);
|
||||
if (!last_entry->shader)
|
||||
return false;
|
||||
|
||||
D3D::stateman->SetPixelShader(last_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Background compiling?
|
||||
if (g_ActiveConfig.CanBackgroundCompileShaders())
|
||||
{
|
||||
// Create a pending entry
|
||||
PSCacheEntry entry;
|
||||
entry.pending = true;
|
||||
PixelShaders[uid] = entry;
|
||||
|
||||
// Queue normal shader compiling and use ubershader
|
||||
g_async_compiler->QueueWorkItem(
|
||||
g_async_compiler->CreateWorkItem<PixelShaderCompilerWorkItem>(uid));
|
||||
return SetUberShader();
|
||||
}
|
||||
|
||||
// Need to compile a new shader
|
||||
D3DBlob* bytecode = nullptr;
|
||||
ShaderCode code =
|
||||
GeneratePixelShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
|
||||
|
||||
D3DBlob* pbytecode;
|
||||
if (!D3D::CompilePixelShader(code.GetBuffer(), &pbytecode))
|
||||
D3D::CompilePixelShader(code.GetBuffer(), &bytecode);
|
||||
if (!InsertByteCode(uid, bytecode->Data(), bytecode->Size()))
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
|
||||
SAFE_RELEASE(bytecode);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Insert the bytecode into the caches
|
||||
g_ps_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
|
||||
|
||||
bool success = InsertByteCode(uid, pbytecode->Data(), pbytecode->Size());
|
||||
pbytecode->Release();
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
|
||||
return success;
|
||||
g_ps_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
|
||||
return SetShader();
|
||||
}
|
||||
|
||||
bool PixelShaderCache::InsertByteCode(const PixelShaderUid& uid, const void* bytecode,
|
||||
unsigned int bytecodelen)
|
||||
bool PixelShaderCache::SetUberShader()
|
||||
{
|
||||
ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
|
||||
if (shader == nullptr)
|
||||
return false;
|
||||
UberShader::PixelShaderUid uid = UberShader::GetPixelShaderUid();
|
||||
|
||||
// TODO: Somehow make the debug name a bit more specific
|
||||
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a pixel shader of PixelShaderCache");
|
||||
|
||||
// Make an entry in the table
|
||||
PSCacheEntry newentry;
|
||||
newentry.shader = shader;
|
||||
PixelShaders[uid] = newentry;
|
||||
last_entry = &PixelShaders[uid];
|
||||
|
||||
if (!shader)
|
||||
if (last_uber_entry && last_uber_uid == uid)
|
||||
{
|
||||
// INCSTAT(stats.numPixelShadersFailed);
|
||||
if (!last_uber_entry->shader)
|
||||
return false;
|
||||
|
||||
D3D::stateman->SetPixelShader(last_uber_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
auto iter = UberPixelShaders.find(uid);
|
||||
if (iter != UberPixelShaders.end())
|
||||
{
|
||||
const PSCacheEntry& entry = iter->second;
|
||||
last_uber_uid = uid;
|
||||
last_uber_entry = &entry;
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
|
||||
if (!last_uber_entry->shader)
|
||||
return false;
|
||||
|
||||
D3D::stateman->SetPixelShader(last_uber_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
D3DBlob* bytecode = nullptr;
|
||||
ShaderCode code =
|
||||
UberShader::GenPixelShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
|
||||
D3D::CompilePixelShader(code.GetBuffer(), &bytecode);
|
||||
if (!InsertByteCode(uid, bytecode->Data(), bytecode->Size()))
|
||||
{
|
||||
SAFE_RELEASE(bytecode);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Lookup map again.
|
||||
g_uber_ps_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
|
||||
bytecode->Release();
|
||||
return SetUberShader();
|
||||
}
|
||||
|
||||
bool PixelShaderCache::InsertByteCode(const PixelShaderUid& uid, const u8* data, size_t len)
|
||||
{
|
||||
ID3D11PixelShader* shader = data ? D3D::CreatePixelShaderFromByteCode(data, len) : nullptr;
|
||||
if (!InsertShader(uid, shader))
|
||||
{
|
||||
SAFE_RELEASE(shader);
|
||||
return false;
|
||||
}
|
||||
|
||||
INCSTAT(stats.numPixelShadersCreated);
|
||||
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PixelShaderCache::InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data,
|
||||
size_t len)
|
||||
{
|
||||
ID3D11PixelShader* shader = data ? D3D::CreatePixelShaderFromByteCode(data, len) : nullptr;
|
||||
if (!InsertShader(uid, shader))
|
||||
{
|
||||
SAFE_RELEASE(shader);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PixelShaderCache::InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader)
|
||||
{
|
||||
auto iter = PixelShaders.find(uid);
|
||||
if (iter != PixelShaders.end() && !iter->second.pending)
|
||||
return false;
|
||||
|
||||
PSCacheEntry& newentry = PixelShaders[uid];
|
||||
newentry.pending = false;
|
||||
newentry.shader = shader;
|
||||
|
||||
INCSTAT(stats.numPixelShadersCreated);
|
||||
SETSTAT(stats.numPixelShadersAlive, PixelShaders.size());
|
||||
return (shader != nullptr);
|
||||
}
|
||||
|
||||
bool PixelShaderCache::InsertShader(const UberShader::PixelShaderUid& uid,
|
||||
ID3D11PixelShader* shader)
|
||||
{
|
||||
auto iter = UberPixelShaders.find(uid);
|
||||
if (iter != UberPixelShaders.end() && !iter->second.pending)
|
||||
return false;
|
||||
|
||||
PSCacheEntry& newentry = UberPixelShaders[uid];
|
||||
newentry.pending = false;
|
||||
newentry.shader = shader;
|
||||
return (shader != nullptr);
|
||||
}
|
||||
|
||||
void PixelShaderCache::QueueUberShaderCompiles()
|
||||
{
|
||||
UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& uid) {
|
||||
if (UberPixelShaders.find(uid) != UberPixelShaders.end())
|
||||
return;
|
||||
|
||||
g_async_compiler->QueueWorkItem(
|
||||
g_async_compiler->CreateWorkItem<UberPixelShaderCompilerWorkItem>(uid));
|
||||
});
|
||||
|
||||
g_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) {
|
||||
Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
|
||||
static_cast<int>(completed), static_cast<int>(total));
|
||||
});
|
||||
g_async_compiler->RetrieveWorkItems();
|
||||
Host_UpdateProgressDialog("", -1, -1);
|
||||
}
|
||||
|
||||
PixelShaderCache::PixelShaderCompilerWorkItem::PixelShaderCompilerWorkItem(
|
||||
const PixelShaderUid& uid)
|
||||
{
|
||||
std::memcpy(&m_uid, &uid, sizeof(uid));
|
||||
}
|
||||
|
||||
PixelShaderCache::PixelShaderCompilerWorkItem::~PixelShaderCompilerWorkItem()
|
||||
{
|
||||
SAFE_RELEASE(m_bytecode);
|
||||
}
|
||||
|
||||
bool PixelShaderCache::PixelShaderCompilerWorkItem::Compile()
|
||||
{
|
||||
ShaderCode code =
|
||||
GeneratePixelShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
|
||||
|
||||
if (D3D::CompilePixelShader(code.GetBuffer(), &m_bytecode))
|
||||
m_shader = D3D::CreatePixelShaderFromByteCode(m_bytecode);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PixelShaderCache::PixelShaderCompilerWorkItem::Retrieve()
|
||||
{
|
||||
if (InsertShader(m_uid, m_shader))
|
||||
g_ps_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
|
||||
else
|
||||
SAFE_RELEASE(m_shader);
|
||||
}
|
||||
|
||||
PixelShaderCache::UberPixelShaderCompilerWorkItem::UberPixelShaderCompilerWorkItem(
|
||||
const UberShader::PixelShaderUid& uid)
|
||||
{
|
||||
std::memcpy(&m_uid, &uid, sizeof(uid));
|
||||
}
|
||||
|
||||
PixelShaderCache::UberPixelShaderCompilerWorkItem::~UberPixelShaderCompilerWorkItem()
|
||||
{
|
||||
SAFE_RELEASE(m_bytecode);
|
||||
}
|
||||
|
||||
bool PixelShaderCache::UberPixelShaderCompilerWorkItem::Compile()
|
||||
{
|
||||
ShaderCode code =
|
||||
UberShader::GenPixelShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
|
||||
|
||||
if (D3D::CompilePixelShader(code.GetBuffer(), &m_bytecode))
|
||||
m_shader = D3D::CreatePixelShaderFromByteCode(m_bytecode);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PixelShaderCache::UberPixelShaderCompilerWorkItem::Retrieve()
|
||||
{
|
||||
if (InsertShader(m_uid, m_shader))
|
||||
g_uber_ps_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
|
||||
else
|
||||
SAFE_RELEASE(m_shader);
|
||||
}
|
||||
|
||||
} // DX11
|
||||
|
|
|
@ -7,10 +7,14 @@
|
|||
#include <d3d11.h>
|
||||
#include <map>
|
||||
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
#include "VideoCommon/UberShaderPixel.h"
|
||||
|
||||
namespace DX11
|
||||
{
|
||||
class D3DBlob;
|
||||
|
||||
class PixelShaderCache
|
||||
{
|
||||
public:
|
||||
|
@ -18,12 +22,15 @@ public:
|
|||
static void Reload();
|
||||
static void Clear();
|
||||
static void Shutdown();
|
||||
static bool SetShader(); // TODO: Should be renamed to LoadShader
|
||||
static bool InsertByteCode(const PixelShaderUid& uid, const void* bytecode,
|
||||
unsigned int bytecodelen);
|
||||
static bool SetShader();
|
||||
static bool SetUberShader();
|
||||
static bool InsertByteCode(const PixelShaderUid& uid, const u8* data, size_t len);
|
||||
static bool InsertByteCode(const UberShader::PixelShaderUid& uid, const u8* data, size_t len);
|
||||
static bool InsertShader(const PixelShaderUid& uid, ID3D11PixelShader* shader);
|
||||
static bool InsertShader(const UberShader::PixelShaderUid& uid, ID3D11PixelShader* shader);
|
||||
static void QueueUberShaderCompiles();
|
||||
|
||||
static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; }
|
||||
static ID3D11Buffer*& GetConstantBuffer();
|
||||
static ID3D11Buffer* GetConstantBuffer();
|
||||
|
||||
static ID3D11PixelShader* GetColorMatrixProgram(bool multisampled);
|
||||
static ID3D11PixelShader* GetColorCopyProgram(bool multisampled);
|
||||
|
@ -40,18 +47,53 @@ private:
|
|||
struct PSCacheEntry
|
||||
{
|
||||
ID3D11PixelShader* shader;
|
||||
bool pending;
|
||||
|
||||
PSCacheEntry() : shader(nullptr) {}
|
||||
PSCacheEntry() : shader(nullptr), pending(false) {}
|
||||
void Destroy() { SAFE_RELEASE(shader); }
|
||||
};
|
||||
|
||||
class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
PixelShaderCompilerWorkItem(const PixelShaderUid& uid);
|
||||
~PixelShaderCompilerWorkItem() override;
|
||||
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
PixelShaderUid m_uid;
|
||||
ID3D11PixelShader* m_shader = nullptr;
|
||||
D3DBlob* m_bytecode = nullptr;
|
||||
};
|
||||
|
||||
class UberPixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
UberPixelShaderCompilerWorkItem(const UberShader::PixelShaderUid& uid);
|
||||
~UberPixelShaderCompilerWorkItem() override;
|
||||
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
UberShader::PixelShaderUid m_uid;
|
||||
ID3D11PixelShader* m_shader = nullptr;
|
||||
D3DBlob* m_bytecode = nullptr;
|
||||
};
|
||||
|
||||
typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
|
||||
typedef std::map<UberShader::PixelShaderUid, PSCacheEntry> UberPSCache;
|
||||
|
||||
static void LoadShaderCache();
|
||||
|
||||
static PSCache PixelShaders;
|
||||
static UberPSCache UberPixelShaders;
|
||||
static const PSCacheEntry* last_entry;
|
||||
static const PSCacheEntry* last_uber_entry;
|
||||
static PixelShaderUid last_uid;
|
||||
static UberShader::PixelShaderUid last_uber_uid;
|
||||
};
|
||||
|
||||
} // namespace DX11
|
||||
|
|
|
@ -837,6 +837,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight,
|
|||
// Enable configuration changes
|
||||
UpdateActiveConfig();
|
||||
g_texture_cache->OnConfigChanged(g_ActiveConfig);
|
||||
VertexShaderCache::RetreiveAsyncShaders();
|
||||
|
||||
SetWindowSize(fbStride, fbHeight);
|
||||
|
||||
|
@ -958,10 +959,6 @@ void Renderer::ApplyState()
|
|||
g_ActiveConfig.bEnablePixelLighting ? vertexConstants : nullptr);
|
||||
D3D::stateman->SetVertexConstants(vertexConstants);
|
||||
D3D::stateman->SetGeometryConstants(GeometryShaderCache::GetConstantBuffer());
|
||||
|
||||
D3D::stateman->SetPixelShader(PixelShaderCache::GetActiveShader());
|
||||
D3D::stateman->SetVertexShader(VertexShaderCache::GetActiveShader());
|
||||
D3D::stateman->SetGeometryShader(GeometryShaderCache::GetActiveShader());
|
||||
}
|
||||
|
||||
void Renderer::RestoreState()
|
||||
|
|
|
@ -159,7 +159,9 @@ void VertexManager::vFlush()
|
|||
return;
|
||||
}
|
||||
|
||||
if (!VertexShaderCache::SetShader())
|
||||
D3DVertexFormat* vertex_format =
|
||||
static_cast<D3DVertexFormat*>(VertexLoaderManager::GetCurrentVertexFormat());
|
||||
if (!VertexShaderCache::SetShader(vertex_format))
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR, true, { printf("Fail to set pixel shader\n"); });
|
||||
return;
|
||||
|
@ -182,7 +184,6 @@ void VertexManager::vFlush()
|
|||
|
||||
PrepareDrawBuffers(stride);
|
||||
|
||||
VertexLoaderManager::GetCurrentVertexFormat()->SetupVertexPointers();
|
||||
g_renderer->ApplyState();
|
||||
|
||||
Draw(stride);
|
||||
|
|
|
@ -4,13 +4,30 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <memory>
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
|
||||
struct ID3D11Buffer;
|
||||
|
||||
namespace DX11
|
||||
{
|
||||
class D3DBlob;
|
||||
class D3DVertexFormat : public NativeVertexFormat
|
||||
{
|
||||
public:
|
||||
D3DVertexFormat(const PortableVertexDeclaration& vtx_decl);
|
||||
~D3DVertexFormat();
|
||||
void SetInputLayout(D3DBlob* vs_bytecode);
|
||||
|
||||
private:
|
||||
std::array<D3D11_INPUT_ELEMENT_DESC, 32> m_elems{};
|
||||
UINT m_num_elems = 0;
|
||||
|
||||
ID3D11InputLayout* m_layout = nullptr;
|
||||
};
|
||||
|
||||
class VertexManager : public VertexManagerBase
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -8,23 +8,32 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/FileUtil.h"
|
||||
#include "Common/LinearDiskCache.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/StringUtil.h"
|
||||
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Host.h"
|
||||
|
||||
#include "VideoBackends/D3D/D3DShader.h"
|
||||
#include "VideoBackends/D3D/D3DState.h"
|
||||
#include "VideoBackends/D3D/VertexManager.h"
|
||||
#include "VideoBackends/D3D/VertexShaderCache.h"
|
||||
|
||||
#include "VideoCommon/Debugger.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
|
||||
namespace DX11
|
||||
{
|
||||
VertexShaderCache::VSCache VertexShaderCache::vshaders;
|
||||
VertexShaderCache::UberVSCache VertexShaderCache::ubervshaders;
|
||||
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_entry;
|
||||
const VertexShaderCache::VSCacheEntry* VertexShaderCache::last_uber_entry;
|
||||
VertexShaderUid VertexShaderCache::last_uid;
|
||||
UberShader::VertexShaderUid VertexShaderCache::last_uber_uid;
|
||||
|
||||
static ID3D11VertexShader* SimpleVertexShader = nullptr;
|
||||
static ID3D11VertexShader* ClearVertexShader = nullptr;
|
||||
|
@ -32,6 +41,8 @@ static ID3D11InputLayout* SimpleLayout = nullptr;
|
|||
static ID3D11InputLayout* ClearLayout = nullptr;
|
||||
|
||||
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
|
||||
LinearDiskCache<UberShader::VertexShaderUid, u8> g_uber_vs_disk_cache;
|
||||
std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;
|
||||
|
||||
ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader()
|
||||
{
|
||||
|
@ -70,10 +81,11 @@ ID3D11Buffer*& VertexShaderCache::GetConstantBuffer()
|
|||
}
|
||||
|
||||
// this class will load the precompiled shaders into our cache
|
||||
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
|
||||
template <typename UidType>
|
||||
class VertexShaderCacheInserter : public LinearDiskCacheReader<UidType, u8>
|
||||
{
|
||||
public:
|
||||
void Read(const VertexShaderUid& key, const u8* value, u32 value_size)
|
||||
void Read(const UidType& key, const u8* value, u32 value_size)
|
||||
{
|
||||
D3DBlob* blob = new D3DBlob(value_size, value);
|
||||
VertexShaderCache::InsertByteCode(key, blob);
|
||||
|
@ -160,36 +172,66 @@ void VertexShaderCache::Init()
|
|||
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
LoadShaderCache();
|
||||
|
||||
g_async_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
|
||||
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
|
||||
g_ActiveConfig.GetShaderPrecompilerThreads() :
|
||||
g_ActiveConfig.GetShaderCompilerThreads());
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
void VertexShaderCache::LoadShaderCache()
|
||||
{
|
||||
VertexShaderCacheInserter inserter;
|
||||
VertexShaderCacheInserter<VertexShaderUid> inserter;
|
||||
g_vs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "VS", true, true), inserter);
|
||||
|
||||
VertexShaderCacheInserter<UberShader::VertexShaderUid> uber_inserter;
|
||||
g_uber_vs_disk_cache.OpenAndRead(GetDiskShaderCacheFileName(APIType::D3D, "UberVS", false, true),
|
||||
uber_inserter);
|
||||
}
|
||||
|
||||
void VertexShaderCache::Reload()
|
||||
{
|
||||
g_async_compiler->WaitUntilCompletion();
|
||||
g_async_compiler->RetrieveWorkItems();
|
||||
|
||||
g_vs_disk_cache.Sync();
|
||||
g_vs_disk_cache.Close();
|
||||
g_uber_vs_disk_cache.Sync();
|
||||
g_uber_vs_disk_cache.Close();
|
||||
Clear();
|
||||
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
LoadShaderCache();
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
QueueUberShaderCompiles();
|
||||
}
|
||||
|
||||
void VertexShaderCache::Clear()
|
||||
{
|
||||
for (auto& iter : vshaders)
|
||||
iter.second.Destroy();
|
||||
for (auto& iter : ubervshaders)
|
||||
iter.second.Destroy();
|
||||
vshaders.clear();
|
||||
ubervshaders.clear();
|
||||
|
||||
last_entry = nullptr;
|
||||
last_uid = {};
|
||||
last_uber_uid = {};
|
||||
last_entry = nullptr;
|
||||
last_uber_entry = nullptr;
|
||||
last_uid = {};
|
||||
last_uber_uid = {};
|
||||
}
|
||||
|
||||
void VertexShaderCache::Shutdown()
|
||||
{
|
||||
g_async_compiler->StopWorkerThreads();
|
||||
g_async_compiler->RetrieveWorkItems();
|
||||
|
||||
SAFE_RELEASE(vscbuf);
|
||||
|
||||
SAFE_RELEASE(SimpleVertexShader);
|
||||
|
@ -201,74 +243,267 @@ void VertexShaderCache::Shutdown()
|
|||
Clear();
|
||||
g_vs_disk_cache.Sync();
|
||||
g_vs_disk_cache.Close();
|
||||
g_uber_vs_disk_cache.Sync();
|
||||
g_uber_vs_disk_cache.Close();
|
||||
}
|
||||
|
||||
bool VertexShaderCache::SetShader()
|
||||
bool VertexShaderCache::SetShader(D3DVertexFormat* vertex_format)
|
||||
{
|
||||
VertexShaderUid uid = GetVertexShaderUid();
|
||||
if (g_ActiveConfig.bDisableSpecializedShaders || g_ActiveConfig.bForceVertexUberShaders)
|
||||
return SetUberShader(vertex_format);
|
||||
|
||||
if (last_entry)
|
||||
VertexShaderUid uid = GetVertexShaderUid();
|
||||
if (last_entry && uid == last_uid)
|
||||
{
|
||||
if (uid == last_uid)
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
|
||||
return (last_entry->shader != nullptr);
|
||||
}
|
||||
if (last_entry->pending)
|
||||
return SetUberShader(vertex_format);
|
||||
|
||||
if (!last_entry->shader)
|
||||
return false;
|
||||
|
||||
vertex_format->SetInputLayout(last_entry->bytecode);
|
||||
D3D::stateman->SetVertexShader(last_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
last_uid = uid;
|
||||
|
||||
VSCache::iterator iter = vshaders.find(uid);
|
||||
auto iter = vshaders.find(uid);
|
||||
if (iter != vshaders.end())
|
||||
{
|
||||
const VSCacheEntry& entry = iter->second;
|
||||
if (entry.pending)
|
||||
return SetUberShader(vertex_format);
|
||||
|
||||
last_uid = uid;
|
||||
last_entry = &entry;
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
|
||||
return (entry.shader != nullptr);
|
||||
if (!last_entry->shader)
|
||||
return false;
|
||||
|
||||
vertex_format->SetInputLayout(last_entry->bytecode);
|
||||
D3D::stateman->SetVertexShader(last_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Background compiling?
|
||||
if (g_ActiveConfig.CanBackgroundCompileShaders())
|
||||
{
|
||||
// Create a pending entry
|
||||
VSCacheEntry entry;
|
||||
entry.pending = true;
|
||||
vshaders[uid] = entry;
|
||||
|
||||
// Queue normal shader compiling and use ubershader
|
||||
g_async_compiler->QueueWorkItem(
|
||||
g_async_compiler->CreateWorkItem<VertexShaderCompilerWorkItem>(uid));
|
||||
return SetUberShader(vertex_format);
|
||||
}
|
||||
|
||||
// Need to compile a new shader
|
||||
D3DBlob* bytecode = nullptr;
|
||||
ShaderCode code =
|
||||
GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
|
||||
|
||||
D3DBlob* pbytecode = nullptr;
|
||||
D3D::CompileVertexShader(code.GetBuffer(), &pbytecode);
|
||||
|
||||
if (pbytecode == nullptr)
|
||||
D3D::CompileVertexShader(code.GetBuffer(), &bytecode);
|
||||
if (!InsertByteCode(uid, bytecode))
|
||||
{
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
|
||||
SAFE_RELEASE(bytecode);
|
||||
return false;
|
||||
}
|
||||
g_vs_disk_cache.Append(uid, pbytecode->Data(), pbytecode->Size());
|
||||
|
||||
bool success = InsertByteCode(uid, pbytecode);
|
||||
pbytecode->Release();
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
|
||||
return success;
|
||||
g_vs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
|
||||
bytecode->Release();
|
||||
return SetShader(vertex_format);
|
||||
}
|
||||
|
||||
bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* bcodeblob)
|
||||
bool VertexShaderCache::SetUberShader(D3DVertexFormat* vertex_format)
|
||||
{
|
||||
ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
|
||||
if (shader == nullptr)
|
||||
D3DVertexFormat* uber_vertex_format = static_cast<D3DVertexFormat*>(
|
||||
VertexLoaderManager::GetUberVertexFormat(vertex_format->GetVertexDeclaration()));
|
||||
UberShader::VertexShaderUid uid = UberShader::GetVertexShaderUid();
|
||||
if (last_uber_entry && last_uber_uid == uid)
|
||||
{
|
||||
if (!last_uber_entry->shader)
|
||||
return false;
|
||||
|
||||
uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
|
||||
D3D::stateman->SetVertexShader(last_uber_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
auto iter = ubervshaders.find(uid);
|
||||
if (iter != ubervshaders.end())
|
||||
{
|
||||
const VSCacheEntry& entry = iter->second;
|
||||
last_uber_uid = uid;
|
||||
last_uber_entry = &entry;
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
|
||||
if (!last_uber_entry->shader)
|
||||
return false;
|
||||
|
||||
uber_vertex_format->SetInputLayout(last_uber_entry->bytecode);
|
||||
D3D::stateman->SetVertexShader(last_uber_entry->shader);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Need to compile a new shader
|
||||
D3DBlob* bytecode = nullptr;
|
||||
ShaderCode code =
|
||||
UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), uid.GetUidData());
|
||||
D3D::CompileVertexShader(code.GetBuffer(), &bytecode);
|
||||
if (!InsertByteCode(uid, bytecode))
|
||||
{
|
||||
SAFE_RELEASE(bytecode);
|
||||
return false;
|
||||
}
|
||||
|
||||
g_uber_vs_disk_cache.Append(uid, bytecode->Data(), bytecode->Size());
|
||||
bytecode->Release();
|
||||
return SetUberShader(vertex_format);
|
||||
}
|
||||
|
||||
bool VertexShaderCache::InsertByteCode(const VertexShaderUid& uid, D3DBlob* blob)
|
||||
{
|
||||
ID3D11VertexShader* shader =
|
||||
blob ? D3D::CreateVertexShaderFromByteCode(blob->Data(), blob->Size()) : nullptr;
|
||||
bool result = InsertShader(uid, shader, blob);
|
||||
SAFE_RELEASE(shader);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool VertexShaderCache::InsertByteCode(const UberShader::VertexShaderUid& uid, D3DBlob* blob)
|
||||
{
|
||||
ID3D11VertexShader* shader =
|
||||
blob ? D3D::CreateVertexShaderFromByteCode(blob->Data(), blob->Size()) : nullptr;
|
||||
bool result = InsertShader(uid, shader, blob);
|
||||
SAFE_RELEASE(shader);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool VertexShaderCache::InsertShader(const VertexShaderUid& uid, ID3D11VertexShader* shader,
|
||||
D3DBlob* blob)
|
||||
{
|
||||
auto iter = vshaders.find(uid);
|
||||
if (iter != vshaders.end() && !iter->second.pending)
|
||||
return false;
|
||||
|
||||
// TODO: Somehow make the debug name a bit more specific
|
||||
D3D::SetDebugObjectName((ID3D11DeviceChild*)shader, "a vertex shader of VertexShaderCache");
|
||||
VSCacheEntry& newentry = vshaders[uid];
|
||||
newentry.pending = false;
|
||||
if (!shader || !blob)
|
||||
return false;
|
||||
|
||||
// Make an entry in the table
|
||||
VSCacheEntry entry;
|
||||
entry.shader = shader;
|
||||
entry.SetByteCode(bcodeblob);
|
||||
shader->AddRef();
|
||||
newentry.SetByteCode(blob);
|
||||
newentry.shader = shader;
|
||||
|
||||
vshaders[uid] = entry;
|
||||
last_entry = &vshaders[uid];
|
||||
INCSTAT(stats.numPixelShadersCreated);
|
||||
SETSTAT(stats.numPixelShadersAlive, static_cast<int>(vshaders.size()));
|
||||
return true;
|
||||
}
|
||||
|
||||
INCSTAT(stats.numVertexShadersCreated);
|
||||
SETSTAT(stats.numVertexShadersAlive, (int)vshaders.size());
|
||||
bool VertexShaderCache::InsertShader(const UberShader::VertexShaderUid& uid,
|
||||
ID3D11VertexShader* shader, D3DBlob* blob)
|
||||
{
|
||||
auto iter = ubervshaders.find(uid);
|
||||
if (iter != ubervshaders.end() && !iter->second.pending)
|
||||
return false;
|
||||
|
||||
VSCacheEntry& newentry = ubervshaders[uid];
|
||||
newentry.pending = false;
|
||||
if (!shader || !blob)
|
||||
return false;
|
||||
|
||||
shader->AddRef();
|
||||
newentry.SetByteCode(blob);
|
||||
newentry.shader = shader;
|
||||
return true;
|
||||
}
|
||||
|
||||
void VertexShaderCache::RetreiveAsyncShaders()
|
||||
{
|
||||
g_async_compiler->RetrieveWorkItems();
|
||||
}
|
||||
|
||||
void VertexShaderCache::QueueUberShaderCompiles()
|
||||
{
|
||||
UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& uid) {
|
||||
if (ubervshaders.find(uid) != ubervshaders.end())
|
||||
return;
|
||||
|
||||
g_async_compiler->QueueWorkItem(
|
||||
g_async_compiler->CreateWorkItem<UberVertexShaderCompilerWorkItem>(uid));
|
||||
});
|
||||
}
|
||||
|
||||
void VertexShaderCache::WaitForBackgroundCompilesToComplete()
|
||||
{
|
||||
g_async_compiler->WaitUntilCompletion([](size_t completed, size_t total) {
|
||||
Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
|
||||
static_cast<int>(completed), static_cast<int>(total));
|
||||
});
|
||||
g_async_compiler->RetrieveWorkItems();
|
||||
Host_UpdateProgressDialog("", -1, -1);
|
||||
|
||||
// Switch from precompile -> runtime compiler threads.
|
||||
g_async_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
}
|
||||
|
||||
VertexShaderCache::VertexShaderCompilerWorkItem::VertexShaderCompilerWorkItem(
|
||||
const VertexShaderUid& uid)
|
||||
{
|
||||
std::memcpy(&m_uid, &uid, sizeof(uid));
|
||||
}
|
||||
|
||||
VertexShaderCache::VertexShaderCompilerWorkItem::~VertexShaderCompilerWorkItem()
|
||||
{
|
||||
SAFE_RELEASE(m_bytecode);
|
||||
SAFE_RELEASE(m_vs);
|
||||
}
|
||||
|
||||
bool VertexShaderCache::VertexShaderCompilerWorkItem::Compile()
|
||||
{
|
||||
ShaderCode code =
|
||||
GenerateVertexShaderCode(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
|
||||
|
||||
if (D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode))
|
||||
m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VertexShaderCache::VertexShaderCompilerWorkItem::Retrieve()
|
||||
{
|
||||
if (InsertShader(m_uid, m_vs, m_bytecode))
|
||||
g_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
|
||||
}
|
||||
|
||||
VertexShaderCache::UberVertexShaderCompilerWorkItem::UberVertexShaderCompilerWorkItem(
|
||||
const UberShader::VertexShaderUid& uid)
|
||||
{
|
||||
std::memcpy(&m_uid, &uid, sizeof(uid));
|
||||
}
|
||||
|
||||
VertexShaderCache::UberVertexShaderCompilerWorkItem::~UberVertexShaderCompilerWorkItem()
|
||||
{
|
||||
SAFE_RELEASE(m_bytecode);
|
||||
SAFE_RELEASE(m_vs);
|
||||
}
|
||||
|
||||
bool VertexShaderCache::UberVertexShaderCompilerWorkItem::Compile()
|
||||
{
|
||||
ShaderCode code =
|
||||
UberShader::GenVertexShader(APIType::D3D, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
|
||||
|
||||
if (D3D::CompileVertexShader(code.GetBuffer(), &m_bytecode))
|
||||
m_vs = D3D::CreateVertexShaderFromByteCode(m_bytecode);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void VertexShaderCache::UberVertexShaderCompilerWorkItem::Retrieve()
|
||||
{
|
||||
if (InsertShader(m_uid, m_vs, m_bytecode))
|
||||
g_uber_vs_disk_cache.Append(m_uid, m_bytecode->Data(), m_bytecode->Size());
|
||||
}
|
||||
|
||||
} // namespace DX11
|
||||
|
|
|
@ -9,10 +9,14 @@
|
|||
#include "VideoBackends/D3D/D3DBase.h"
|
||||
#include "VideoBackends/D3D/D3DBlob.h"
|
||||
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
|
||||
namespace DX11
|
||||
{
|
||||
class D3DVertexFormat;
|
||||
|
||||
class VertexShaderCache
|
||||
{
|
||||
public:
|
||||
|
@ -20,10 +24,12 @@ public:
|
|||
static void Reload();
|
||||
static void Clear();
|
||||
static void Shutdown();
|
||||
static bool SetShader(); // TODO: Should be renamed to LoadShader
|
||||
static bool SetShader(D3DVertexFormat* vertex_format);
|
||||
static bool SetUberShader(D3DVertexFormat* vertex_format);
|
||||
static void RetreiveAsyncShaders();
|
||||
static void QueueUberShaderCompiles();
|
||||
static void WaitForBackgroundCompilesToComplete();
|
||||
|
||||
static ID3D11VertexShader* GetActiveShader() { return last_entry->shader; }
|
||||
static D3DBlob* GetActiveShaderBytecode() { return last_entry->bytecode; }
|
||||
static ID3D11Buffer*& GetConstantBuffer();
|
||||
|
||||
static ID3D11VertexShader* GetSimpleVertexShader();
|
||||
|
@ -31,15 +37,20 @@ public:
|
|||
static ID3D11InputLayout* GetSimpleInputLayout();
|
||||
static ID3D11InputLayout* GetClearInputLayout();
|
||||
|
||||
static bool InsertByteCode(const VertexShaderUid& uid, D3DBlob* bcodeblob);
|
||||
static bool InsertByteCode(const VertexShaderUid& uid, D3DBlob* blob);
|
||||
static bool InsertByteCode(const UberShader::VertexShaderUid& uid, D3DBlob* blob);
|
||||
static bool InsertShader(const VertexShaderUid& uid, ID3D11VertexShader* shader, D3DBlob* blob);
|
||||
static bool InsertShader(const UberShader::VertexShaderUid& uid, ID3D11VertexShader* shader,
|
||||
D3DBlob* blob);
|
||||
|
||||
private:
|
||||
struct VSCacheEntry
|
||||
{
|
||||
ID3D11VertexShader* shader;
|
||||
D3DBlob* bytecode; // needed to initialize the input layout
|
||||
bool pending;
|
||||
|
||||
VSCacheEntry() : shader(nullptr), bytecode(nullptr) {}
|
||||
VSCacheEntry() : shader(nullptr), bytecode(nullptr), pending(false) {}
|
||||
void SetByteCode(D3DBlob* blob)
|
||||
{
|
||||
SAFE_RELEASE(bytecode);
|
||||
|
@ -52,13 +63,49 @@ private:
|
|||
SAFE_RELEASE(bytecode);
|
||||
}
|
||||
};
|
||||
|
||||
// AsyncShaderCompiler work item for one vertex shader identified by a
// VertexShaderUid. Overrides the WorkItem Compile()/Retrieve() hooks and holds
// the resulting bytecode blob and shader object.
class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
VertexShaderCompilerWorkItem(const VertexShaderUid& uid);
|
||||
~VertexShaderCompilerWorkItem() override;
|
||||
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
// Uid of the shader this item compiles.
VertexShaderUid m_uid;
|
||||
// Compiled bytecode; nullptr until set.
D3DBlob* m_bytecode = nullptr;
|
||||
// Created shader object; nullptr until set.
ID3D11VertexShader* m_vs = nullptr;
|
||||
};
|
||||
|
||||
// AsyncShaderCompiler work item for one ubershader vertex shader identified by
// an UberShader::VertexShaderUid. Overrides the WorkItem Compile()/Retrieve()
// hooks and holds the resulting bytecode blob and shader object.
class UberVertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
UberVertexShaderCompilerWorkItem(const UberShader::VertexShaderUid& uid);
|
||||
~UberVertexShaderCompilerWorkItem() override;
|
||||
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
// Uid of the ubershader this item compiles.
UberShader::VertexShaderUid m_uid;
|
||||
// Compiled bytecode; nullptr until set.
D3DBlob* m_bytecode = nullptr;
|
||||
// Created shader object; nullptr until set.
ID3D11VertexShader* m_vs = nullptr;
|
||||
};
|
||||
|
||||
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
|
||||
typedef std::map<UberShader::VertexShaderUid, VSCacheEntry> UberVSCache;
|
||||
|
||||
static void LoadShaderCache();
|
||||
static void SetInputLayout();
|
||||
|
||||
static VSCache vshaders;
|
||||
static UberVSCache ubervshaders;
|
||||
static const VSCacheEntry* last_entry;
|
||||
static const VSCacheEntry* last_uber_entry;
|
||||
static VertexShaderUid last_uid;
|
||||
static UberShader::VertexShaderUid last_uber_uid;
|
||||
};
|
||||
|
||||
} // namespace DX11
|
||||
|
|
|
@ -78,6 +78,8 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
|
||||
g_Config.backend_info.bSupportsST3CTextures = false;
|
||||
g_Config.backend_info.bSupportsBitfield = false;
|
||||
g_Config.backend_info.bSupportsDynamicSamplerIndexing = false;
|
||||
|
||||
IDXGIFactory* factory;
|
||||
IDXGIAdapter* ad;
|
||||
|
@ -159,6 +161,7 @@ void VideoBackend::Video_Prepare()
|
|||
VertexShaderCache::Init();
|
||||
PixelShaderCache::Init();
|
||||
GeometryShaderCache::Init();
|
||||
VertexShaderCache::WaitForBackgroundCompilesToComplete();
|
||||
D3D::InitUtils();
|
||||
BBox::Init();
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@ class NullNativeVertexFormat : public NativeVertexFormat
|
|||
{
|
||||
public:
|
||||
NullNativeVertexFormat() {}
|
||||
void SetupVertexPointers() override {}
|
||||
};
|
||||
|
||||
std::unique_ptr<NativeVertexFormat>
|
||||
|
|
|
@ -57,6 +57,7 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl)
|
|||
|
||||
glGenVertexArrays(1, &VAO);
|
||||
glBindVertexArray(VAO);
|
||||
ProgramShaderCache::BindVertexFormat(this);
|
||||
|
||||
// the element buffer is bound directly to the vao, so we must set it for every vao
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->m_index_buffers);
|
||||
|
@ -74,16 +75,10 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl)
|
|||
SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, _vtx_decl.texcoords[i]);
|
||||
|
||||
SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, _vtx_decl.posmtx);
|
||||
|
||||
vm->m_last_vao = VAO;
|
||||
}
|
||||
|
||||
// Deletes the single OpenGL vertex array object whose name is stored in VAO.
GLVertexFormat::~GLVertexFormat()
|
||||
{
|
||||
glDeleteVertexArrays(1, &VAO);
|
||||
}
|
||||
|
||||
// Empty implementation: the body contains no statements.
void GLVertexFormat::SetupVertexPointers()
|
||||
{
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4,17 +4,25 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include "Common/GL/GLUtil.h"
|
||||
#include "Common/LinearDiskCache.h"
|
||||
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include "VideoCommon/GeometryShaderGen.h"
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
#include "VideoCommon/UberShaderPixel.h"
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
|
||||
class cInterfaceBase;
|
||||
|
||||
namespace OGL
|
||||
{
|
||||
class GLVertexFormat;
|
||||
|
||||
class SHADERUID
|
||||
{
|
||||
public:
|
||||
|
@ -24,30 +32,53 @@ public:
|
|||
|
||||
bool operator<(const SHADERUID& r) const
|
||||
{
|
||||
return std::tie(puid, vuid, guid) < std::tie(r.puid, r.vuid, r.guid);
|
||||
return std::tie(vuid, puid, guid) < std::tie(r.vuid, r.puid, r.guid);
|
||||
}
|
||||
|
||||
bool operator==(const SHADERUID& r) const
|
||||
{
|
||||
return std::tie(puid, vuid, guid) == std::tie(r.puid, r.vuid, r.guid);
|
||||
return std::tie(vuid, puid, guid) == std::tie(r.vuid, r.puid, r.guid);
|
||||
}
|
||||
};
|
||||
// Composite key bundling the ubershader vertex uid, ubershader pixel uid and
// geometry shader uid. Ordering and equality are both defined over the tuple
// (vuid, puid, guid).
class UBERSHADERUID
|
||||
{
|
||||
public:
|
||||
UberShader::VertexShaderUid vuid;
|
||||
UberShader::PixelShaderUid puid;
|
||||
GeometryShaderUid guid;
|
||||
|
||||
// Lexicographic comparison: vuid first, then puid, then guid.
bool operator<(const UBERSHADERUID& r) const
|
||||
{
|
||||
return std::tie(vuid, puid, guid) < std::tie(r.vuid, r.puid, r.guid);
|
||||
}
|
||||
|
||||
// True only when all three component uids compare equal.
bool operator==(const UBERSHADERUID& r) const
|
||||
{
|
||||
return std::tie(vuid, puid, guid) == std::tie(r.vuid, r.puid, r.guid);
|
||||
}
|
||||
};
|
||||
|
||||
struct SHADER
|
||||
{
|
||||
SHADER() : glprogid(0) {}
|
||||
void Destroy()
|
||||
{
|
||||
glDeleteProgram(glprogid);
|
||||
glprogid = 0;
|
||||
DestroyShaders();
|
||||
if (glprogid)
|
||||
{
|
||||
glDeleteProgram(glprogid);
|
||||
glprogid = 0;
|
||||
}
|
||||
}
|
||||
GLuint glprogid; // OpenGL program id
|
||||
|
||||
std::string strvprog, strpprog, strgprog;
|
||||
GLuint vsid = 0;
|
||||
GLuint gsid = 0;
|
||||
GLuint psid = 0;
|
||||
GLuint glprogid = 0;
|
||||
|
||||
void SetProgramVariables();
|
||||
void SetProgramBindings(bool is_compute);
|
||||
void Bind() const;
|
||||
void DestroyShaders();
|
||||
};
|
||||
|
||||
class ProgramShaderCache
|
||||
|
@ -57,43 +88,126 @@ public:
|
|||
{
|
||||
SHADER shader;
|
||||
bool in_cache;
|
||||
bool pending;
|
||||
|
||||
void Destroy() { shader.Destroy(); }
|
||||
};
|
||||
|
||||
static PCacheEntry GetShaderProgram();
|
||||
static SHADER* SetShader(u32 primitive_type);
|
||||
static void GetShaderId(SHADERUID* uid, u32 primitive_type);
|
||||
static SHADER* SetShader(u32 primitive_type, const GLVertexFormat* vertex_format);
|
||||
static SHADER* SetUberShader(u32 primitive_type, const GLVertexFormat* vertex_format);
|
||||
static void BindVertexFormat(const GLVertexFormat* vertex_format);
|
||||
static void InvalidateVertexFormat();
|
||||
static void BindLastVertexFormat();
|
||||
|
||||
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
|
||||
const std::string& gcode = "");
|
||||
static bool CompileComputeShader(SHADER& shader, const std::string& code);
|
||||
static GLuint CompileSingleShader(GLuint type, const std::string& code);
|
||||
static GLuint CompileSingleShader(GLenum type, const std::string& code);
|
||||
static bool CheckShaderCompileResult(GLuint id, GLenum type, const std::string& code);
|
||||
static bool CheckProgramLinkResult(GLuint id, const std::string& vcode, const std::string& pcode,
|
||||
const std::string& gcode);
|
||||
static void UploadConstants();
|
||||
|
||||
static void Init();
|
||||
static void Reload();
|
||||
static void Shutdown();
|
||||
static void CreateHeader();
|
||||
static void RetrieveAsyncShaders();
|
||||
static void PrecompileUberShaders();
|
||||
|
||||
private:
|
||||
class ProgramShaderCacheInserter : public LinearDiskCacheReader<SHADERUID, u8>
|
||||
template <typename UIDType>
|
||||
class ProgramShaderCacheInserter : public LinearDiskCacheReader<UIDType, u8>
|
||||
{
|
||||
public:
|
||||
void Read(const SHADERUID& key, const u8* value, u32 value_size) override;
|
||||
ProgramShaderCacheInserter(std::map<UIDType, PCacheEntry>& shader_map)
|
||||
: m_shader_map(shader_map)
|
||||
{
|
||||
}
|
||||
|
||||
void Read(const UIDType& key, const u8* value, u32 value_size) override
|
||||
{
|
||||
if (m_shader_map.find(key) != m_shader_map.end())
|
||||
return;
|
||||
|
||||
PCacheEntry& entry = m_shader_map[key];
|
||||
if (!CreateCacheEntryFromBinary(&entry, value, value_size))
|
||||
{
|
||||
m_shader_map.erase(key);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<UIDType, PCacheEntry>& m_shader_map;
|
||||
};
|
||||
|
||||
// AsyncShaderCompiler subclass that overrides the three worker-thread hooks
// declared below.
// NOTE(review): the name suggests each worker thread gets a shared GL context;
// the implementation is not visible here - confirm.
class SharedContextAsyncShaderCompiler : public VideoCommon::AsyncShaderCompiler
|
||||
{
|
||||
protected:
|
||||
virtual bool WorkerThreadInitMainThread(void** param) override;
|
||||
virtual bool WorkerThreadInitWorkerThread(void* param) override;
|
||||
virtual void WorkerThreadExit(void* param) override;
|
||||
};
|
||||
|
||||
// Bundle of an owned cInterfaceBase context plus three GL object names
// (VBO, VAO, IBO) prefixed "prerender".
// NOTE(review): presumably the per-worker state passed through the void* param
// of the SharedContextAsyncShaderCompiler hooks - confirm against the .cpp.
struct SharedContextData
|
||||
{
|
||||
std::unique_ptr<cInterfaceBase> context;
|
||||
GLuint prerender_VBO;
|
||||
GLuint prerender_VAO;
|
||||
GLuint prerender_IBO;
|
||||
};
|
||||
|
||||
// AsyncShaderCompiler work item for one program identified by a SHADERUID.
// Overrides the WorkItem Compile()/Retrieve() hooks and holds the SHADER it
// works on.
class ShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
ShaderCompileWorkItem(const SHADERUID& uid);
|
||||
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
// Uid of the program this item compiles.
SHADERUID m_uid;
|
||||
// Program object state owned by this item.
SHADER m_program;
|
||||
};
|
||||
|
||||
// AsyncShaderCompiler work item for one ubershader program identified by an
// UBERSHADERUID. Overrides the WorkItem Compile()/Retrieve() hooks and holds
// the SHADER it works on.
class UberShaderCompileWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
UberShaderCompileWorkItem(const UBERSHADERUID& uid);
|
||||
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
// Uid of the ubershader program this item compiles.
UBERSHADERUID m_uid;
|
||||
// Program object state owned by this item.
SHADER m_program;
|
||||
};
|
||||
|
||||
typedef std::map<SHADERUID, PCacheEntry> PCache;
|
||||
typedef std::map<UBERSHADERUID, PCacheEntry> UberPCache;
|
||||
|
||||
static GLuint CreateProgramFromBinary(const u8* value, u32 value_size);
|
||||
static bool CreateCacheEntryFromBinary(PCacheEntry* entry, const u8* value, u32 value_size);
|
||||
static void LoadProgramBinaries();
|
||||
static void SaveProgramBinaries();
|
||||
static void DestroyShaders();
|
||||
static void CreatePrerenderArrays(SharedContextData* data);
|
||||
static void DestroyPrerenderArrays(SharedContextData* data);
|
||||
static void DrawPrerenderArray(const SHADER& shader, u32 primitive_type);
|
||||
|
||||
typedef std::map<SHADERUID, PCacheEntry> PCache;
|
||||
static PCache pshaders;
|
||||
static UberPCache ubershaders;
|
||||
static PCacheEntry* last_entry;
|
||||
static PCacheEntry* last_uber_entry;
|
||||
static SHADERUID last_uid;
|
||||
static UBERSHADERUID last_uber_uid;
|
||||
|
||||
static std::unique_ptr<SharedContextAsyncShaderCompiler> s_async_compiler;
|
||||
static u32 s_ubo_buffer_size;
|
||||
static s32 s_ubo_align;
|
||||
static u32 s_last_VAO;
|
||||
};
|
||||
|
||||
} // namespace OGL
|
||||
|
|
|
@ -119,11 +119,11 @@ static const u8 rasters[CHARACTER_COUNT][CHARACTER_HEIGHT] = {
|
|||
static const char* s_vertexShaderSrc = "uniform vec2 charSize;\n"
|
||||
"uniform vec2 offset;"
|
||||
"in vec2 rawpos;\n"
|
||||
"in vec2 tex0;\n"
|
||||
"in vec2 rawtex0;\n"
|
||||
"out vec2 uv0;\n"
|
||||
"void main(void) {\n"
|
||||
" gl_Position = vec4(rawpos + offset,0,1);\n"
|
||||
" uv0 = tex0 * charSize;\n"
|
||||
" uv0 = rawtex0 * charSize;\n"
|
||||
"}\n";
|
||||
|
||||
static const char* s_fragmentShaderSrc = "SAMPLER_BINDING(8) uniform sampler2D samp8;\n"
|
||||
|
|
|
@ -447,6 +447,12 @@ Renderer::Renderer()
|
|||
// Clip distance support is useless without a method to clamp the depth range
|
||||
g_Config.backend_info.bSupportsDepthClamp = GLExtensions::Supports("GL_ARB_depth_clamp");
|
||||
|
||||
// Desktop OpenGL supports bitfield manipulation and dynamic sampler indexing if it supports
|
||||
// shader5. OpenGL ES 3.1 supports it implicitly without an extension
|
||||
g_Config.backend_info.bSupportsBitfield = GLExtensions::Supports("GL_ARB_gpu_shader5");
|
||||
g_Config.backend_info.bSupportsDynamicSamplerIndexing =
|
||||
GLExtensions::Supports("GL_ARB_gpu_shader5");
|
||||
|
||||
g_ogl_config.bSupportsGLSLCache = GLExtensions::Supports("GL_ARB_get_program_binary");
|
||||
g_ogl_config.bSupportsGLPinnedMemory = GLExtensions::Supports("GL_AMD_pinned_memory");
|
||||
g_ogl_config.bSupportsGLSync = GLExtensions::Supports("GL_ARB_sync");
|
||||
|
@ -515,6 +521,8 @@ Renderer::Renderer()
|
|||
g_ogl_config.bSupportsMSAA = true;
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupports2DTextureStorageMultisample = true;
|
||||
g_Config.backend_info.bSupportsBitfield = true;
|
||||
g_Config.backend_info.bSupportsDynamicSamplerIndexing = g_ogl_config.bSupportsAEP;
|
||||
if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 &&
|
||||
!g_ogl_config.bSupports3DTextureStorageMultisample)
|
||||
{
|
||||
|
@ -542,6 +550,8 @@ Renderer::Renderer()
|
|||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupports2DTextureStorageMultisample = true;
|
||||
g_ogl_config.bSupports3DTextureStorageMultisample = true;
|
||||
g_Config.backend_info.bSupportsBitfield = true;
|
||||
g_Config.backend_info.bSupportsDynamicSamplerIndexing = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1462,6 +1472,7 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight,
|
|||
|
||||
// Clean out old stuff from caches. It's not worth it to clean out the shader caches.
|
||||
g_texture_cache->Cleanup(frameCount);
|
||||
ProgramShaderCache::RetrieveAsyncShaders();
|
||||
|
||||
// Render to the framebuffer.
|
||||
FramebufferManager::SetFramebuffer(0);
|
||||
|
@ -1758,10 +1769,9 @@ void Renderer::RestoreAPIState()
|
|||
SetBlendMode(true);
|
||||
SetViewport();
|
||||
|
||||
ProgramShaderCache::BindLastVertexFormat();
|
||||
const VertexManager* const vm = static_cast<VertexManager*>(g_vertex_manager.get());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vm->m_vertex_buffers);
|
||||
if (vm->m_last_vao)
|
||||
glBindVertexArray(vm->m_last_vao);
|
||||
|
||||
OGLTexture::SetStage();
|
||||
}
|
||||
|
|
|
@ -58,6 +58,7 @@ struct VideoConfig
|
|||
bool bSupportsConservativeDepth;
|
||||
bool bSupportsImageLoadStore;
|
||||
bool bSupportsAniso;
|
||||
bool bSupportsBitfield;
|
||||
|
||||
const char* gl_vendor;
|
||||
const char* gl_renderer;
|
||||
|
|
|
@ -53,8 +53,6 @@ void VertexManager::CreateDeviceObjects()
|
|||
|
||||
s_indexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE);
|
||||
m_index_buffers = s_indexBuffer->m_buffer;
|
||||
|
||||
m_last_vao = 0;
|
||||
}
|
||||
|
||||
void VertexManager::DestroyDeviceObjects()
|
||||
|
@ -142,22 +140,13 @@ void VertexManager::vFlush()
|
|||
GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
|
||||
u32 stride = nativeVertexFmt->GetVertexStride();
|
||||
|
||||
if (m_last_vao != nativeVertexFmt->VAO)
|
||||
{
|
||||
glBindVertexArray(nativeVertexFmt->VAO);
|
||||
m_last_vao = nativeVertexFmt->VAO;
|
||||
}
|
||||
ProgramShaderCache::SetShader(m_current_primitive_type, nativeVertexFmt);
|
||||
|
||||
PrepareDrawBuffers(stride);
|
||||
|
||||
ProgramShaderCache::SetShader(m_current_primitive_type);
|
||||
|
||||
// upload global constants
|
||||
ProgramShaderCache::UploadConstants();
|
||||
|
||||
// setup the pointers
|
||||
nativeVertexFmt->SetupVertexPointers();
|
||||
|
||||
if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation())
|
||||
{
|
||||
glEnable(GL_STENCIL_TEST);
|
||||
|
@ -171,24 +160,6 @@ void VertexManager::vFlush()
|
|||
glDisable(GL_STENCIL_TEST);
|
||||
}
|
||||
|
||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||
if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
|
||||
{
|
||||
// save the shaders
|
||||
ProgramShaderCache::PCacheEntry prog = ProgramShaderCache::GetShaderProgram();
|
||||
std::string filename = StringFromFormat(
|
||||
"%sps%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), g_ActiveConfig.iSaveTargetId);
|
||||
std::ofstream fps;
|
||||
File::OpenFStream(fps, filename, std::ios_base::out);
|
||||
fps << prog.shader.strpprog;
|
||||
|
||||
filename = StringFromFormat("%svs%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(),
|
||||
g_ActiveConfig.iSaveTargetId);
|
||||
std::ofstream fvs;
|
||||
File::OpenFStream(fvs, filename, std::ios_base::out);
|
||||
fvs << prog.shader.strvprog;
|
||||
}
|
||||
#endif
|
||||
g_Config.iSaveTargetId++;
|
||||
ClearEFBCache();
|
||||
}
|
||||
|
|
|
@ -20,8 +20,6 @@ public:
|
|||
GLVertexFormat(const PortableVertexDeclaration& vtx_decl);
|
||||
~GLVertexFormat();
|
||||
|
||||
void SetupVertexPointers() override;
|
||||
|
||||
GLuint VAO;
|
||||
};
|
||||
|
||||
|
@ -42,7 +40,6 @@ public:
|
|||
// NativeVertexFormat use this
|
||||
GLuint m_vertex_buffers;
|
||||
GLuint m_index_buffers;
|
||||
GLuint m_last_vao;
|
||||
|
||||
protected:
|
||||
void ResetBuffer(u32 stride) override;
|
||||
|
|
|
@ -30,7 +30,6 @@ class NullNativeVertexFormat : public NativeVertexFormat
|
|||
{
|
||||
public:
|
||||
NullNativeVertexFormat(const PortableVertexDeclaration& _vtx_decl) { vtx_decl = _vtx_decl; }
|
||||
void SetupVertexPointers() override {}
|
||||
};
|
||||
|
||||
std::unique_ptr<NativeVertexFormat>
|
||||
|
|
|
@ -769,7 +769,7 @@ void Tev::Draw()
|
|||
// - scaling of the "k" coefficient isn't clear either.
|
||||
|
||||
// First, calculate the offset from the viewport center (normalized to 0..1)
|
||||
float offset = (Position[0] - (static_cast<s32>(bpmem.fogRange.Base.Center) - 342)) /
|
||||
float offset = (Position[0] - (static_cast<s32>(bpmem.fogRange.Base.Center.Value()) - 342)) /
|
||||
static_cast<float>(xfmem.viewport.wd);
|
||||
|
||||
// Based on that, choose the index such that points which are far away from the z-axis use the
|
||||
|
|
|
@ -443,7 +443,7 @@ void TransformTexCoord(const InputVertexData* src, OutputVertexData* dst, bool s
|
|||
dst->texCoords[coordNum].z = 1.0f;
|
||||
break;
|
||||
default:
|
||||
ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype);
|
||||
ERROR_LOG(VIDEO, "Bad tex gen type %i", texinfo.texgentype.Value());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "Core/ConfigManager.h"
|
||||
|
||||
#include "VideoBackends/Vulkan/CommandBufferManager.h"
|
||||
#include "VideoBackends/Vulkan/ShaderCompiler.h"
|
||||
#include "VideoBackends/Vulkan/StreamBuffer.h"
|
||||
#include "VideoBackends/Vulkan/Util.h"
|
||||
|
@ -59,6 +60,19 @@ bool ObjectCache::Initialize()
|
|||
if (!m_utility_shader_vertex_buffer || !m_utility_shader_uniform_buffer)
|
||||
return false;
|
||||
|
||||
m_dummy_texture = Texture2D::Create(1, 1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT,
|
||||
VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_LINEAR,
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
|
||||
m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
VkClearColorValue clear_color = {};
|
||||
VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
|
||||
vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
|
||||
m_dummy_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
|
||||
&clear_color, 1, &clear_range);
|
||||
m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(),
|
||||
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -99,17 +113,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
|
|||
{UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
|
||||
VK_SHADER_STAGE_GEOMETRY_BIT}};
|
||||
|
||||
// Annoying these have to be split, apparently we can't partially update an array without the
|
||||
// validation layers throwing a warning.
|
||||
static const VkDescriptorSetLayoutBinding sampler_set_bindings[] = {
|
||||
{0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{6, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
{7, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}};
|
||||
{0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, static_cast<u32>(NUM_PIXEL_SHADER_SAMPLERS),
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT}};
|
||||
|
||||
static const VkDescriptorSetLayoutBinding ssbo_set_bindings[] = {
|
||||
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}};
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "Common/LinearDiskCache.h"
|
||||
|
||||
#include "VideoBackends/Vulkan/Constants.h"
|
||||
#include "VideoBackends/Vulkan/Texture2D.h"
|
||||
|
||||
#include "VideoCommon/GeometryShaderGen.h"
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
|
@ -62,6 +63,9 @@ public:
|
|||
VkSampler GetLinearSampler() const { return m_linear_sampler; }
|
||||
VkSampler GetSampler(const SamplerState& info);
|
||||
|
||||
// Dummy image for samplers that are unbound
|
||||
Texture2D* GetDummyImage() const { return m_dummy_texture.get(); }
|
||||
VkImageView GetDummyImageView() const { return m_dummy_texture->GetView(); }
|
||||
// Perform at startup, create descriptor layouts, compiles all static shaders.
|
||||
bool Initialize();
|
||||
|
||||
|
@ -89,6 +93,9 @@ private:
|
|||
VkSampler m_linear_sampler = VK_NULL_HANDLE;
|
||||
|
||||
std::map<SamplerState, VkSampler> m_sampler_cache;
|
||||
|
||||
// Dummy image for samplers that are unbound
|
||||
std::unique_ptr<Texture2D> m_dummy_texture;
|
||||
};
|
||||
|
||||
extern std::unique_ptr<ObjectCache> g_object_cache;
|
||||
|
|
|
@ -149,7 +149,7 @@ static const std::string DEFAULT_FRAGMENT_SHADER_SOURCE = R"(
|
|||
|
||||
static const std::string POSTPROCESSING_SHADER_HEADER = R"(
|
||||
SAMPLER_BINDING(0) uniform sampler2DArray samp0;
|
||||
SAMPLER_BINDING(1) uniform sampler2D samp1;
|
||||
SAMPLER_BINDING(1) uniform sampler2DArray samp1;
|
||||
|
||||
layout(location = 0) in float3 uv0;
|
||||
layout(location = 1) in float4 col0;
|
||||
|
@ -176,7 +176,7 @@ static const std::string POSTPROCESSING_SHADER_HEADER = R"(
|
|||
|
||||
float4 SampleFontLocation(float2 location)
|
||||
{
|
||||
return texture(samp1, location);
|
||||
return texture(samp1, float3(location, 0.0));
|
||||
}
|
||||
|
||||
float2 GetResolution()
|
||||
|
|
|
@ -150,7 +150,7 @@ layout(std140, push_constant) uniform PCBlock {
|
|||
vec4 color;
|
||||
} PC;
|
||||
|
||||
layout(set = 1, binding = 0) uniform sampler2D samp0;
|
||||
layout(set = 1, binding = 0) uniform sampler2DArray samp0;
|
||||
|
||||
layout(location = 0) in vec2 uv0;
|
||||
|
||||
|
@ -158,7 +158,7 @@ layout(location = 0) out vec4 ocol0;
|
|||
|
||||
void main()
|
||||
{
|
||||
ocol0 = texture(samp0, uv0) * PC.color;
|
||||
ocol0 = texture(samp0, float3(uv0, 0.0)) * PC.color;
|
||||
}
|
||||
|
||||
)";
|
||||
|
@ -209,7 +209,7 @@ bool RasterFont::CreateTexture()
|
|||
// create the actual texture object
|
||||
m_texture = Texture2D::Create(CHARACTER_WIDTH * CHARACTER_COUNT, CHARACTER_HEIGHT, 1, 1,
|
||||
VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT,
|
||||
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
|
||||
VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
|
||||
if (!m_texture)
|
||||
return false;
|
||||
|
|
|
@ -113,9 +113,6 @@ bool Renderer::Initialize()
|
|||
m_bounding_box->GetGPUBufferSize());
|
||||
}
|
||||
|
||||
// Ensure all pipelines previously used by the game have been created.
|
||||
StateTracker::GetInstance()->ReloadPipelineUIDCache();
|
||||
|
||||
// Initialize post processing.
|
||||
m_post_processor = std::make_unique<VulkanPostProcessing>();
|
||||
if (!static_cast<VulkanPostProcessing*>(m_post_processor.get())
|
||||
|
@ -589,6 +586,9 @@ void Renderer::SwapImpl(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height
|
|||
|
||||
// Clean up stale textures.
|
||||
TextureCache::GetInstance()->Cleanup(frameCount);
|
||||
|
||||
// Pull in now-ready async shaders.
|
||||
g_shader_cache->RetrieveAsyncShaders();
|
||||
}
|
||||
|
||||
void Renderer::TransitionBuffersForSwap(const TargetRectangle& scaled_rect,
|
||||
|
@ -1132,6 +1132,8 @@ void Renderer::CheckForConfigChanges()
|
|||
bool old_force_filtering = g_ActiveConfig.bForceFiltering;
|
||||
bool old_use_xfb = g_ActiveConfig.bUseXFB;
|
||||
bool old_use_realxfb = g_ActiveConfig.bUseRealXFB;
|
||||
bool old_vertex_ubershaders = g_ActiveConfig.bForceVertexUberShaders;
|
||||
bool old_pixel_ubershaders = g_ActiveConfig.bForcePixelUberShaders;
|
||||
|
||||
// Copy g_Config to g_ActiveConfig.
|
||||
// NOTE: This can potentially race with the UI thread, however if it does, the changes will be
|
||||
|
@ -1145,6 +1147,8 @@ void Renderer::CheckForConfigChanges()
|
|||
bool aspect_changed = old_aspect_ratio != g_ActiveConfig.iAspectRatio;
|
||||
bool use_xfb_changed = old_use_xfb != g_ActiveConfig.bUseXFB;
|
||||
bool use_realxfb_changed = old_use_realxfb != g_ActiveConfig.bUseRealXFB;
|
||||
bool ubershaders_changed = old_vertex_ubershaders != g_ActiveConfig.bForceVertexUberShaders ||
|
||||
old_pixel_ubershaders != g_ActiveConfig.bForcePixelUberShaders;
|
||||
|
||||
// Update texture cache settings with any changed options.
|
||||
TextureCache::GetInstance()->OnConfigChanged(g_ActiveConfig);
|
||||
|
@ -1190,6 +1194,10 @@ void Renderer::CheckForConfigChanges()
|
|||
if (anisotropy_changed || force_texture_filtering_changed)
|
||||
ResetSamplerStates();
|
||||
|
||||
// Clear UID state if ubershaders are toggled.
|
||||
if (ubershaders_changed)
|
||||
StateTracker::GetInstance()->ClearShaders();
|
||||
|
||||
// Check for a changed post-processing shader and recompile if needed.
|
||||
static_cast<VulkanPostProcessing*>(m_post_processor.get())->UpdateConfig();
|
||||
}
|
||||
|
|
|
@ -15,13 +15,20 @@
|
|||
#include "Common/MsgHandler.h"
|
||||
|
||||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Host.h"
|
||||
|
||||
#include "VideoBackends/Vulkan/FramebufferManager.h"
|
||||
#include "VideoBackends/Vulkan/ShaderCompiler.h"
|
||||
#include "VideoBackends/Vulkan/StreamBuffer.h"
|
||||
#include "VideoBackends/Vulkan/Util.h"
|
||||
#include "VideoBackends/Vulkan/VertexFormat.h"
|
||||
#include "VideoBackends/Vulkan/VulkanContext.h"
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include "VideoCommon/GeometryShaderGen.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/UberShaderPixel.h"
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
|
||||
namespace Vulkan
|
||||
{
|
||||
|
@ -55,9 +62,22 @@ bool ShaderCache::Initialize()
|
|||
if (!CompileSharedShaders())
|
||||
return false;
|
||||
|
||||
m_async_shader_compiler = std::make_unique<VideoCommon::AsyncShaderCompiler>();
|
||||
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.CanPrecompileUberShaders() ?
|
||||
g_ActiveConfig.GetShaderPrecompilerThreads() :
|
||||
g_ActiveConfig.GetShaderCompilerThreads());
|
||||
return true;
|
||||
}
|
||||
|
||||
// If an async shader compiler exists, stops its worker threads and then
// retrieves its work items. Does nothing when m_async_shader_compiler is null.
void ShaderCache::Shutdown()
|
||||
{
|
||||
if (m_async_shader_compiler)
|
||||
{
|
||||
// Workers are stopped first, then RetrieveWorkItems() is called.
m_async_shader_compiler->StopWorkerThreads();
|
||||
m_async_shader_compiler->RetrieveWorkItems();
|
||||
}
|
||||
}
|
||||
|
||||
static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology)
|
||||
{
|
||||
return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP ||
|
||||
|
@ -365,13 +385,34 @@ std::pair<VkPipeline, bool> ShaderCache::GetPipelineWithCacheResult(const Pipeli
|
|||
{
|
||||
auto iter = m_pipeline_objects.find(info);
|
||||
if (iter != m_pipeline_objects.end())
|
||||
return {iter->second, true};
|
||||
{
|
||||
// If it's background compiling, ignore it, and recompile it synchronously.
|
||||
if (!iter->second.second)
|
||||
return std::make_pair(iter->second.first, true);
|
||||
else
|
||||
m_pipeline_objects.erase(iter);
|
||||
}
|
||||
|
||||
VkPipeline pipeline = CreatePipeline(info);
|
||||
m_pipeline_objects.emplace(info, pipeline);
|
||||
m_pipeline_objects.emplace(info, std::make_pair(pipeline, false));
|
||||
_assert_(pipeline != VK_NULL_HANDLE);
|
||||
return {pipeline, false};
|
||||
}
|
||||
|
||||
// Looks up `info` in m_pipeline_objects without compiling on the calling path.
// Returns ((pipeline, flag), found):
//  - hit:  the stored (pipeline, flag) entry and found == true;
//  - miss: queues a PipelineCompilerWorkItem for `info`, inserts a placeholder
//          entry (VK_NULL_HANDLE, true) into the map and returns that
//          placeholder with found == false.
// The inner bool is the same flag GetPipelineWithCacheResult checks to detect
// an entry that is still background compiling.
std::pair<std::pair<VkPipeline, bool>, bool>
|
||||
ShaderCache::GetPipelineWithCacheResultAsync(const PipelineInfo& info)
|
||||
{
|
||||
auto iter = m_pipeline_objects.find(info);
|
||||
// A hit is returned as-is, even if the entry is still the null placeholder.
if (iter != m_pipeline_objects.end())
|
||||
return std::make_pair(iter->second, true);
|
||||
|
||||
// Kick a job off.
|
||||
m_async_shader_compiler->QueueWorkItem(
|
||||
m_async_shader_compiler->CreateWorkItem<PipelineCompilerWorkItem>(info));
|
||||
// Record a null placeholder so later lookups for the same info hit the map
// instead of queueing another work item.
m_pipeline_objects.emplace(info, std::make_pair(static_cast<VkPipeline>(VK_NULL_HANDLE), true));
|
||||
return std::make_pair(std::make_pair(static_cast<VkPipeline>(VK_NULL_HANDLE), true), false);
|
||||
}
|
||||
|
||||
VkPipeline ShaderCache::CreateComputePipeline(const ComputePipelineInfo& info)
|
||||
{
|
||||
VkComputePipelineCreateInfo pipeline_info = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
|
@ -409,10 +450,11 @@ VkPipeline ShaderCache::GetComputePipeline(const ComputePipelineInfo& info)
|
|||
|
||||
void ShaderCache::ClearPipelineCache()
|
||||
{
|
||||
// TODO: Stop any async compiling happening.
|
||||
for (const auto& it : m_pipeline_objects)
|
||||
{
|
||||
if (it.second != VK_NULL_HANDLE)
|
||||
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
|
||||
if (it.second.first != VK_NULL_HANDLE)
|
||||
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second.first, nullptr);
|
||||
}
|
||||
m_pipeline_objects.clear();
|
||||
|
||||
|
@ -620,7 +662,10 @@ void ShaderCache::SavePipelineCache()
|
|||
template <typename Uid>
|
||||
struct ShaderCacheReader : public LinearDiskCacheReader<Uid, u32>
|
||||
{
|
||||
ShaderCacheReader(std::map<Uid, VkShaderModule>& shader_map) : m_shader_map(shader_map) {}
|
||||
ShaderCacheReader(std::map<Uid, std::pair<VkShaderModule, bool>>& shader_map)
|
||||
: m_shader_map(shader_map)
|
||||
{
|
||||
}
|
||||
void Read(const Uid& key, const u32* value, u32 value_size) override
|
||||
{
|
||||
// We don't insert null modules into the shader map since creation could succeed later on.
|
||||
|
@ -630,10 +675,10 @@ struct ShaderCacheReader : public LinearDiskCacheReader<Uid, u32>
|
|||
if (module == VK_NULL_HANDLE)
|
||||
return;
|
||||
|
||||
m_shader_map.emplace(key, module);
|
||||
m_shader_map.emplace(key, std::make_pair(module, false));
|
||||
}
|
||||
|
||||
std::map<Uid, VkShaderModule>& m_shader_map;
|
||||
std::map<Uid, std::pair<VkShaderModule, bool>>& m_shader_map;
|
||||
};
|
||||
|
||||
void ShaderCache::LoadShaderCaches()
|
||||
|
@ -653,6 +698,13 @@ void ShaderCache::LoadShaderCaches()
|
|||
gs_reader);
|
||||
}
|
||||
|
||||
ShaderCacheReader<UberShader::VertexShaderUid> uber_vs_reader(m_uber_vs_cache.shader_map);
|
||||
m_uber_vs_cache.disk_cache.OpenAndRead(
|
||||
GetDiskShaderCacheFileName(APIType::Vulkan, "UberVS", false, true), uber_vs_reader);
|
||||
ShaderCacheReader<UberShader::PixelShaderUid> uber_ps_reader(m_uber_ps_cache.shader_map);
|
||||
m_uber_ps_cache.disk_cache.OpenAndRead(
|
||||
GetDiskShaderCacheFileName(APIType::Vulkan, "UberPS", false, true), uber_ps_reader);
|
||||
|
||||
SETSTAT(stats.numPixelShadersCreated, static_cast<int>(m_ps_cache.shader_map.size()));
|
||||
SETSTAT(stats.numPixelShadersAlive, static_cast<int>(m_ps_cache.shader_map.size()));
|
||||
SETSTAT(stats.numVertexShadersCreated, static_cast<int>(m_vs_cache.shader_map.size()));
|
||||
|
@ -666,8 +718,8 @@ static void DestroyShaderCache(T& cache)
|
|||
cache.disk_cache.Close();
|
||||
for (const auto& it : cache.shader_map)
|
||||
{
|
||||
if (it.second != VK_NULL_HANDLE)
|
||||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second, nullptr);
|
||||
if (it.second.first != VK_NULL_HANDLE)
|
||||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.first, nullptr);
|
||||
}
|
||||
cache.shader_map.clear();
|
||||
}
|
||||
|
@ -680,6 +732,9 @@ void ShaderCache::DestroyShaderCaches()
|
|||
if (g_vulkan_context->SupportsGeometryShaders())
|
||||
DestroyShaderCache(m_gs_cache);
|
||||
|
||||
DestroyShaderCache(m_uber_vs_cache);
|
||||
DestroyShaderCache(m_uber_ps_cache);
|
||||
|
||||
SETSTAT(stats.numPixelShadersCreated, 0);
|
||||
SETSTAT(stats.numPixelShadersAlive, 0);
|
||||
SETSTAT(stats.numVertexShadersCreated, 0);
|
||||
|
@ -690,7 +745,13 @@ VkShaderModule ShaderCache::GetVertexShaderForUid(const VertexShaderUid& uid)
|
|||
{
|
||||
auto it = m_vs_cache.shader_map.find(uid);
|
||||
if (it != m_vs_cache.shader_map.end())
|
||||
return it->second;
|
||||
{
|
||||
// If it's pending, compile it synchronously.
|
||||
if (!it->second.second)
|
||||
return it->second.first;
|
||||
else
|
||||
m_vs_cache.shader_map.erase(it);
|
||||
}
|
||||
|
||||
// Not in the cache, so compile the shader.
|
||||
ShaderCompiler::SPIRVCodeVector spv;
|
||||
|
@ -712,7 +773,7 @@ VkShaderModule ShaderCache::GetVertexShaderForUid(const VertexShaderUid& uid)
|
|||
}
|
||||
|
||||
// We still insert null entries to prevent further compilation attempts.
|
||||
m_vs_cache.shader_map.emplace(uid, module);
|
||||
m_vs_cache.shader_map.emplace(uid, std::make_pair(module, false));
|
||||
return module;
|
||||
}
|
||||
|
||||
|
@ -721,7 +782,13 @@ VkShaderModule ShaderCache::GetGeometryShaderForUid(const GeometryShaderUid& uid
|
|||
_assert_(g_vulkan_context->SupportsGeometryShaders());
|
||||
auto it = m_gs_cache.shader_map.find(uid);
|
||||
if (it != m_gs_cache.shader_map.end())
|
||||
return it->second;
|
||||
{
|
||||
// If it's pending, compile it synchronously.
|
||||
if (!it->second.second)
|
||||
return it->second.first;
|
||||
else
|
||||
m_gs_cache.shader_map.erase(it);
|
||||
}
|
||||
|
||||
// Not in the cache, so compile the shader.
|
||||
ShaderCompiler::SPIRVCodeVector spv;
|
||||
|
@ -739,7 +806,7 @@ VkShaderModule ShaderCache::GetGeometryShaderForUid(const GeometryShaderUid& uid
|
|||
}
|
||||
|
||||
// We still insert null entries to prevent further compilation attempts.
|
||||
m_gs_cache.shader_map.emplace(uid, module);
|
||||
m_gs_cache.shader_map.emplace(uid, std::make_pair(module, false));
|
||||
return module;
|
||||
}
|
||||
|
||||
|
@ -747,7 +814,13 @@ VkShaderModule ShaderCache::GetPixelShaderForUid(const PixelShaderUid& uid)
|
|||
{
|
||||
auto it = m_ps_cache.shader_map.find(uid);
|
||||
if (it != m_ps_cache.shader_map.end())
|
||||
return it->second;
|
||||
{
|
||||
// If it's pending, compile it synchronously.
|
||||
if (!it->second.second)
|
||||
return it->second.first;
|
||||
else
|
||||
m_ps_cache.shader_map.erase(it);
|
||||
}
|
||||
|
||||
// Not in the cache, so compile the shader.
|
||||
ShaderCompiler::SPIRVCodeVector spv;
|
||||
|
@ -769,7 +842,79 @@ VkShaderModule ShaderCache::GetPixelShaderForUid(const PixelShaderUid& uid)
|
|||
}
|
||||
|
||||
// We still insert null entries to prevent further compilation attempts.
|
||||
m_ps_cache.shader_map.emplace(uid, module);
|
||||
m_ps_cache.shader_map.emplace(uid, std::make_pair(module, false));
|
||||
return module;
|
||||
}
|
||||
|
||||
// Returns the vertex ubershader module for `uid`, compiling it on the calling thread when the
// cache has no finished entry. The result may be VK_NULL_HANDLE if compilation or module
// creation fails; that null result is cached as well.
VkShaderModule ShaderCache::GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid)
{
  auto& shader_map = m_uber_vs_cache.shader_map;

  const auto cached = shader_map.find(uid);
  if (cached != shader_map.end())
  {
    const bool is_pending = cached->second.second;
    if (!is_pending)
      return cached->second.first;

    // A pending placeholder is dropped so the synchronous compile below can replace it.
    shader_map.erase(cached);
  }

  // No usable entry: generate the GLSL and compile it to SPIR-V.
  ShaderCompiler::SPIRVCodeVector spv;
  VkShaderModule module = VK_NULL_HANDLE;
  ShaderCode source_code = UberShader::GenVertexShader(
      APIType::Vulkan, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  const auto& glsl = source_code.GetBuffer();

  const bool compiled = ShaderCompiler::CompileVertexShader(&spv, glsl.c_str(), glsl.length());
  if (compiled)
    module = Util::CreateShaderModule(spv.data(), spv.size());

  // Only successfully created modules are persisted to disk and counted in the stats.
  if (module != VK_NULL_HANDLE)
  {
    m_uber_vs_cache.disk_cache.Append(uid, spv.data(), static_cast<u32>(spv.size()));
    INCSTAT(stats.numVertexShadersCreated);
    INCSTAT(stats.numVertexShadersAlive);
  }

  // Null results are stored too, so a failing shader is not recompiled on every lookup.
  shader_map.emplace(uid, std::make_pair(module, false));
  return module;
}
|
||||
|
||||
// Returns the pixel ubershader module for `uid`, compiling it on the calling thread when the
// cache has no finished entry. The result may be VK_NULL_HANDLE if compilation or module
// creation fails; that null result is cached as well.
VkShaderModule ShaderCache::GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid)
{
  auto& shader_map = m_uber_ps_cache.shader_map;

  const auto cached = shader_map.find(uid);
  if (cached != shader_map.end())
  {
    const bool is_pending = cached->second.second;
    if (!is_pending)
      return cached->second.first;

    // A pending placeholder is dropped so the synchronous compile below can replace it.
    shader_map.erase(cached);
  }

  // No usable entry: generate the GLSL and compile it to SPIR-V.
  ShaderCompiler::SPIRVCodeVector spv;
  VkShaderModule module = VK_NULL_HANDLE;
  ShaderCode source_code =
      UberShader::GenPixelShader(APIType::Vulkan, ShaderHostConfig::GetCurrent(), uid.GetUidData());
  const auto& glsl = source_code.GetBuffer();

  const bool compiled = ShaderCompiler::CompileFragmentShader(&spv, glsl.c_str(), glsl.length());
  if (compiled)
    module = Util::CreateShaderModule(spv.data(), spv.size());

  // Only successfully created modules are persisted to disk and counted in the stats.
  if (module != VK_NULL_HANDLE)
  {
    m_uber_ps_cache.disk_cache.Append(uid, spv.data(), static_cast<u32>(spv.size()));
    INCSTAT(stats.numPixelShadersCreated);
    INCSTAT(stats.numPixelShadersAlive);
  }

  // Null results are stored too, so a failing shader is not recompiled on every lookup.
  shader_map.emplace(uid, std::make_pair(module, false));
  return module;
}
|
||||
|
||||
|
@ -782,6 +927,9 @@ void ShaderCache::RecompileSharedShaders()
|
|||
|
||||
void ShaderCache::ReloadShaderAndPipelineCaches()
|
||||
{
|
||||
m_async_shader_compiler->WaitUntilCompletion();
|
||||
m_async_shader_compiler->RetrieveWorkItems();
|
||||
|
||||
SavePipelineCache();
|
||||
DestroyShaderCaches();
|
||||
DestroyPipelineCache();
|
||||
|
@ -795,6 +943,9 @@ void ShaderCache::ReloadShaderAndPipelineCaches()
|
|||
{
|
||||
CreatePipelineCache();
|
||||
}
|
||||
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
PrecompileUberShaders();
|
||||
}
|
||||
|
||||
std::string ShaderCache::GetUtilityShaderHeader() const
|
||||
|
@ -1026,4 +1177,214 @@ void ShaderCache::DestroySharedShaders()
|
|||
DestroyShader(m_screen_quad_geometry_shader);
|
||||
DestroyShader(m_passthrough_geometry_shader);
|
||||
}
|
||||
|
||||
// Requests a throw-away pipeline for one (vertex ubershader, geometry shader, pixel ubershader)
// combination.
//
// The pipeline uses an all-zero vertex declaration and fixed no-cull / no-depth-test /
// no-blend state, and is requested through GetPipelineWithCacheResultAsync(); the returned
// value is ignored.
//
// vuid: vertex ubershader UID, compiled synchronously if not cached.
// guid: geometry shader UID; only compiled when it is not a passthrough shader and the
//       device supports geometry shaders, otherwise no geometry stage is set.
// puid: pixel ubershader UID, compiled synchronously if not cached.
void ShaderCache::CreateDummyPipeline(const UberShader::VertexShaderUid& vuid,
                                      const GeometryShaderUid& guid,
                                      const UberShader::PixelShaderUid& puid)
{
  PortableVertexDeclaration vertex_decl;
  std::memset(&vertex_decl, 0, sizeof(vertex_decl));

  // Value-initialize the key. The switch below has no default case, so with a
  // default-initialized struct an unexpected primitive_type would leave primitive_topology
  // indeterminate before it is used for the pipeline lookup.
  PipelineInfo pinfo = {};
  pinfo.vertex_format =
      static_cast<const VertexFormat*>(VertexLoaderManager::GetUberVertexFormat(vertex_decl));
  pinfo.pipeline_layout = g_object_cache->GetPipelineLayout(
      g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation() ?
          PIPELINE_LAYOUT_BBOX :
          PIPELINE_LAYOUT_STANDARD);
  pinfo.vs = GetVertexUberShaderForUid(vuid);
  pinfo.gs = (!guid.GetUidData()->IsPassthrough() && g_vulkan_context->SupportsGeometryShaders()) ?
                 GetGeometryShaderForUid(guid) :
                 VK_NULL_HANDLE;
  pinfo.ps = GetPixelUberShaderForUid(puid);
  pinfo.render_pass = FramebufferManager::GetInstance()->GetEFBLoadRenderPass();
  pinfo.rasterization_state.bits = Util::GetNoCullRasterizationState().bits;
  pinfo.depth_stencil_state.bits = Util::GetNoDepthTestingDepthStencilState().bits;
  pinfo.blend_state.hex = Util::GetNoBlendingBlendState().hex;

  switch (guid.GetUidData()->primitive_type)
  {
  case PRIMITIVE_POINTS:
    pinfo.primitive_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
    break;
  case PRIMITIVE_LINES:
    pinfo.primitive_topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
    break;
  case PRIMITIVE_TRIANGLES:
    // Strips are used when the backend reports primitive restart support, lists otherwise.
    pinfo.primitive_topology = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
                                   VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP :
                                   VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
    break;
  }

  GetPipelineWithCacheResultAsync(pinfo);
}
|
||||
|
||||
void ShaderCache::PrecompileUberShaders()
|
||||
{
|
||||
UberShader::EnumerateVertexShaderUids([&](const UberShader::VertexShaderUid& vuid) {
|
||||
UberShader::EnumeratePixelShaderUids([&](const UberShader::PixelShaderUid& puid) {
|
||||
// UIDs must have compatible texgens, a mismatching combination will never be queried.
|
||||
if (vuid.GetUidData()->num_texgens != puid.GetUidData()->num_texgens)
|
||||
return;
|
||||
|
||||
EnumerateGeometryShaderUids([&](const GeometryShaderUid& guid) {
|
||||
if (guid.GetUidData()->numTexGens != vuid.GetUidData()->num_texgens)
|
||||
return;
|
||||
|
||||
CreateDummyPipeline(vuid, guid, puid);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
WaitForBackgroundCompilesToComplete();
|
||||
|
||||
// Switch to the runtime/background thread config.
|
||||
m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads());
|
||||
}
|
||||
|
||||
void ShaderCache::WaitForBackgroundCompilesToComplete()
|
||||
{
|
||||
m_async_shader_compiler->WaitUntilCompletion([](size_t completed, size_t total) {
|
||||
Host_UpdateProgressDialog(GetStringT("Compiling shaders...").c_str(),
|
||||
static_cast<int>(completed), static_cast<int>(total));
|
||||
});
|
||||
m_async_shader_compiler->RetrieveWorkItems();
|
||||
Host_UpdateProgressDialog("", -1, -1);
|
||||
}
|
||||
|
||||
// Forwards to the async shader compiler's RetrieveWorkItems().
void ShaderCache::RetrieveAsyncShaders()
|
||||
{
|
||||
m_async_shader_compiler->RetrieveWorkItems();
|
||||
}
|
||||
|
||||
std::pair<VkShaderModule, bool> ShaderCache::GetVertexShaderForUidAsync(const VertexShaderUid& uid)
|
||||
{
|
||||
auto it = m_vs_cache.shader_map.find(uid);
|
||||
if (it != m_vs_cache.shader_map.end())
|
||||
return it->second;
|
||||
|
||||
// Kick a compile job off.
|
||||
m_async_shader_compiler->QueueWorkItem(
|
||||
m_async_shader_compiler->CreateWorkItem<VertexShaderCompilerWorkItem>(uid));
|
||||
m_vs_cache.shader_map.emplace(uid,
|
||||
std::make_pair(static_cast<VkShaderModule>(VK_NULL_HANDLE), true));
|
||||
return std::make_pair<VkShaderModule, bool>(VK_NULL_HANDLE, true);
|
||||
}
|
||||
|
||||
std::pair<VkShaderModule, bool> ShaderCache::GetPixelShaderForUidAsync(const PixelShaderUid& uid)
|
||||
{
|
||||
auto it = m_ps_cache.shader_map.find(uid);
|
||||
if (it != m_ps_cache.shader_map.end())
|
||||
return it->second;
|
||||
|
||||
// Kick a compile job off.
|
||||
m_async_shader_compiler->QueueWorkItem(
|
||||
m_async_shader_compiler->CreateWorkItem<PixelShaderCompilerWorkItem>(uid));
|
||||
m_ps_cache.shader_map.emplace(uid,
|
||||
std::make_pair(static_cast<VkShaderModule>(VK_NULL_HANDLE), true));
|
||||
return std::make_pair<VkShaderModule, bool>(VK_NULL_HANDLE, true);
|
||||
}
|
||||
|
||||
bool ShaderCache::VertexShaderCompilerWorkItem::Compile()
|
||||
{
|
||||
ShaderCode code =
|
||||
GenerateVertexShaderCode(APIType::Vulkan, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
|
||||
if (!ShaderCompiler::CompileVertexShader(&m_spirv, code.GetBuffer().c_str(),
|
||||
code.GetBuffer().length()))
|
||||
return true;
|
||||
|
||||
m_module = Util::CreateShaderModule(m_spirv.data(), m_spirv.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShaderCache::VertexShaderCompilerWorkItem::Retrieve()
|
||||
{
|
||||
auto it = g_shader_cache->m_vs_cache.shader_map.find(m_uid);
|
||||
if (it == g_shader_cache->m_vs_cache.shader_map.end())
|
||||
{
|
||||
g_shader_cache->m_vs_cache.shader_map.emplace(m_uid, std::make_pair(m_module, false));
|
||||
g_shader_cache->m_vs_cache.disk_cache.Append(m_uid, m_spirv.data(),
|
||||
static_cast<u32>(m_spirv.size()));
|
||||
return;
|
||||
}
|
||||
|
||||
// The main thread may have also compiled this shader.
|
||||
if (!it->second.second)
|
||||
{
|
||||
if (m_module != VK_NULL_HANDLE)
|
||||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_module, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
// No longer pending.
|
||||
it->second.first = m_module;
|
||||
it->second.second = false;
|
||||
g_shader_cache->m_vs_cache.disk_cache.Append(m_uid, m_spirv.data(),
|
||||
static_cast<u32>(m_spirv.size()));
|
||||
}
|
||||
|
||||
bool ShaderCache::PixelShaderCompilerWorkItem::Compile()
|
||||
{
|
||||
ShaderCode code =
|
||||
GeneratePixelShaderCode(APIType::Vulkan, ShaderHostConfig::GetCurrent(), m_uid.GetUidData());
|
||||
if (!ShaderCompiler::CompileFragmentShader(&m_spirv, code.GetBuffer().c_str(),
|
||||
code.GetBuffer().length()))
|
||||
return true;
|
||||
|
||||
m_module = Util::CreateShaderModule(m_spirv.data(), m_spirv.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShaderCache::PixelShaderCompilerWorkItem::Retrieve()
|
||||
{
|
||||
auto it = g_shader_cache->m_ps_cache.shader_map.find(m_uid);
|
||||
if (it == g_shader_cache->m_ps_cache.shader_map.end())
|
||||
{
|
||||
g_shader_cache->m_ps_cache.shader_map.emplace(m_uid, std::make_pair(m_module, false));
|
||||
g_shader_cache->m_ps_cache.disk_cache.Append(m_uid, m_spirv.data(),
|
||||
static_cast<u32>(m_spirv.size()));
|
||||
return;
|
||||
}
|
||||
|
||||
// The main thread may have also compiled this shader.
|
||||
if (!it->second.second)
|
||||
{
|
||||
if (m_module != VK_NULL_HANDLE)
|
||||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_module, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
// No longer pending.
|
||||
it->second.first = m_module;
|
||||
it->second.second = false;
|
||||
g_shader_cache->m_ps_cache.disk_cache.Append(m_uid, m_spirv.data(),
|
||||
static_cast<u32>(m_spirv.size()));
|
||||
}
|
||||
|
||||
// Creates the pipeline described by m_info via the global shader cache and stores the
// resulting handle in m_pipeline. Always returns true; the handle itself is not checked here.
bool ShaderCache::PipelineCompilerWorkItem::Compile()
|
||||
{
|
||||
m_pipeline = g_shader_cache->CreatePipeline(m_info);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShaderCache::PipelineCompilerWorkItem::Retrieve()
|
||||
{
|
||||
auto it = g_shader_cache->m_pipeline_objects.find(m_info);
|
||||
if (it == g_shader_cache->m_pipeline_objects.end())
|
||||
{
|
||||
g_shader_cache->m_pipeline_objects.emplace(m_info, std::make_pair(m_pipeline, false));
|
||||
return;
|
||||
}
|
||||
|
||||
// The main thread may have also compiled this shader.
|
||||
if (!it->second.second)
|
||||
{
|
||||
if (m_pipeline != VK_NULL_HANDLE)
|
||||
vkDestroyPipeline(g_vulkan_context->GetDevice(), m_pipeline, nullptr);
|
||||
return;
|
||||
}
|
||||
|
||||
// No longer pending.
|
||||
it->second.first = m_pipeline;
|
||||
it->second.second = false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,16 +10,21 @@
|
|||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/LinearDiskCache.h"
|
||||
|
||||
#include "VideoBackends/Vulkan/Constants.h"
|
||||
#include "VideoBackends/Vulkan/ObjectCache.h"
|
||||
#include "VideoBackends/Vulkan/ShaderCompiler.h"
|
||||
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include "VideoCommon/GeometryShaderGen.h"
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
#include "VideoCommon/RenderState.h"
|
||||
#include "VideoCommon/UberShaderPixel.h"
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
|
||||
namespace Vulkan
|
||||
|
@ -92,8 +97,17 @@ public:
|
|||
VkShaderModule GetGeometryShaderForUid(const GeometryShaderUid& uid);
|
||||
VkShaderModule GetPixelShaderForUid(const PixelShaderUid& uid);
|
||||
|
||||
// Ubershader caches
|
||||
VkShaderModule GetVertexUberShaderForUid(const UberShader::VertexShaderUid& uid);
|
||||
VkShaderModule GetPixelUberShaderForUid(const UberShader::PixelShaderUid& uid);
|
||||
|
||||
// Accesses ShaderGen shader caches asynchronously
|
||||
std::pair<VkShaderModule, bool> GetVertexShaderForUidAsync(const VertexShaderUid& uid);
|
||||
std::pair<VkShaderModule, bool> GetPixelShaderForUidAsync(const PixelShaderUid& uid);
|
||||
|
||||
// Perform at startup, create descriptor layouts, compiles all static shaders.
|
||||
bool Initialize();
|
||||
void Shutdown();
|
||||
|
||||
// Creates a pipeline for the specified description. The resulting pipeline, if successful
|
||||
// is not stored anywhere, this is left up to the caller.
|
||||
|
@ -106,6 +120,8 @@ public:
|
|||
// resulted in a pipeline being created, the second field of the return value will be false,
|
||||
// otherwise for a cache hit it will be true.
|
||||
std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
|
||||
std::pair<std::pair<VkPipeline, bool>, bool>
|
||||
GetPipelineWithCacheResultAsync(const PipelineInfo& info);
|
||||
|
||||
// Creates a compute pipeline, and does not track the handle.
|
||||
VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
|
||||
|
@ -134,6 +150,10 @@ public:
|
|||
VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; }
|
||||
VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; }
|
||||
VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; }
|
||||
void PrecompileUberShaders();
|
||||
void WaitForBackgroundCompilesToComplete();
|
||||
void RetrieveAsyncShaders();
|
||||
|
||||
private:
|
||||
bool CreatePipelineCache();
|
||||
bool LoadPipelineCache();
|
||||
|
@ -144,17 +164,26 @@ private:
|
|||
bool CompileSharedShaders();
|
||||
void DestroySharedShaders();
|
||||
|
||||
// We generate a dummy pipeline with some defaults in the blend/depth states,
|
||||
// that way the driver is forced to compile something (looking at you, NVIDIA).
|
||||
// It can then hopefully re-use part of this pipeline for others in the future.
|
||||
void CreateDummyPipeline(const UberShader::VertexShaderUid& vuid, const GeometryShaderUid& guid,
|
||||
const UberShader::PixelShaderUid& puid);
|
||||
|
||||
template <typename Uid>
|
||||
struct ShaderModuleCache
|
||||
{
|
||||
std::map<Uid, VkShaderModule> shader_map;
|
||||
std::map<Uid, std::pair<VkShaderModule, bool>> shader_map;
|
||||
LinearDiskCache<Uid, u32> disk_cache;
|
||||
};
|
||||
ShaderModuleCache<VertexShaderUid> m_vs_cache;
|
||||
ShaderModuleCache<GeometryShaderUid> m_gs_cache;
|
||||
ShaderModuleCache<PixelShaderUid> m_ps_cache;
|
||||
ShaderModuleCache<UberShader::VertexShaderUid> m_uber_vs_cache;
|
||||
ShaderModuleCache<UberShader::PixelShaderUid> m_uber_ps_cache;
|
||||
|
||||
std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
|
||||
std::unordered_map<PipelineInfo, std::pair<VkPipeline, bool>, PipelineInfoHash>
|
||||
m_pipeline_objects;
|
||||
std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
|
||||
m_compute_pipeline_objects;
|
||||
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
|
||||
|
@ -165,6 +194,45 @@ private:
|
|||
VkShaderModule m_passthrough_vertex_shader = VK_NULL_HANDLE;
|
||||
VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE;
|
||||
VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE;
|
||||
|
||||
std::unique_ptr<VideoCommon::AsyncShaderCompiler> m_async_shader_compiler;
|
||||
|
||||
// TODO: Use templates to reduce the number of these classes.
|
||||
// Async work item that compiles the vertex shader identified by a VertexShaderUid.
class VertexShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
// Stores a copy of the UID to compile.
VertexShaderCompilerWorkItem(const VertexShaderUid& uid) : m_uid(uid) {}
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
// UID of the shader this item compiles.
VertexShaderUid m_uid;
|
||||
// SPIR-V code vector for this item's shader.
ShaderCompiler::SPIRVCodeVector m_spirv;
|
||||
// Shader module for this item; starts out as VK_NULL_HANDLE.
VkShaderModule m_module = VK_NULL_HANDLE;
|
||||
};
|
||||
// Async work item that compiles the pixel shader identified by a PixelShaderUid.
class PixelShaderCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
// Stores a copy of the UID to compile.
PixelShaderCompilerWorkItem(const PixelShaderUid& uid) : m_uid(uid) {}
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
// UID of the shader this item compiles.
PixelShaderUid m_uid;
|
||||
// SPIR-V code vector for this item's shader.
ShaderCompiler::SPIRVCodeVector m_spirv;
|
||||
// Shader module for this item; starts out as VK_NULL_HANDLE.
VkShaderModule m_module = VK_NULL_HANDLE;
|
||||
};
|
||||
class PipelineCompilerWorkItem : public VideoCommon::AsyncShaderCompiler::WorkItem
|
||||
{
|
||||
public:
|
||||
PipelineCompilerWorkItem(const PipelineInfo& info) : m_info(info) {}
|
||||
bool Compile() override;
|
||||
void Retrieve() override;
|
||||
|
||||
private:
|
||||
PipelineInfo m_info;
|
||||
VkPipeline m_pipeline;
|
||||
};
|
||||
};
|
||||
|
||||
extern std::unique_ptr<ShaderCache> g_shader_cache;
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "VideoCommon/GeometryShaderManager.h"
|
||||
#include "VideoCommon/PixelShaderManager.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
|
||||
|
@ -77,12 +78,13 @@ bool StateTracker::Initialize()
|
|||
m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
|
||||
m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS;
|
||||
m_bbox_enabled = false;
|
||||
ClearShaders();
|
||||
|
||||
// Initialize all samplers to point by default
|
||||
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
|
||||
{
|
||||
m_bindings.ps_samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
m_bindings.ps_samplers[i].imageView = VK_NULL_HANDLE;
|
||||
m_bindings.ps_samplers[i].imageView = g_object_cache->GetDummyImageView();
|
||||
m_bindings.ps_samplers[i].sampler = g_object_cache->GetPointSampler();
|
||||
}
|
||||
|
||||
|
@ -154,6 +156,10 @@ void StateTracker::ReloadPipelineUIDCache()
|
|||
PipelineInserter inserter(this);
|
||||
m_uid_cache.OpenAndRead(filename, inserter);
|
||||
}
|
||||
|
||||
// If we were using background compilation, ensure everything is ready before continuing.
|
||||
if (g_ActiveConfig.bBackgroundShaderCompiling)
|
||||
g_shader_cache->WaitForBackgroundCompilesToComplete();
|
||||
}
|
||||
|
||||
void StateTracker::AppendToPipelineUIDCache(const PipelineInfo& info)
|
||||
|
@ -178,7 +184,8 @@ bool StateTracker::PrecachePipelineUID(const SerializedPipelineUID& uid)
|
|||
|
||||
// Need to create the vertex declaration first, rather than deferring to when a game creates a
|
||||
// vertex loader that uses this format, since we need it to create a pipeline.
|
||||
pinfo.vertex_format = VertexFormat::GetOrCreateMatchingFormat(uid.vertex_decl);
|
||||
pinfo.vertex_format =
|
||||
static_cast<VertexFormat*>(VertexLoaderManager::GetOrCreateMatchingFormat(uid.vertex_decl));
|
||||
pinfo.pipeline_layout = uid.ps_uid.GetUidData()->bounding_box ?
|
||||
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX) :
|
||||
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
|
||||
|
@ -209,11 +216,19 @@ bool StateTracker::PrecachePipelineUID(const SerializedPipelineUID& uid)
|
|||
pinfo.blend_state.hex = uid.blend_state_bits;
|
||||
pinfo.primitive_topology = uid.primitive_topology;
|
||||
|
||||
VkPipeline pipeline = g_shader_cache->GetPipeline(pinfo);
|
||||
if (pipeline == VK_NULL_HANDLE)
|
||||
if (g_ActiveConfig.bBackgroundShaderCompiling)
|
||||
{
|
||||
WARN_LOG(VIDEO, "Failed to get pipeline from cached UID.");
|
||||
return false;
|
||||
// Use async for multithreaded compilation.
|
||||
g_shader_cache->GetPipelineWithCacheResultAsync(pinfo);
|
||||
}
|
||||
else
|
||||
{
|
||||
VkPipeline pipeline = g_shader_cache->GetPipeline(pinfo);
|
||||
if (pipeline == VK_NULL_HANDLE)
|
||||
{
|
||||
WARN_LOG(VIDEO, "Failed to get pipeline from cached UID.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// We don't need to do anything with this pipeline, just make sure it exists.
|
||||
|
@ -267,11 +282,11 @@ void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& ren
|
|||
|
||||
void StateTracker::SetVertexFormat(const VertexFormat* vertex_format)
|
||||
{
|
||||
if (m_pipeline_state.vertex_format == vertex_format)
|
||||
if (m_vertex_format == vertex_format)
|
||||
return;
|
||||
|
||||
m_pipeline_state.vertex_format = vertex_format;
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
|
||||
m_vertex_format = vertex_format;
|
||||
UpdatePipelineVertexFormat();
|
||||
}
|
||||
|
||||
void StateTracker::SetPrimitiveTopology(VkPrimitiveTopology primitive_topology)
|
||||
|
@ -323,14 +338,87 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type)
|
|||
{
|
||||
VertexShaderUid vs_uid = GetVertexShaderUid();
|
||||
PixelShaderUid ps_uid = GetPixelShaderUid();
|
||||
|
||||
bool changed = false;
|
||||
|
||||
if (vs_uid != m_vs_uid)
|
||||
bool use_ubershaders = g_ActiveConfig.bDisableSpecializedShaders;
|
||||
if (g_ActiveConfig.CanBackgroundCompileShaders() && !g_ActiveConfig.bDisableSpecializedShaders)
|
||||
{
|
||||
m_pipeline_state.vs = g_shader_cache->GetVertexShaderForUid(vs_uid);
|
||||
m_vs_uid = vs_uid;
|
||||
changed = true;
|
||||
// Look up both VS and PS, and check if we can compile it asynchronously.
|
||||
auto vs = g_shader_cache->GetVertexShaderForUidAsync(vs_uid);
|
||||
auto ps = g_shader_cache->GetPixelShaderForUidAsync(ps_uid);
|
||||
if (vs.second || ps.second)
|
||||
{
|
||||
// One of the shaders is still pending. Use the ubershader for both.
|
||||
use_ubershaders = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use the standard shaders for both.
|
||||
if (m_pipeline_state.vs != vs.first)
|
||||
{
|
||||
m_pipeline_state.vs = vs.first;
|
||||
m_vs_uid = vs_uid;
|
||||
changed = true;
|
||||
}
|
||||
if (m_pipeline_state.ps != ps.first)
|
||||
{
|
||||
m_pipeline_state.ps = ps.first;
|
||||
m_ps_uid = ps_uid;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Normal shader path. No ubershaders.
|
||||
if (vs_uid != m_vs_uid)
|
||||
{
|
||||
m_vs_uid = vs_uid;
|
||||
m_pipeline_state.vs = g_shader_cache->GetVertexShaderForUid(vs_uid);
|
||||
changed = true;
|
||||
}
|
||||
if (ps_uid != m_ps_uid)
|
||||
{
|
||||
m_ps_uid = ps_uid;
|
||||
m_pipeline_state.ps = g_shader_cache->GetPixelShaderForUid(ps_uid);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Ubershader fallback?
|
||||
bool uber_vertex_shader = use_ubershaders || g_ActiveConfig.bForceVertexUberShaders;
|
||||
bool uber_pixel_shader = use_ubershaders || g_ActiveConfig.bForcePixelUberShaders;
|
||||
bool using_ubershaders = uber_vertex_shader || uber_pixel_shader;
|
||||
|
||||
// Switching to/from ubershaders? Have to adjust the vertex format and pipeline layout.
|
||||
if (using_ubershaders != m_using_ubershaders)
|
||||
{
|
||||
m_using_ubershaders = using_ubershaders;
|
||||
UpdatePipelineLayout();
|
||||
UpdatePipelineVertexFormat();
|
||||
}
|
||||
|
||||
if (uber_vertex_shader)
|
||||
{
|
||||
UberShader::VertexShaderUid uber_vs_uid = UberShader::GetVertexShaderUid();
|
||||
VkShaderModule vs = g_shader_cache->GetVertexUberShaderForUid(uber_vs_uid);
|
||||
if (vs != m_pipeline_state.vs)
|
||||
{
|
||||
m_uber_vs_uid = uber_vs_uid;
|
||||
m_pipeline_state.vs = vs;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
if (uber_pixel_shader)
|
||||
{
|
||||
UberShader::PixelShaderUid uber_ps_uid = UberShader::GetPixelShaderUid();
|
||||
VkShaderModule ps = g_shader_cache->GetPixelUberShaderForUid(uber_ps_uid);
|
||||
if (ps != m_pipeline_state.ps)
|
||||
{
|
||||
m_uber_ps_uid = uber_ps_uid;
|
||||
m_pipeline_state.ps = ps;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (g_vulkan_context->SupportsGeometryShaders())
|
||||
|
@ -338,29 +426,39 @@ bool StateTracker::CheckForShaderChanges(u32 gx_primitive_type)
|
|||
GeometryShaderUid gs_uid = GetGeometryShaderUid(gx_primitive_type);
|
||||
if (gs_uid != m_gs_uid)
|
||||
{
|
||||
m_gs_uid = gs_uid;
|
||||
if (gs_uid.GetUidData()->IsPassthrough())
|
||||
m_pipeline_state.gs = VK_NULL_HANDLE;
|
||||
else
|
||||
m_pipeline_state.gs = g_shader_cache->GetGeometryShaderForUid(gs_uid);
|
||||
|
||||
m_gs_uid = gs_uid;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (ps_uid != m_ps_uid)
|
||||
{
|
||||
m_pipeline_state.ps = g_shader_cache->GetPixelShaderForUid(ps_uid);
|
||||
m_ps_uid = ps_uid;
|
||||
changed = true;
|
||||
}
|
||||
|
||||
if (changed)
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
void StateTracker::ClearShaders()
|
||||
{
|
||||
// Set the UIDs to something that will never match, so on the first access they are checked.
|
||||
std::memset(&m_vs_uid, 0xFF, sizeof(m_vs_uid));
|
||||
std::memset(&m_gs_uid, 0xFF, sizeof(m_gs_uid));
|
||||
std::memset(&m_ps_uid, 0xFF, sizeof(m_ps_uid));
|
||||
std::memset(&m_uber_vs_uid, 0xFF, sizeof(m_uber_vs_uid));
|
||||
std::memset(&m_uber_ps_uid, 0xFF, sizeof(m_uber_ps_uid));
|
||||
|
||||
m_pipeline_state.vs = VK_NULL_HANDLE;
|
||||
m_pipeline_state.gs = VK_NULL_HANDLE;
|
||||
m_pipeline_state.ps = VK_NULL_HANDLE;
|
||||
m_pipeline_state.vertex_format = nullptr;
|
||||
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
|
||||
}
|
||||
|
||||
void StateTracker::UpdateVertexShaderConstants()
|
||||
{
|
||||
if (!VertexShaderManager::dirty || !ReserveConstantStorage())
|
||||
|
@ -557,24 +655,8 @@ void StateTracker::SetBBoxEnable(bool enable)
|
|||
if (m_bbox_enabled == enable)
|
||||
return;
|
||||
|
||||
// Change the number of active descriptor sets, as well as the pipeline layout
|
||||
if (enable)
|
||||
{
|
||||
m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX);
|
||||
m_num_active_descriptor_sets = NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS;
|
||||
|
||||
// The bbox buffer never changes, so we defer descriptor updates until it is enabled.
|
||||
if (m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)
|
||||
m_dirty_flags |= DIRTY_FLAG_PS_SSBO;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pipeline_state.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
|
||||
m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS;
|
||||
}
|
||||
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING;
|
||||
m_bbox_enabled = enable;
|
||||
UpdatePipelineLayout();
|
||||
}
|
||||
|
||||
void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range)
|
||||
|
@ -590,7 +672,7 @@ void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceS
|
|||
m_bindings.ps_ssbo.range = range;
|
||||
|
||||
// Defer descriptor update until bbox is actually enabled.
|
||||
if (m_bbox_enabled)
|
||||
if (IsSSBODescriptorRequired())
|
||||
m_dirty_flags |= DIRTY_FLAG_PS_SSBO;
|
||||
}
|
||||
|
||||
|
@ -599,7 +681,7 @@ void StateTracker::UnbindTexture(VkImageView view)
|
|||
for (VkDescriptorImageInfo& it : m_bindings.ps_samplers)
|
||||
{
|
||||
if (it.imageView == view)
|
||||
it.imageView = VK_NULL_HANDLE;
|
||||
it.imageView = g_object_cache->GetDummyImageView();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -609,7 +691,7 @@ void StateTracker::InvalidateDescriptorSets()
|
|||
m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS;
|
||||
|
||||
// Defer SSBO descriptor update until bbox is actually enabled.
|
||||
if (!m_bbox_enabled)
|
||||
if (!IsSSBODescriptorRequired())
|
||||
m_dirty_flags &= ~DIRTY_FLAG_PS_SSBO;
|
||||
}
|
||||
|
||||
|
@ -886,15 +968,49 @@ void StateTracker::EndClearRenderPass()
|
|||
EndRenderPass();
|
||||
}
|
||||
|
||||
VkPipeline StateTracker::GetPipelineAndCacheUID(const PipelineInfo& info)
|
||||
VkPipeline StateTracker::GetPipelineAndCacheUID()
|
||||
{
|
||||
auto result = g_shader_cache->GetPipelineWithCacheResult(info);
|
||||
// We can't cache ubershader uids, only normal shader uids.
|
||||
if (g_ActiveConfig.CanBackgroundCompileShaders() && !m_using_ubershaders)
|
||||
{
|
||||
// Append to UID cache if it is a new pipeline.
|
||||
auto result = g_shader_cache->GetPipelineWithCacheResultAsync(m_pipeline_state);
|
||||
if (!result.second && g_ActiveConfig.bShaderCache)
|
||||
AppendToPipelineUIDCache(m_pipeline_state);
|
||||
|
||||
// Add to the UID cache if it is a new pipeline.
|
||||
if (!result.second && g_ActiveConfig.bShaderCache)
|
||||
AppendToPipelineUIDCache(info);
|
||||
// Still waiting for the pipeline to compile?
|
||||
if (!result.first.second)
|
||||
return result.first.first;
|
||||
|
||||
return result.first;
|
||||
// Use ubershader instead.
|
||||
m_using_ubershaders = true;
|
||||
UpdatePipelineLayout();
|
||||
UpdatePipelineVertexFormat();
|
||||
|
||||
PipelineInfo uber_info = m_pipeline_state;
|
||||
UberShader::VertexShaderUid uber_vuid = UberShader::GetVertexShaderUid();
|
||||
UberShader::PixelShaderUid uber_puid = UberShader::GetPixelShaderUid();
|
||||
uber_info.vs = g_shader_cache->GetVertexUberShaderForUid(uber_vuid);
|
||||
uber_info.ps = g_shader_cache->GetPixelUberShaderForUid(uber_puid);
|
||||
|
||||
auto uber_result = g_shader_cache->GetPipelineWithCacheResult(uber_info);
|
||||
return uber_result.first;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Add to the UID cache if it is a new pipeline.
|
||||
auto result = g_shader_cache->GetPipelineWithCacheResult(m_pipeline_state);
|
||||
if (!result.second && !m_using_ubershaders && g_ActiveConfig.bShaderCache)
|
||||
AppendToPipelineUIDCache(m_pipeline_state);
|
||||
|
||||
return result.first;
|
||||
}
|
||||
}
|
||||
|
||||
bool StateTracker::IsSSBODescriptorRequired() const
|
||||
{
|
||||
return m_bbox_enabled || (m_using_ubershaders && g_ActiveConfig.bBBoxEnable &&
|
||||
g_ActiveConfig.BBoxUseFragmentShaderImplementation());
|
||||
}
|
||||
|
||||
bool StateTracker::UpdatePipeline()
|
||||
|
@ -904,16 +1020,56 @@ bool StateTracker::UpdatePipeline()
|
|||
return false;
|
||||
|
||||
// Grab a new pipeline object, this can fail.
|
||||
m_pipeline_object = GetPipelineAndCacheUID(m_pipeline_state);
|
||||
m_pipeline_object = GetPipelineAndCacheUID();
|
||||
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE_BINDING;
|
||||
return m_pipeline_object != VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
void StateTracker::UpdatePipelineLayout()
|
||||
{
|
||||
const bool use_bbox_pipeline_layout = IsSSBODescriptorRequired();
|
||||
VkPipelineLayout pipeline_layout =
|
||||
use_bbox_pipeline_layout ? g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_BBOX) :
|
||||
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD);
|
||||
if (m_pipeline_state.pipeline_layout == pipeline_layout)
|
||||
return;
|
||||
|
||||
// Change the number of active descriptor sets, as well as the pipeline layout
|
||||
m_pipeline_state.pipeline_layout = pipeline_layout;
|
||||
if (use_bbox_pipeline_layout)
|
||||
{
|
||||
m_num_active_descriptor_sets = NUM_GX_DRAW_WITH_BBOX_DESCRIPTOR_SETS;
|
||||
|
||||
// The bbox buffer never changes, so we defer descriptor updates until it is enabled.
|
||||
if (m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)
|
||||
m_dirty_flags |= DIRTY_FLAG_PS_SSBO;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_num_active_descriptor_sets = NUM_GX_DRAW_DESCRIPTOR_SETS;
|
||||
}
|
||||
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE | DIRTY_FLAG_DESCRIPTOR_SET_BINDING;
|
||||
}
|
||||
|
||||
void StateTracker::UpdatePipelineVertexFormat()
|
||||
{
|
||||
const NativeVertexFormat* vertex_format =
|
||||
m_using_ubershaders ?
|
||||
VertexLoaderManager::GetUberVertexFormat(m_vertex_format->GetVertexDeclaration()) :
|
||||
m_vertex_format;
|
||||
if (m_pipeline_state.vertex_format == vertex_format)
|
||||
return;
|
||||
|
||||
m_pipeline_state.vertex_format = static_cast<const VertexFormat*>(vertex_format);
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE;
|
||||
}
|
||||
|
||||
bool StateTracker::UpdateDescriptorSet()
|
||||
{
|
||||
const size_t MAX_DESCRIPTOR_WRITES = NUM_UBO_DESCRIPTOR_SET_BINDINGS + // UBO
|
||||
NUM_PIXEL_SHADER_SAMPLERS + // Samplers
|
||||
1 + // Samplers
|
||||
1; // SSBO
|
||||
std::array<VkWriteDescriptorSet, MAX_DESCRIPTOR_WRITES> writes;
|
||||
u32 num_writes = 0;
|
||||
|
@ -954,30 +1110,22 @@ bool StateTracker::UpdateDescriptorSet()
|
|||
if (set == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
|
||||
{
|
||||
const VkDescriptorImageInfo& info = m_bindings.ps_samplers[i];
|
||||
if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
|
||||
{
|
||||
writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
nullptr,
|
||||
set,
|
||||
static_cast<uint32_t>(i),
|
||||
0,
|
||||
1,
|
||||
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
&info,
|
||||
nullptr,
|
||||
nullptr};
|
||||
}
|
||||
}
|
||||
writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
nullptr,
|
||||
set,
|
||||
0,
|
||||
0,
|
||||
static_cast<u32>(NUM_PIXEL_SHADER_SAMPLERS),
|
||||
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
m_bindings.ps_samplers.data(),
|
||||
nullptr,
|
||||
nullptr};
|
||||
|
||||
m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set;
|
||||
m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING;
|
||||
}
|
||||
|
||||
if (m_bbox_enabled &&
|
||||
(m_dirty_flags & DIRTY_FLAG_PS_SSBO ||
|
||||
if ((m_dirty_flags & DIRTY_FLAG_PS_SSBO ||
|
||||
m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE))
|
||||
{
|
||||
VkDescriptorSetLayout layout =
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
#include "VideoCommon/RenderBase.h"
|
||||
#include "VideoCommon/UberShaderPixel.h"
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
|
||||
namespace Vulkan
|
||||
|
@ -60,6 +62,7 @@ public:
|
|||
void SetBlendState(const BlendingState& state);
|
||||
|
||||
bool CheckForShaderChanges(u32 gx_primitive_type);
|
||||
void ClearShaders();
|
||||
|
||||
void UpdateVertexShaderConstants();
|
||||
void UpdateGeometryShaderConstants();
|
||||
|
@ -159,8 +162,8 @@ private:
|
|||
DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11),
|
||||
DIRTY_FLAG_PIPELINE_BINDING = (1 << 12),
|
||||
|
||||
DIRTY_FLAG_ALL_DESCRIPTOR_SETS =
|
||||
DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO
|
||||
DIRTY_FLAG_ALL_DESCRIPTOR_SETS = DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO |
|
||||
DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO
|
||||
};
|
||||
|
||||
bool Initialize();
|
||||
|
@ -178,9 +181,15 @@ private:
|
|||
|
||||
// Obtains a Vulkan pipeline object for the specified pipeline configuration.
|
||||
// Also adds this pipeline configuration to the UID cache if it is not present already.
|
||||
VkPipeline GetPipelineAndCacheUID(const PipelineInfo& info);
|
||||
VkPipeline GetPipelineAndCacheUID();
|
||||
|
||||
// Are bounding box ubershaders enabled? If so, we need to ensure the SSBO is set up,
|
||||
// since the bbox writes are determined by a uniform.
|
||||
bool IsSSBODescriptorRequired() const;
|
||||
|
||||
bool UpdatePipeline();
|
||||
void UpdatePipelineLayout();
|
||||
void UpdatePipelineVertexFormat();
|
||||
bool UpdateDescriptorSet();
|
||||
|
||||
// Allocates storage in the uniform buffer of the specified size. If this storage cannot be
|
||||
|
@ -203,10 +212,14 @@ private:
|
|||
VertexShaderUid m_vs_uid = {};
|
||||
GeometryShaderUid m_gs_uid = {};
|
||||
PixelShaderUid m_ps_uid = {};
|
||||
UberShader::VertexShaderUid m_uber_vs_uid = {};
|
||||
UberShader::PixelShaderUid m_uber_ps_uid = {};
|
||||
bool m_using_ubershaders = false;
|
||||
|
||||
// pipeline state
|
||||
PipelineInfo m_pipeline_state = {};
|
||||
VkPipeline m_pipeline_object = VK_NULL_HANDLE;
|
||||
const VertexFormat* m_vertex_format = nullptr;
|
||||
|
||||
// shader bindings
|
||||
std::array<VkDescriptorSet, NUM_DESCRIPTOR_SET_BIND_POINTS> m_descriptor_sets = {};
|
||||
|
|
|
@ -575,8 +575,7 @@ void UtilityShaderDraw::BindDescriptors()
|
|||
{
|
||||
// TODO: This method is a mess, clean it up
|
||||
std::array<VkDescriptorSet, NUM_DESCRIPTOR_SET_BIND_POINTS> bind_descriptor_sets = {};
|
||||
std::array<VkWriteDescriptorSet, NUM_UBO_DESCRIPTOR_SET_BINDINGS + NUM_PIXEL_SHADER_SAMPLERS>
|
||||
set_writes = {};
|
||||
std::array<VkWriteDescriptorSet, NUM_UBO_DESCRIPTOR_SET_BINDINGS + 1> set_writes = {};
|
||||
uint32_t num_set_writes = 0;
|
||||
|
||||
VkDescriptorBufferInfo dummy_uniform_buffer = {
|
||||
|
@ -633,29 +632,32 @@ void UtilityShaderDraw::BindDescriptors()
|
|||
// Check if we have any at all, skip the binding process entirely if we don't
|
||||
if (first_active_sampler != NUM_PIXEL_SHADER_SAMPLERS)
|
||||
{
|
||||
// We need to fill it with non-empty images.
|
||||
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
|
||||
{
|
||||
if (m_ps_samplers[i].imageView == VK_NULL_HANDLE)
|
||||
{
|
||||
m_ps_samplers[i].imageView = g_object_cache->GetDummyImageView();
|
||||
m_ps_samplers[i].sampler = g_object_cache->GetPointSampler();
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate a new descriptor set
|
||||
VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(
|
||||
g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS));
|
||||
if (set == VK_NULL_HANDLE)
|
||||
PanicAlert("Failed to allocate descriptor set for utility draw");
|
||||
|
||||
for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++)
|
||||
{
|
||||
const VkDescriptorImageInfo& info = m_ps_samplers[i];
|
||||
if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
|
||||
{
|
||||
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
nullptr,
|
||||
set,
|
||||
static_cast<uint32_t>(i),
|
||||
0,
|
||||
1,
|
||||
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
&info,
|
||||
nullptr,
|
||||
nullptr};
|
||||
}
|
||||
}
|
||||
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
nullptr,
|
||||
set,
|
||||
0,
|
||||
0,
|
||||
static_cast<u32>(NUM_PIXEL_SHADER_SAMPLERS),
|
||||
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
m_ps_samplers.data(),
|
||||
nullptr,
|
||||
nullptr};
|
||||
|
||||
bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set;
|
||||
}
|
||||
|
|
|
@ -53,17 +53,9 @@ VertexFormat::VertexFormat(const PortableVertexDeclaration& in_vtx_decl)
|
|||
SetupInputState();
|
||||
}
|
||||
|
||||
VertexFormat* VertexFormat::GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
|
||||
const VkPipelineVertexInputStateCreateInfo& VertexFormat::GetVertexInputStateInfo() const
|
||||
{
|
||||
auto vertex_format_map = VertexLoaderManager::GetNativeVertexFormatMap();
|
||||
auto iter = vertex_format_map->find(decl);
|
||||
if (iter == vertex_format_map->end())
|
||||
{
|
||||
auto ipair = vertex_format_map->emplace(decl, std::make_unique<VertexFormat>(decl));
|
||||
iter = ipair.first;
|
||||
}
|
||||
|
||||
return static_cast<VertexFormat*>(iter->second.get());
|
||||
return m_input_state_info;
|
||||
}
|
||||
|
||||
void VertexFormat::MapAttributes()
|
||||
|
@ -136,9 +128,4 @@ void VertexFormat::AddAttribute(uint32_t location, uint32_t binding, VkFormat fo
|
|||
m_attribute_descriptions[m_num_attributes].offset = offset;
|
||||
m_num_attributes++;
|
||||
}
|
||||
|
||||
void VertexFormat::SetupVertexPointers()
|
||||
{
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -16,24 +16,13 @@ class VertexFormat : public ::NativeVertexFormat
|
|||
public:
|
||||
VertexFormat(const PortableVertexDeclaration& in_vtx_decl);
|
||||
|
||||
// Creates or obtains a pointer to a VertexFormat representing decl.
|
||||
// If this results in a VertexFormat being created, if the game later uses a matching vertex
|
||||
// declaration, the one that was previously created will be used.
|
||||
static VertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl);
|
||||
|
||||
// Passed to pipeline state creation
|
||||
const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const
|
||||
{
|
||||
return m_input_state_info;
|
||||
}
|
||||
const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const;
|
||||
|
||||
// Converting PortableVertexDeclaration -> Vulkan types
|
||||
void MapAttributes();
|
||||
void SetupInputState();
|
||||
|
||||
// Not used in the Vulkan backend.
|
||||
void SetupVertexPointers() override;
|
||||
|
||||
private:
|
||||
void AddAttribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset);
|
||||
|
||||
|
|
|
@ -236,6 +236,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
|
|||
config->backend_info.bSupportsMultithreading = true; // Assumed support.
|
||||
config->backend_info.bSupportsComputeShaders = true; // Assumed support.
|
||||
config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support.
|
||||
config->backend_info.bSupportsBitfield = true; // Assumed support.
|
||||
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support.
|
||||
config->backend_info.bSupportsInternalResolutionFrameDumps = true; // Assumed support.
|
||||
config->backend_info.bSupportsPostProcessing = true; // Assumed support.
|
||||
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.
|
||||
|
|
|
@ -253,6 +253,7 @@ bool VideoBackend::Initialize(void* window_handle)
|
|||
g_renderer.reset();
|
||||
StateTracker::DestroyInstance();
|
||||
g_framebuffer_manager.reset();
|
||||
g_shader_cache->Shutdown();
|
||||
g_shader_cache.reset();
|
||||
g_object_cache.reset();
|
||||
g_command_buffer_mgr.reset();
|
||||
|
@ -262,6 +263,14 @@ bool VideoBackend::Initialize(void* window_handle)
|
|||
return false;
|
||||
}
|
||||
|
||||
// Ensure all pipelines previously used by the game have been created.
|
||||
StateTracker::GetInstance()->ReloadPipelineUIDCache();
|
||||
|
||||
// Lastly, precompile ubershaders, if requested.
|
||||
// This has to be done after the texture cache and shader cache are initialized.
|
||||
if (g_ActiveConfig.CanPrecompileUberShaders())
|
||||
g_shader_cache->PrecompileUberShaders();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -293,6 +302,7 @@ void VideoBackend::Shutdown()
|
|||
void VideoBackend::Video_Cleanup()
|
||||
{
|
||||
g_command_buffer_mgr->WaitForGPUIdle();
|
||||
g_shader_cache->Shutdown();
|
||||
|
||||
// Save all cached pipelines out to disk for next time.
|
||||
if (g_ActiveConfig.bShaderCache)
|
||||
|
|
|
@ -0,0 +1,233 @@
|
|||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "VideoCommon/AsyncShaderCompiler.h"
|
||||
#include <thread>
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/Logging/Log.h"
|
||||
|
||||
namespace VideoCommon
|
||||
{
|
||||
AsyncShaderCompiler::AsyncShaderCompiler()
|
||||
{
|
||||
}
|
||||
|
||||
AsyncShaderCompiler::~AsyncShaderCompiler()
|
||||
{
|
||||
// Pending work can be left at shutdown.
|
||||
// The work item classes are expected to clean up after themselves.
|
||||
_assert_(!HasWorkerThreads());
|
||||
_assert_(m_completed_work.empty());
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::QueueWorkItem(WorkItemPtr item)
|
||||
{
|
||||
// If no worker threads are available, compile synchronously.
|
||||
if (!HasWorkerThreads())
|
||||
{
|
||||
item->Compile();
|
||||
m_completed_work.push_back(std::move(item));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_pending_work_lock);
|
||||
m_pending_work.push_back(std::move(item));
|
||||
m_worker_thread_wake.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::RetrieveWorkItems()
|
||||
{
|
||||
std::deque<WorkItemPtr> completed_work;
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_completed_work_lock);
|
||||
m_completed_work.swap(completed_work);
|
||||
}
|
||||
|
||||
while (!completed_work.empty())
|
||||
{
|
||||
completed_work.front()->Retrieve();
|
||||
completed_work.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::HasPendingWork()
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_pending_work_lock);
|
||||
return !m_pending_work.empty() || m_busy_workers.load() != 0;
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::WaitUntilCompletion()
|
||||
{
|
||||
while (HasPendingWork())
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::WaitUntilCompletion(
|
||||
const std::function<void(size_t, size_t)>& progress_callback)
|
||||
{
|
||||
if (!HasPendingWork())
|
||||
return;
|
||||
|
||||
// Wait a second before opening a progress dialog.
|
||||
// This way, if the operation completes quickly, we don't annoy the user.
|
||||
constexpr u32 CHECK_INTERVAL_MS = 50;
|
||||
constexpr auto CHECK_INTERVAL = std::chrono::milliseconds(CHECK_INTERVAL_MS);
|
||||
for (u32 i = 0; i < (1000 / CHECK_INTERVAL_MS); i++)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(CHECK_INTERVAL));
|
||||
if (!HasPendingWork())
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab the number of pending items. We use this to work out how many are left.
|
||||
size_t total_items = 0;
|
||||
{
|
||||
// Safe to hold both locks here, since nowhere else does.
|
||||
std::lock_guard<std::mutex> pending_guard(m_pending_work_lock);
|
||||
std::lock_guard<std::mutex> completed_guard(m_completed_work_lock);
|
||||
total_items = m_completed_work.size() + m_pending_work.size() + m_busy_workers.load() + 1;
|
||||
}
|
||||
|
||||
// Update progress while the compiles complete.
|
||||
for (;;)
|
||||
{
|
||||
size_t remaining_items;
|
||||
{
|
||||
std::lock_guard<std::mutex> pending_guard(m_pending_work_lock);
|
||||
if (m_pending_work.empty() && !m_busy_workers.load())
|
||||
break;
|
||||
remaining_items = m_pending_work.size();
|
||||
}
|
||||
|
||||
progress_callback(total_items - remaining_items, total_items);
|
||||
std::this_thread::sleep_for(CHECK_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::StartWorkerThreads(u32 num_worker_threads)
|
||||
{
|
||||
if (num_worker_threads == 0)
|
||||
return true;
|
||||
|
||||
for (u32 i = 0; i < num_worker_threads; i++)
|
||||
{
|
||||
void* thread_param = nullptr;
|
||||
if (!WorkerThreadInitMainThread(&thread_param))
|
||||
{
|
||||
WARN_LOG(VIDEO, "Failed to initialize shader compiler worker thread.");
|
||||
break;
|
||||
}
|
||||
|
||||
m_worker_thread_start_result.store(false);
|
||||
|
||||
std::thread thr(&AsyncShaderCompiler::WorkerThreadEntryPoint, this, thread_param);
|
||||
m_init_event.Wait();
|
||||
|
||||
if (!m_worker_thread_start_result.load())
|
||||
{
|
||||
WARN_LOG(VIDEO, "Failed to start shader compiler worker thread.");
|
||||
thr.join();
|
||||
break;
|
||||
}
|
||||
|
||||
m_worker_threads.push_back(std::move(thr));
|
||||
}
|
||||
|
||||
return HasWorkerThreads();
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::ResizeWorkerThreads(u32 num_worker_threads)
|
||||
{
|
||||
if (m_worker_threads.size() == num_worker_threads)
|
||||
return true;
|
||||
|
||||
StopWorkerThreads();
|
||||
return StartWorkerThreads(num_worker_threads);
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::HasWorkerThreads() const
|
||||
{
|
||||
return !m_worker_threads.empty();
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::StopWorkerThreads()
|
||||
{
|
||||
if (!HasWorkerThreads())
|
||||
return;
|
||||
|
||||
// Signal worker threads to stop, and wake all of them.
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(m_pending_work_lock);
|
||||
m_exit_flag.Set();
|
||||
m_worker_thread_wake.notify_all();
|
||||
}
|
||||
|
||||
// Wait for worker threads to exit.
|
||||
for (std::thread& thr : m_worker_threads)
|
||||
thr.join();
|
||||
m_worker_threads.clear();
|
||||
m_exit_flag.Clear();
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::WorkerThreadInitMainThread(void** param)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AsyncShaderCompiler::WorkerThreadInitWorkerThread(void* param)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::WorkerThreadExit(void* param)
|
||||
{
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::WorkerThreadEntryPoint(void* param)
|
||||
{
|
||||
// Initialize worker thread with backend-specific method.
|
||||
if (!WorkerThreadInitWorkerThread(param))
|
||||
{
|
||||
WARN_LOG(VIDEO, "Failed to initialize shader compiler worker.");
|
||||
m_worker_thread_start_result.store(false);
|
||||
m_init_event.Set();
|
||||
return;
|
||||
}
|
||||
|
||||
m_worker_thread_start_result.store(true);
|
||||
m_init_event.Set();
|
||||
|
||||
WorkerThreadRun();
|
||||
|
||||
WorkerThreadExit(param);
|
||||
}
|
||||
|
||||
void AsyncShaderCompiler::WorkerThreadRun()
|
||||
{
|
||||
std::unique_lock<std::mutex> pending_lock(m_pending_work_lock);
|
||||
while (!m_exit_flag.IsSet())
|
||||
{
|
||||
m_worker_thread_wake.wait(pending_lock);
|
||||
|
||||
while (!m_pending_work.empty() && !m_exit_flag.IsSet())
|
||||
{
|
||||
m_busy_workers++;
|
||||
WorkItemPtr item(std::move(m_pending_work.front()));
|
||||
m_pending_work.pop_front();
|
||||
pending_lock.unlock();
|
||||
|
||||
if (item->Compile())
|
||||
{
|
||||
std::lock_guard<std::mutex> completed_guard(m_completed_work_lock);
|
||||
m_completed_work.push_back(std::move(item));
|
||||
}
|
||||
|
||||
pending_lock.lock();
|
||||
m_busy_workers--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <deque>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Event.h"
|
||||
#include "Common/Flag.h"
|
||||
|
||||
namespace VideoCommon
|
||||
{
|
||||
class AsyncShaderCompiler
|
||||
{
|
||||
public:
|
||||
class WorkItem
|
||||
{
|
||||
public:
|
||||
virtual ~WorkItem() = default;
|
||||
virtual bool Compile() = 0;
|
||||
virtual void Retrieve() = 0;
|
||||
};
|
||||
|
||||
using WorkItemPtr = std::unique_ptr<WorkItem>;
|
||||
|
||||
AsyncShaderCompiler();
|
||||
virtual ~AsyncShaderCompiler();
|
||||
|
||||
template <typename T, typename... Params>
|
||||
static WorkItemPtr CreateWorkItem(Params... params)
|
||||
{
|
||||
return std::unique_ptr<WorkItem>(new T(params...));
|
||||
}
|
||||
|
||||
void QueueWorkItem(WorkItemPtr item);
|
||||
void RetrieveWorkItems();
|
||||
bool HasPendingWork();
|
||||
|
||||
// Simpler version without progress updates.
|
||||
void WaitUntilCompletion();
|
||||
|
||||
// Calls progress_callback periodically, with completed_items, and total_items.
|
||||
void WaitUntilCompletion(const std::function<void(size_t, size_t)>& progress_callback);
|
||||
|
||||
// Needed because of calling virtual methods in shutdown procedure.
|
||||
bool StartWorkerThreads(u32 num_worker_threads);
|
||||
bool ResizeWorkerThreads(u32 num_worker_threads);
|
||||
bool HasWorkerThreads() const;
|
||||
void StopWorkerThreads();
|
||||
|
||||
protected:
|
||||
virtual bool WorkerThreadInitMainThread(void** param);
|
||||
virtual bool WorkerThreadInitWorkerThread(void* param);
|
||||
virtual void WorkerThreadExit(void* param);
|
||||
|
||||
private:
|
||||
void WorkerThreadEntryPoint(void* param);
|
||||
void WorkerThreadRun();
|
||||
|
||||
Common::Flag m_exit_flag;
|
||||
Common::Event m_init_event;
|
||||
|
||||
std::vector<std::thread> m_worker_threads;
|
||||
std::atomic_bool m_worker_thread_start_result{false};
|
||||
|
||||
std::deque<WorkItemPtr> m_pending_work;
|
||||
std::mutex m_pending_work_lock;
|
||||
std::condition_variable m_worker_thread_wake;
|
||||
std::atomic_size_t m_busy_workers{0};
|
||||
|
||||
std::deque<WorkItemPtr> m_completed_work;
|
||||
std::mutex m_completed_work_lock;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -24,8 +24,7 @@ float FogParam0::GetA() const
|
|||
float FogParam3::GetC() const
|
||||
{
|
||||
// scale mantissa from 11 to 23 bits
|
||||
const u32 integral = (static_cast<u32>(c_sign) << 31) | (static_cast<u32>(c_exp) << 23) |
|
||||
(static_cast<u32>(c_mant) << 12);
|
||||
const u32 integral = (c_sign.Value() << 31) | (c_exp.Value() << 23) | (c_mant.Value() << 12);
|
||||
|
||||
float real;
|
||||
std::memcpy(&real, &integral, sizeof(u32));
|
||||
|
|
|
@ -301,40 +301,37 @@ struct TevStageCombiner
|
|||
{
|
||||
union ColorCombiner
|
||||
{
|
||||
struct // abc=8bit,d=10bit
|
||||
{
|
||||
u32 d : 4; // TEVSELCC_X
|
||||
u32 c : 4; // TEVSELCC_X
|
||||
u32 b : 4; // TEVSELCC_X
|
||||
u32 a : 4; // TEVSELCC_X
|
||||
// abc=8bit,d=10bit
|
||||
BitField<0, 4, u32> d; // TEVSELCC_X
|
||||
BitField<4, 4, u32> c; // TEVSELCC_X
|
||||
BitField<8, 4, u32> b; // TEVSELCC_X
|
||||
BitField<12, 4, u32> a; // TEVSELCC_X
|
||||
|
||||
u32 bias : 2;
|
||||
u32 op : 1;
|
||||
u32 clamp : 1;
|
||||
BitField<16, 2, u32> bias;
|
||||
BitField<18, 1, u32> op;
|
||||
BitField<19, 1, u32> clamp;
|
||||
|
||||
BitField<20, 2, u32> shift;
|
||||
BitField<22, 2, u32> dest; // 1,2,3
|
||||
|
||||
u32 shift : 2;
|
||||
u32 dest : 2; // 1,2,3
|
||||
};
|
||||
u32 hex;
|
||||
};
|
||||
union AlphaCombiner
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 rswap : 2;
|
||||
u32 tswap : 2;
|
||||
u32 d : 3; // TEVSELCA_
|
||||
u32 c : 3; // TEVSELCA_
|
||||
u32 b : 3; // TEVSELCA_
|
||||
u32 a : 3; // TEVSELCA_
|
||||
BitField<0, 2, u32> rswap;
|
||||
BitField<2, 2, u32> tswap;
|
||||
BitField<4, 3, u32> d; // TEVSELCA_
|
||||
BitField<7, 3, u32> c; // TEVSELCA_
|
||||
BitField<10, 3, u32> b; // TEVSELCA_
|
||||
BitField<13, 3, u32> a; // TEVSELCA_
|
||||
|
||||
u32 bias : 2; // GXTevBias
|
||||
u32 op : 1;
|
||||
u32 clamp : 1;
|
||||
BitField<16, 2, u32> bias; // GXTevBias
|
||||
BitField<18, 1, u32> op;
|
||||
BitField<19, 1, u32> clamp;
|
||||
|
||||
BitField<20, 2, u32> shift;
|
||||
BitField<22, 2, u32> dest; // 1,2,3
|
||||
|
||||
u32 shift : 2;
|
||||
u32 dest : 2; // 1,2,3
|
||||
};
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
|
@ -353,21 +350,18 @@ struct TevStageCombiner
|
|||
|
||||
union TevStageIndirect
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 bt : 2; // Indirect tex stage ID
|
||||
u32 fmt : 2; // Format: ITF_X
|
||||
u32 bias : 3; // ITB_X
|
||||
u32 bs : 2; // ITBA_X, indicates which coordinate will become the 'bump alpha'
|
||||
u32 mid : 4; // Matrix ID to multiply offsets with
|
||||
u32 sw : 3; // ITW_X, wrapping factor for S of regular coord
|
||||
u32 tw : 3; // ITW_X, wrapping factor for T of regular coord
|
||||
u32 lb_utclod : 1; // Use modified or unmodified texture coordinates for LOD computation
|
||||
u32 fb_addprev : 1; // 1 if the texture coordinate results from the previous TEV stage should
|
||||
// be added
|
||||
u32 pad0 : 3;
|
||||
u32 rid : 8;
|
||||
};
|
||||
BitField<0, 2, u32> bt; // Indirect tex stage ID
|
||||
BitField<2, 2, u32> fmt; // Format: ITF_X
|
||||
BitField<4, 3, u32> bias; // ITB_X
|
||||
BitField<7, 2, u32> bs; // ITBA_X, indicates which coordinate will become the 'bump alpha'
|
||||
BitField<9, 4, u32> mid; // Matrix ID to multiply offsets with
|
||||
BitField<13, 3, u32> sw; // ITW_X, wrapping factor for S of regular coord
|
||||
BitField<16, 3, u32> tw; // ITW_X, wrapping factor for T of regular coord
|
||||
BitField<19, 1, u32> lb_utclod; // Use modified or unmodified texture
|
||||
// coordinates for LOD computation
|
||||
BitField<20, 1, u32> fb_addprev; // 1 if the texture coordinate results from the previous TEV
|
||||
// stage should be added
|
||||
|
||||
struct
|
||||
{
|
||||
u32 hex : 21;
|
||||
|
@ -381,28 +375,23 @@ union TevStageIndirect
|
|||
|
||||
union TwoTevStageOrders
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 texmap0 : 3; // Indirect tex stage texmap
|
||||
u32 texcoord0 : 3;
|
||||
u32 enable0 : 1; // 1 if should read from texture
|
||||
u32 colorchan0 : 3; // RAS1_CC_X
|
||||
BitField<0, 3, u32> texmap0; // Indirect tex stage texmap
|
||||
BitField<3, 3, u32> texcoord0;
|
||||
BitField<6, 1, u32> enable0; // 1 if should read from texture
|
||||
BitField<7, 3, u32> colorchan0; // RAS1_CC_X
|
||||
|
||||
u32 pad0 : 2;
|
||||
BitField<12, 3, u32> texmap1;
|
||||
BitField<15, 3, u32> texcoord1;
|
||||
BitField<18, 1, u32> enable1; // 1 if should read from texture
|
||||
BitField<19, 3, u32> colorchan1; // RAS1_CC_X
|
||||
|
||||
u32 texmap1 : 3;
|
||||
u32 texcoord1 : 3;
|
||||
u32 enable1 : 1; // 1 if should read from texture
|
||||
u32 colorchan1 : 3; // RAS1_CC_X
|
||||
BitField<24, 8, u32> rid;
|
||||
|
||||
u32 pad1 : 2;
|
||||
u32 rid : 8;
|
||||
};
|
||||
u32 hex;
|
||||
int getTexMap(int i) const { return i ? texmap1 : texmap0; }
|
||||
int getTexCoord(int i) const { return i ? texcoord1 : texcoord0; }
|
||||
int getEnable(int i) const { return i ? enable1 : enable0; }
|
||||
int getColorChan(int i) const { return i ? colorchan1 : colorchan0; }
|
||||
u32 getTexMap(int i) const { return i ? texmap1.Value() : texmap0.Value(); }
|
||||
u32 getTexCoord(int i) const { return i ? texcoord1.Value() : texcoord0.Value(); }
|
||||
u32 getEnable(int i) const { return i ? enable1.Value() : enable0.Value(); }
|
||||
u32 getColorChan(int i) const { return i ? colorchan1.Value() : colorchan0.Value(); }
|
||||
};
|
||||
|
||||
union TEXSCALE
|
||||
|
@ -527,20 +516,14 @@ union TexTLUT
|
|||
|
||||
union ZTex1
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 bias : 24;
|
||||
};
|
||||
BitField<0, 24, u32> bias;
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
union ZTex2
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 type : 2; // TEV_Z_TYPE_X
|
||||
u32 op : 2; // GXZTexOp
|
||||
};
|
||||
BitField<0, 2, u32> type; // TEV_Z_TYPE_X
|
||||
BitField<2, 2, u32> op; // GXZTexOp
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
|
@ -681,14 +664,12 @@ union FogParam0
|
|||
|
||||
union FogParam3
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 c_mant : 11;
|
||||
u32 c_exp : 8;
|
||||
u32 c_sign : 1;
|
||||
u32 proj : 1; // 0 - perspective, 1 - orthographic
|
||||
u32 fsel : 3; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 - backward exp, 7 - backward exp2
|
||||
};
|
||||
BitField<0, 11, u32> c_mant;
|
||||
BitField<11, 8, u32> c_exp;
|
||||
BitField<19, 1, u32> c_sign;
|
||||
BitField<20, 1, u32> proj; // 0 - perspective, 1 - orthographic
|
||||
BitField<21, 3, u32> fsel; // 0 - off, 2 - linear, 4 - exp, 5 - exp2, 6 -
|
||||
// backward exp, 7 - backward exp2
|
||||
|
||||
// amount to subtract from eyespacez after range adjustment
|
||||
float GetC() const;
|
||||
|
@ -698,15 +679,12 @@ union FogParam3
|
|||
|
||||
union FogRangeKElement
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 HI : 12;
|
||||
u32 LO : 12;
|
||||
u32 regid : 8;
|
||||
};
|
||||
BitField<0, 12, u32> HI;
|
||||
BitField<12, 12, u32> LO;
|
||||
BitField<24, 8, u32> regid;
|
||||
|
||||
// TODO: Which scaling coefficient should we use here? This is just a guess!
|
||||
float GetValue(int i) const { return (i ? HI : LO) / 256.f; }
|
||||
float GetValue(int i) const { return (i ? HI.Value() : LO.Value()) / 256.f; }
|
||||
u32 HEX;
|
||||
};
|
||||
|
||||
|
@ -714,13 +692,9 @@ struct FogRangeParams
|
|||
{
|
||||
union RangeBase
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 Center : 10; // viewport center + 342
|
||||
u32 Enabled : 1;
|
||||
u32 unused : 13;
|
||||
u32 regid : 8;
|
||||
};
|
||||
BitField<0, 10, u32> Center; // viewport center + 342
|
||||
BitField<10, 1, u32> Enabled;
|
||||
BitField<24, 8, u32> regid;
|
||||
u32 hex;
|
||||
};
|
||||
RangeBase Base;
|
||||
|
@ -736,12 +710,9 @@ struct FogParams
|
|||
|
||||
union FogColor
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 b : 8;
|
||||
u32 g : 8;
|
||||
u32 r : 8;
|
||||
};
|
||||
BitField<0, 8, u32> b;
|
||||
BitField<8, 8, u32> g;
|
||||
BitField<16, 8, u32> r;
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
|
@ -771,11 +742,8 @@ union ZMode
|
|||
|
||||
union ConstantAlpha
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 alpha : 8;
|
||||
u32 enable : 1;
|
||||
};
|
||||
BitField<0, 8, u32> alpha;
|
||||
BitField<8, 1, u32> enable;
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
|
@ -881,19 +849,16 @@ union TevReg
|
|||
|
||||
union TevKSel
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 swap1 : 2;
|
||||
u32 swap2 : 2;
|
||||
u32 kcsel0 : 5;
|
||||
u32 kasel0 : 5;
|
||||
u32 kcsel1 : 5;
|
||||
u32 kasel1 : 5;
|
||||
};
|
||||
BitField<0, 2, u32> swap1;
|
||||
BitField<2, 2, u32> swap2;
|
||||
BitField<4, 5, u32> kcsel0;
|
||||
BitField<9, 5, u32> kasel0;
|
||||
BitField<14, 5, u32> kcsel1;
|
||||
BitField<19, 5, u32> kasel1;
|
||||
u32 hex;
|
||||
|
||||
int getKC(int i) const { return i ? kcsel1 : kcsel0; }
|
||||
int getKA(int i) const { return i ? kasel1 : kasel0; }
|
||||
u32 getKC(int i) const { return i ? kcsel1.Value() : kcsel0.Value(); }
|
||||
u32 getKA(int i) const { return i ? kasel1.Value() : kasel0.Value(); }
|
||||
};
|
||||
|
||||
union AlphaTest
|
||||
|
|
|
@ -93,6 +93,9 @@ static void BPWritten(const BPCmd& bp)
|
|||
(u32)bpmem.genMode.cullmode, (u32)bpmem.genMode.numindstages,
|
||||
(u32)bpmem.genMode.zfreeze);
|
||||
|
||||
if (bp.changes)
|
||||
PixelShaderManager::SetGenModeChanged();
|
||||
|
||||
// Only call SetGenerationMode when cull mode changes.
|
||||
if (bp.changes & 0xC000)
|
||||
SetGenerationMode();
|
||||
|
@ -155,12 +158,20 @@ static void BPWritten(const BPCmd& bp)
|
|||
// Set Color Mask
|
||||
if (bp.changes & 0x18) // colorupdate | alphaupdate
|
||||
SetColorMask();
|
||||
|
||||
// Dither
|
||||
if (bp.changes & 0x04)
|
||||
PixelShaderManager::SetBlendModeChanged();
|
||||
}
|
||||
return;
|
||||
case BPMEM_CONSTANTALPHA: // Set Destination Alpha
|
||||
PRIM_LOG("constalpha: alp=%d, en=%d", bpmem.dstalpha.alpha, bpmem.dstalpha.enable);
|
||||
if (bp.changes & 0xFF)
|
||||
PixelShaderManager::SetDestAlpha();
|
||||
PRIM_LOG("constalpha: alp=%d, en=%d", bpmem.dstalpha.alpha.Value(),
|
||||
bpmem.dstalpha.enable.Value());
|
||||
if (bp.changes)
|
||||
{
|
||||
PixelShaderManager::SetAlpha();
|
||||
PixelShaderManager::SetDestAlphaChanged();
|
||||
}
|
||||
if (bp.changes & 0x100)
|
||||
SetBlendMode();
|
||||
return;
|
||||
|
@ -237,6 +248,7 @@ static void BPWritten(const BPCmd& bp)
|
|||
// the number of lines copied is determined by the y scale * source efb height
|
||||
|
||||
BoundingBox::active = false;
|
||||
PixelShaderManager::SetBoundingBoxActive(false);
|
||||
|
||||
float yScale;
|
||||
if (PE_copy.scale_invert)
|
||||
|
@ -317,12 +329,13 @@ static void BPWritten(const BPCmd& bp)
|
|||
PixelShaderManager::SetAlpha();
|
||||
if (bp.changes)
|
||||
{
|
||||
PixelShaderManager::SetAlphaTestChanged();
|
||||
g_renderer->SetColorMask();
|
||||
SetBlendMode();
|
||||
}
|
||||
return;
|
||||
case BPMEM_BIAS: // BIAS
|
||||
PRIM_LOG("ztex bias=0x%x", bpmem.ztex1.bias);
|
||||
PRIM_LOG("ztex bias=0x%x", bpmem.ztex1.bias.Value());
|
||||
if (bp.changes)
|
||||
PixelShaderManager::SetZTextureBias();
|
||||
return;
|
||||
|
@ -331,7 +344,7 @@ static void BPWritten(const BPCmd& bp)
|
|||
if (bp.changes & 3)
|
||||
PixelShaderManager::SetZTextureTypeChanged();
|
||||
if (bp.changes & 12)
|
||||
VertexShaderManager::SetViewportChanged();
|
||||
PixelShaderManager::SetZTextureOpChanged();
|
||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||
const char* pzop[] = {"DISABLE", "ADD", "REPLACE", "?"};
|
||||
const char* pztype[] = {"Z8", "Z16", "Z24", "?"};
|
||||
|
@ -389,6 +402,7 @@ static void BPWritten(const BPCmd& bp)
|
|||
{
|
||||
u8 offset = bp.address & 2;
|
||||
BoundingBox::active = true;
|
||||
PixelShaderManager::SetBoundingBoxActive(true);
|
||||
|
||||
if (g_ActiveConfig.backend_info.bSupportsBBox && g_ActiveConfig.bBBoxEnable)
|
||||
{
|
||||
|
@ -425,6 +439,11 @@ static void BPWritten(const BPCmd& bp)
|
|||
* 3 BC0 - Ind. Tex Stage 0 NTexCoord
|
||||
* 0 BI0 - Ind. Tex Stage 0 NTexMap */
|
||||
case BPMEM_IREF:
|
||||
{
|
||||
if (bp.changes)
|
||||
PixelShaderManager::SetTevIndirectChanged();
|
||||
return;
|
||||
}
|
||||
|
||||
case BPMEM_TEV_KSEL: // Texture Environment Swap Mode Table 0
|
||||
case BPMEM_TEV_KSEL + 1: // Texture Environment Swap Mode Table 1
|
||||
|
@ -434,6 +453,8 @@ static void BPWritten(const BPCmd& bp)
|
|||
case BPMEM_TEV_KSEL + 5: // Texture Environment Swap Mode Table 5
|
||||
case BPMEM_TEV_KSEL + 6: // Texture Environment Swap Mode Table 6
|
||||
case BPMEM_TEV_KSEL + 7: // Texture Environment Swap Mode Table 7
|
||||
PixelShaderManager::SetTevKSel(bp.address - BPMEM_TEV_KSEL, bp.newvalue);
|
||||
return;
|
||||
|
||||
/* This Register can be used to limit to which bits of BP registers is
|
||||
* actually written to. The mask is only valid for the next BP write,
|
||||
|
@ -566,6 +587,7 @@ static void BPWritten(const BPCmd& bp)
|
|||
// -------------------------
|
||||
case BPMEM_TREF:
|
||||
case BPMEM_TREF + 4:
|
||||
PixelShaderManager::SetTevOrder(bp.address - BPMEM_TREF, bp.newvalue);
|
||||
return;
|
||||
// ----------------------
|
||||
// Set wrap size
|
||||
|
@ -629,15 +651,18 @@ static void BPWritten(const BPCmd& bp)
|
|||
// --------------
|
||||
// Indirect Tev
|
||||
// --------------
|
||||
case BPMEM_IND_CMD: // Indirect 0-15
|
||||
case BPMEM_IND_CMD:
|
||||
PixelShaderManager::SetTevIndirectChanged();
|
||||
return;
|
||||
// --------------------------------------------------
|
||||
// Set Color/Alpha of a Tev
|
||||
// BPMEM_TEV_COLOR_ENV - Dest, Shift, Clamp, Sub, Bias, Sel A, Sel B, Sel C, Sel D
|
||||
// BPMEM_TEV_ALPHA_ENV - Dest, Shift, Clamp, Sub, Bias, Sel A, Sel B, Sel C, Sel D, T Swap, R Swap
|
||||
// --------------------------------------------------
|
||||
case BPMEM_TEV_COLOR_ENV: // Texture Environment Color/Alpha 0-7
|
||||
case BPMEM_TEV_COLOR_ENV + 16: // Texture Environment Color/Alpha 8-15
|
||||
case BPMEM_TEV_COLOR_ENV: // Texture Environment 1
|
||||
case BPMEM_TEV_COLOR_ENV + 16:
|
||||
PixelShaderManager::SetTevCombiner((bp.address - BPMEM_TEV_COLOR_ENV) >> 1,
|
||||
(bp.address - BPMEM_TEV_COLOR_ENV) & 1, bp.newvalue);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
|
@ -1281,7 +1306,7 @@ void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
|
|||
"Tex sel: %d\n",
|
||||
(data[0] - BPMEM_TEV_ALPHA_ENV) / 2, tevin[ac.a], tevin[ac.b], tevin[ac.c],
|
||||
tevin[ac.d], tevbias[ac.bias], tevop[ac.op], no_yes[ac.clamp],
|
||||
tevscale[ac.shift], tevout[ac.dest], ac.rswap, ac.tswap);
|
||||
tevscale[ac.shift], tevout[ac.dest], ac.rswap.Value(), ac.tswap.Value());
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
set(SRCS
|
||||
AbstractTexture.cpp
|
||||
AsyncRequests.cpp
|
||||
AsyncShaderCompiler.cpp
|
||||
BoundingBox.cpp
|
||||
BPFunctions.cpp
|
||||
BPMemory.cpp
|
||||
|
@ -31,6 +32,9 @@ set(SRCS
|
|||
RenderState.cpp
|
||||
ShaderGenCommon.cpp
|
||||
Statistics.cpp
|
||||
UberShaderCommon.cpp
|
||||
UberShaderPixel.cpp
|
||||
UberShaderVertex.cpp
|
||||
TextureCacheBase.cpp
|
||||
TextureConfig.cpp
|
||||
TextureConversionShader.cpp
|
||||
|
|
|
@ -24,11 +24,31 @@ struct PixelShaderConstants
|
|||
int4 fogi;
|
||||
float4 fogf[2];
|
||||
float4 zslope;
|
||||
float4 efbscale;
|
||||
float efbscale[2];
|
||||
|
||||
// Constants from here onwards are only used in ubershaders.
|
||||
u32 genmode; // .z
|
||||
u32 alphaTest; // .w
|
||||
u32 fogParam3; // .x
|
||||
u32 fogRangeBase; // .y
|
||||
u32 dstalpha; // .z
|
||||
u32 ztex_op; // .w
|
||||
u32 early_ztest; // .x (bool)
|
||||
u32 rgba6_format; // .y (bool)
|
||||
u32 dither; // .z (bool)
|
||||
u32 bounding_box; // .w (bool)
|
||||
uint4 pack1[16]; // .xy - combiners, .z - tevind, .w - iref
|
||||
uint4 pack2[8]; // .x - tevorder, .y - tevksel
|
||||
int4 konst[32]; // .rgba
|
||||
};
|
||||
|
||||
struct VertexShaderConstants
|
||||
{
|
||||
u32 components; // .x
|
||||
u32 xfmem_dualTexInfo; // .y
|
||||
u32 xfmem_numColorChans; // .z
|
||||
u32 pad1; // .w
|
||||
|
||||
float4 posnormalmatrix[6];
|
||||
float4 projection[4];
|
||||
int4 materials[4];
|
||||
|
@ -45,7 +65,10 @@ struct VertexShaderConstants
|
|||
float4 normalmatrices[32];
|
||||
float4 posttransformmatrices[64];
|
||||
float4 pixelcentercorrection;
|
||||
float4 viewport;
|
||||
float viewport[2]; // .xy
|
||||
float pad2[2]; // .zw
|
||||
|
||||
uint4 xfmem_pack1[8]; // .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha
|
||||
};
|
||||
|
||||
struct GeometryShaderConstants
|
||||
|
|
|
@ -98,7 +98,10 @@ static BugInfo m_known_bugs[] = {
|
|||
BUG_BROKEN_BITWISE_OP_NEGATION, -1.0, -1.0, true},
|
||||
{API_VULKAN, OS_ALL, VENDOR_ATI, DRIVER_ATI, Family::UNKNOWN, BUG_PRIMITIVE_RESTART, -1.0, -1.0,
|
||||
true},
|
||||
};
|
||||
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_I965, Family::UNKNOWN,
|
||||
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true},
|
||||
{API_OPENGL, OS_LINUX, VENDOR_MESA, DRIVER_NOUVEAU, Family::UNKNOWN,
|
||||
BUG_SHARED_CONTEXT_SHADER_COMPILATION, -1.0, -1.0, true}};
|
||||
|
||||
static std::map<Bug, BugInfo> m_bugs;
|
||||
|
||||
|
|
|
@ -247,6 +247,12 @@ enum Bug
|
|||
// fail compilation with no useful diagnostic log. This can be worked around by storing
|
||||
// the negated value to a temporary variable then using that in the bitwise op.
|
||||
BUG_BROKEN_BITWISE_OP_NEGATION,
|
||||
|
||||
// Bug: Shaders are recompiled on the main thread after being previously compiled on
|
||||
// a worker thread on Mesa i965.
|
||||
// Started version: -1
|
||||
// Ended Version: -1
|
||||
BUG_SHARED_CONTEXT_SHADER_COMPILATION,
|
||||
};
|
||||
|
||||
// Initializes our internal vendor, device family, and driver version
|
||||
|
|
|
@ -364,3 +364,23 @@ static void EndPrimitive(ShaderCode& out, const ShaderHostConfig& host_config,
|
|||
else
|
||||
out.Write("\toutput.RestartStrip();\n");
|
||||
}
|
||||
|
||||
void EnumerateGeometryShaderUids(const std::function<void(const GeometryShaderUid&)>& callback)
|
||||
{
|
||||
GeometryShaderUid uid;
|
||||
std::memset(&uid, 0, sizeof(uid));
|
||||
|
||||
static constexpr std::array<u32, 3> primitive_lut = {
|
||||
{PRIMITIVE_TRIANGLES, PRIMITIVE_LINES, PRIMITIVE_POINTS}};
|
||||
for (u32 primitive : primitive_lut)
|
||||
{
|
||||
auto* guid = uid.GetUidData<geometry_shader_uid_data>();
|
||||
guid->primitive_type = primitive;
|
||||
|
||||
for (u32 texgens = 0; texgens <= 8; texgens++)
|
||||
{
|
||||
guid->numTexGens = texgens;
|
||||
callback(uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/ShaderGenCommon.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
|
@ -28,3 +29,4 @@ typedef ShaderUid<geometry_shader_uid_data> GeometryShaderUid;
|
|||
ShaderCode GenerateGeometryShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
|
||||
const geometry_shader_uid_data* uid_data);
|
||||
GeometryShaderUid GetGeometryShaderUid(u32 primitive_type);
|
||||
void EnumerateGeometryShaderUids(const std::function<void(const GeometryShaderUid&)>& callback);
|
||||
|
|
|
@ -194,9 +194,6 @@ void VideoBackendBase::InitializeShared()
|
|||
g_Config.UpdateProjectionHack();
|
||||
g_Config.VerifyValidity();
|
||||
UpdateActiveConfig();
|
||||
|
||||
// Notify the core that the video backend is ready
|
||||
Host_Message(WM_USER_CREATE);
|
||||
}
|
||||
|
||||
void VideoBackendBase::ShutdownShared()
|
||||
|
|
|
@ -106,8 +106,6 @@ class NativeVertexFormat : NonCopyable
|
|||
{
|
||||
public:
|
||||
virtual ~NativeVertexFormat() {}
|
||||
virtual void SetupVertexPointers() = 0;
|
||||
|
||||
u32 GetVertexStride() const { return vtx_decl.stride; }
|
||||
const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
|
||||
protected:
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "VideoCommon/CommandProcessor.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/PixelEngine.h"
|
||||
#include "VideoCommon/PixelShaderManager.h"
|
||||
|
||||
namespace PixelEngine
|
||||
{
|
||||
|
@ -231,6 +232,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
{
|
||||
mmio->Register(base | (PE_BBOX_LEFT + 2 * i), MMIO::ComplexRead<u16>([i](u32) {
|
||||
BoundingBox::active = false;
|
||||
PixelShaderManager::SetBoundingBoxActive(false);
|
||||
return g_video_backend->Video_GetBoundingBox(i);
|
||||
}),
|
||||
MMIO::InvalidWrite<u16>());
|
||||
|
|
|
@ -179,7 +179,7 @@ PixelShaderUid GetPixelShaderUid()
|
|||
u32 numStages = uid_data->genMode_numtevstages + 1;
|
||||
|
||||
const bool forced_early_z =
|
||||
g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() &&
|
||||
bpmem.UseEarlyDepthTest() &&
|
||||
(g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
|
||||
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
|
||||
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
|
||||
|
@ -192,18 +192,6 @@ PixelShaderUid GetPixelShaderUid()
|
|||
uid_data->per_pixel_depth = per_pixel_depth;
|
||||
uid_data->forced_early_z = forced_early_z;
|
||||
|
||||
if (!uid_data->forced_early_z && bpmem.UseEarlyDepthTest() &&
|
||||
(!g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED))
|
||||
{
|
||||
static bool warn_once = true;
|
||||
if (warn_once)
|
||||
WARN_LOG(VIDEO, "Early z test enabled but not possible to emulate with current "
|
||||
"configuration. Make sure to enable fast depth calculations. If this message "
|
||||
"still shows up your hardware isn't able to emulate the feature properly (a "
|
||||
"GPU with D3D 11.0 / OGL 4.2 support is required).");
|
||||
warn_once = false;
|
||||
}
|
||||
|
||||
if (g_ActiveConfig.bEnablePixelLighting)
|
||||
{
|
||||
// The lighting shader only needs the two color bits of the 23bit component bit array.
|
||||
|
@ -333,6 +321,110 @@ PixelShaderUid GetPixelShaderUid()
|
|||
return out;
|
||||
}
|
||||
|
||||
// Emits the shader preamble that is shared between ubershaders and pixelgen:
// integer helper functions, sampler declarations, the PSBlock constant buffer
// (including the bpmem_* fields and their accessor macros), the optional lighting
// struct + VSBlock, the optional bounding box buffer and the VS_OUTPUT struct.
//
// out                - shader source buffer that is appended to.
// ApiType            - OpenGL and Vulkan get the GLSL-style declarations, every other
//                      API the D3D-style ones.
// num_texgens        - forwarded to GenerateVSOutputMembers.
// per_pixel_lighting - when set, the lighting struct and VSBlock are declared too.
// bounding_box       - when set, the bbox_data buffer is declared.
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens,
                                  bool per_pixel_lighting, bool bounding_box)
{
  // Every API-dependent choice below is the same two-way split.
  const bool is_glsl = ApiType == APIType::OpenGL || ApiType == APIType::Vulkan;

  // dot product for integer vectors
  out.Write("int idot(int3 x, int3 y)\n"
            "{\n"
            "\tint3 tmp = x * y;\n"
            "\treturn tmp.x + tmp.y + tmp.z;\n"
            "}\n");

  out.Write("int idot(int4 x, int4 y)\n"
            "{\n"
            "\tint4 tmp = x * y;\n"
            "\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
            "}\n\n");

  // rounding + casting to integer at once in a single function
  out.Write("int iround(float x) { return int (round(x)); }\n"
            "int2 iround(float2 x) { return int2(round(x)); }\n"
            "int3 iround(float3 x) { return int3(round(x)); }\n"
            "int4 iround(float4 x) { return int4(round(x)); }\n\n");

  if (is_glsl)
  {
    out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
  }
  else  // D3D
  {
    // Declare samplers
    out.Write("SamplerState samp[8] : register(s0);\n");
    out.Write("\n");
    out.Write("Texture2DArray Tex[8] : register(t0);\n");
  }
  out.Write("\n");

  if (is_glsl)
    out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n");
  else
    out.Write("cbuffer PSBlock : register(b0) {\n");

  out.Write("\tint4 " I_COLORS "[4];\n"
            "\tint4 " I_KCOLORS "[4];\n"
            "\tint4 " I_ALPHA ";\n"
            "\tfloat4 " I_TEXDIMS "[8];\n"
            "\tint4 " I_ZBIAS "[2];\n"
            "\tint4 " I_INDTEXSCALE "[2];\n"
            "\tint4 " I_INDTEXMTX "[6];\n"
            "\tint4 " I_FOGCOLOR ";\n"
            "\tint4 " I_FOGI ";\n"
            "\tfloat4 " I_FOGF "[2];\n"
            "\tfloat4 " I_ZSLOPE ";\n"
            "\tfloat2 " I_EFBSCALE ";\n"
            "\tuint bpmem_genmode;\n"
            "\tuint bpmem_alphaTest;\n"
            "\tuint bpmem_fogParam3;\n"
            "\tuint bpmem_fogRangeBase;\n"
            "\tuint bpmem_dstalpha;\n"
            "\tuint bpmem_ztex_op;\n"
            "\tbool bpmem_early_ztest;\n"
            "\tbool bpmem_rgba6_format;\n"
            "\tbool bpmem_dither;\n"
            "\tbool bpmem_bounding_box;\n"
            "\tuint4 bpmem_pack1[16];\n"  // .xy - combiners, .z - tevind
            "\tuint4 bpmem_pack2[8];\n"   // .x - tevorder, .y - tevksel
            "\tint4 konstLookup[32];\n"
            "};\n\n");
  out.Write("#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)\n"
            "#define bpmem_tevind(i) (bpmem_pack1[(i)].z)\n"
            "#define bpmem_iref(i) (bpmem_pack1[(i)].w)\n"
            "#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)\n"
            "#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)\n\n");

  if (per_pixel_lighting)
  {
    out.Write("%s", s_lighting_struct);

    if (is_glsl)
      out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
    else
      out.Write("cbuffer VSBlock : register(b1) {\n");

    // Pass the uniform block as an argument, not as the format string itself, so a
    // '%' inside it can never be interpreted as a conversion specifier.
    out.Write("%s", s_shader_uniforms);
    out.Write("};\n");
  }

  if (bounding_box)
  {
    if (is_glsl)
    {
      out.Write("SSBO_BINDING(0) buffer BBox {\n"
                "\tint4 bbox_data;\n"
                "};\n");
    }
    else
    {
      out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n");
    }
  }

  out.Write("struct VS_OUTPUT {\n");
  GenerateVSOutputMembers(out, ApiType, num_texgens, per_pixel_lighting, "");
  out.Write("};\n");
}
|
||||
|
||||
static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n,
|
||||
APIType ApiType, bool stereo);
|
||||
static void WriteTevRegular(ShaderCode& out, const char* components, int bias, int op, int clamp,
|
||||
|
@ -360,100 +452,11 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
|
|||
out.Write("//%i TEV stages, %i texgens, %i IND stages\n", numStages, uid_data->genMode_numtexgens,
|
||||
uid_data->genMode_numindstages);
|
||||
|
||||
// dot product for integer vectors
|
||||
out.Write("int idot(int3 x, int3 y)\n"
|
||||
"{\n"
|
||||
"\tint3 tmp = x * y;\n"
|
||||
"\treturn tmp.x + tmp.y + tmp.z;\n"
|
||||
"}\n");
|
||||
// Stuff that is shared between ubershaders and pixelgen.
|
||||
WritePixelShaderCommonHeader(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting,
|
||||
uid_data->bounding_box);
|
||||
|
||||
out.Write("int idot(int4 x, int4 y)\n"
|
||||
"{\n"
|
||||
"\tint4 tmp = x * y;\n"
|
||||
"\treturn tmp.x + tmp.y + tmp.z + tmp.w;\n"
|
||||
"}\n\n");
|
||||
|
||||
// rounding + casting to integer at once in a single function
|
||||
out.Write("int iround(float x) { return int (round(x)); }\n"
|
||||
"int2 iround(float2 x) { return int2(round(x)); }\n"
|
||||
"int3 iround(float3 x) { return int3(round(x)); }\n"
|
||||
"int4 iround(float4 x) { return int4(round(x)); }\n\n");
|
||||
|
||||
if (ApiType == APIType::OpenGL)
|
||||
{
|
||||
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp[8];\n");
|
||||
}
|
||||
else if (ApiType == APIType::Vulkan)
|
||||
{
|
||||
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
|
||||
out.Write("SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n");
|
||||
out.Write("SAMPLER_BINDING(2) uniform sampler2DArray samp2;\n");
|
||||
out.Write("SAMPLER_BINDING(3) uniform sampler2DArray samp3;\n");
|
||||
out.Write("SAMPLER_BINDING(4) uniform sampler2DArray samp4;\n");
|
||||
out.Write("SAMPLER_BINDING(5) uniform sampler2DArray samp5;\n");
|
||||
out.Write("SAMPLER_BINDING(6) uniform sampler2DArray samp6;\n");
|
||||
out.Write("SAMPLER_BINDING(7) uniform sampler2DArray samp7;\n");
|
||||
}
|
||||
else // D3D
|
||||
{
|
||||
// Declare samplers
|
||||
out.Write("SamplerState samp[8] : register(s0);\n");
|
||||
out.Write("\n");
|
||||
out.Write("Texture2DArray Tex[8] : register(t0);\n");
|
||||
}
|
||||
out.Write("\n");
|
||||
|
||||
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
|
||||
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n");
|
||||
else
|
||||
out.Write("cbuffer PSBlock : register(b0) {\n");
|
||||
|
||||
out.Write("\tint4 " I_COLORS "[4];\n"
|
||||
"\tint4 " I_KCOLORS "[4];\n"
|
||||
"\tint4 " I_ALPHA ";\n"
|
||||
"\tfloat4 " I_TEXDIMS "[8];\n"
|
||||
"\tint4 " I_ZBIAS "[2];\n"
|
||||
"\tint4 " I_INDTEXSCALE "[2];\n"
|
||||
"\tint4 " I_INDTEXMTX "[6];\n"
|
||||
"\tint4 " I_FOGCOLOR ";\n"
|
||||
"\tint4 " I_FOGI ";\n"
|
||||
"\tfloat4 " I_FOGF "[2];\n"
|
||||
"\tfloat4 " I_ZSLOPE ";\n"
|
||||
"\tfloat4 " I_EFBSCALE ";\n"
|
||||
"};\n");
|
||||
|
||||
if (per_pixel_lighting)
|
||||
{
|
||||
out.Write("%s", s_lighting_struct);
|
||||
|
||||
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
|
||||
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
|
||||
else
|
||||
out.Write("cbuffer VSBlock : register(b1) {\n");
|
||||
|
||||
out.Write(s_shader_uniforms);
|
||||
out.Write("};\n");
|
||||
}
|
||||
|
||||
if (uid_data->bounding_box)
|
||||
{
|
||||
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
|
||||
{
|
||||
out.Write("SSBO_BINDING(0) buffer BBox {\n"
|
||||
"\tint4 bbox_data;\n"
|
||||
"};\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("globallycoherent RWBuffer<int> bbox_data : register(u2);\n");
|
||||
}
|
||||
}
|
||||
|
||||
out.Write("struct VS_OUTPUT {\n");
|
||||
GenerateVSOutputMembers(out, ApiType, uid_data->genMode_numtexgens, per_pixel_lighting, "");
|
||||
out.Write("};\n");
|
||||
|
||||
if (uid_data->forced_early_z)
|
||||
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
||||
{
|
||||
// Zcomploc (aka early_ztest) is a way to control whether depth test is done before
|
||||
// or after texturing and alpha test. PC graphics APIs used to provide no way to emulate
|
||||
|
@ -549,7 +552,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
|
|||
// Let's set up attributes
|
||||
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
|
||||
{
|
||||
out.Write("%s in float3 uv%d;\n", GetInterpolationQualifier(msaa, ssaa), i);
|
||||
out.Write("%s in float3 tex%d;\n", GetInterpolationQualifier(msaa, ssaa), i);
|
||||
}
|
||||
out.Write("%s in float4 clipPos;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
if (per_pixel_lighting)
|
||||
|
@ -560,13 +563,6 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
|
|||
}
|
||||
|
||||
out.Write("void main()\n{\n");
|
||||
|
||||
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
|
||||
{
|
||||
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
|
||||
out.Write("\tfloat3 uv%d = tex%d;\n", i, i);
|
||||
}
|
||||
|
||||
out.Write("\tfloat4 rawpos = gl_FragCoord;\n");
|
||||
}
|
||||
else // D3D
|
||||
|
@ -582,7 +578,8 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
|
|||
|
||||
// compute window position if needed because binding semantic WPOS is not widely supported
|
||||
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
|
||||
out.Write(",\n in %s float3 uv%d : TEXCOORD%d", GetInterpolationQualifier(msaa, ssaa), i, i);
|
||||
out.Write(",\n in %s float3 tex%d : TEXCOORD%d", GetInterpolationQualifier(msaa, ssaa), i,
|
||||
i);
|
||||
out.Write(",\n in %s float4 clipPos : TEXCOORD%d", GetInterpolationQualifier(msaa, ssaa),
|
||||
uid_data->genMode_numtexgens);
|
||||
if (per_pixel_lighting)
|
||||
|
@ -645,7 +642,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host
|
|||
for (unsigned int i = 0; i < uid_data->genMode_numtexgens; ++i)
|
||||
{
|
||||
out.Write("\tint2 fixpoint_uv%d = int2(", i);
|
||||
out.Write("(uv%d.z == 0.0 ? uv%d.xy : uv%d.xy / uv%d.z)", i, i, i, i);
|
||||
out.Write("(tex%d.z == 0.0 ? tex%d.xy : tex%d.xy / tex%d.z)", i, i, i, i);
|
||||
out.Write(" * " I_TEXDIMS "[%d].zw);\n", i);
|
||||
// TODO: S24 overflows here?
|
||||
}
|
||||
|
@ -824,7 +821,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
|
|||
const char* tevIndAlphaSel[] = {"", "x", "y", "z"};
|
||||
const char* tevIndAlphaMask[] = {"248", "224", "240",
|
||||
"248"}; // 0b11111000, 0b11100000, 0b11110000, 0b11111000
|
||||
out.Write("alphabump = iindtex%d.%s & %s;\n", tevind.bt, tevIndAlphaSel[tevind.bs],
|
||||
out.Write("alphabump = iindtex%d.%s & %s;\n", tevind.bt.Value(), tevIndAlphaSel[tevind.bs],
|
||||
tevIndAlphaMask[tevind.fmt]);
|
||||
}
|
||||
else
|
||||
|
@ -836,7 +833,8 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
|
|||
{
|
||||
// format
|
||||
const char* tevIndFmtMask[] = {"255", "31", "15", "7"};
|
||||
out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, tevind.bt, tevIndFmtMask[tevind.fmt]);
|
||||
out.Write("\tint3 iindtevcrd%d = iindtex%d & %s;\n", n, tevind.bt.Value(),
|
||||
tevIndFmtMask[tevind.fmt]);
|
||||
|
||||
// bias - TODO: Check if this needs to be this complicated..
|
||||
const char* tevIndBiasField[] = {"", "x", "y", "xy",
|
||||
|
@ -1166,11 +1164,6 @@ static void SampleTexture(ShaderCode& out, const char* texcoords, const char* te
|
|||
"[%d].xy, %s))).%s;\n",
|
||||
texmap, texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
|
||||
}
|
||||
else if (ApiType == APIType::Vulkan)
|
||||
{
|
||||
out.Write("iround(255.0 * texture(samp%d, float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n",
|
||||
texmap, texcoords, texmap, stereo ? "layer" : "0.0", texswap);
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("iround(255.0 * texture(samp[%d], float3(%s.xy * " I_TEXDIMS "[%d].xy, %s))).%s;\n",
|
||||
|
|
|
@ -159,4 +159,7 @@ typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
|
|||
|
||||
ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host_config,
|
||||
const pixel_shader_uid_data* uid_data);
|
||||
void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texgens,
|
||||
bool per_pixel_lighting, bool bounding_box);
|
||||
ShaderCode GeneratePixelShaderCode(APIType ApiType, const pixel_shader_uid_data* uid_data);
|
||||
PixelShaderUid GetPixelShaderUid();
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
|
||||
bool PixelShaderManager::s_bFogRangeAdjustChanged;
|
||||
bool PixelShaderManager::s_bViewPortChanged;
|
||||
bool PixelShaderManager::s_bIndirectDirty;
|
||||
bool PixelShaderManager::s_bDestAlphaDirty;
|
||||
|
||||
PixelShaderConstants PixelShaderManager::constants;
|
||||
bool PixelShaderManager::dirty;
|
||||
|
@ -40,6 +42,38 @@ void PixelShaderManager::Init()
|
|||
SetTexCoordChanged(6);
|
||||
SetTexCoordChanged(7);
|
||||
|
||||
// fixed Konstants
|
||||
for (int component = 0; component < 4; component++)
|
||||
{
|
||||
constants.konst[0][component] = 255; // 1
|
||||
constants.konst[1][component] = 223; // 7/8
|
||||
constants.konst[2][component] = 191; // 3/4
|
||||
constants.konst[3][component] = 159; // 5/8
|
||||
constants.konst[4][component] = 128; // 1/2
|
||||
constants.konst[5][component] = 96; // 3/8
|
||||
constants.konst[6][component] = 64; // 1/4
|
||||
constants.konst[7][component] = 32; // 1/8
|
||||
|
||||
// Invalid Konstants (reads as zero on hardware)
|
||||
constants.konst[8][component] = 0;
|
||||
constants.konst[9][component] = 0;
|
||||
constants.konst[10][component] = 0;
|
||||
constants.konst[11][component] = 0;
|
||||
|
||||
// Annoyingly, alpha reads zero values for the .rgb colors (officially
|
||||
// defined as invalid)
|
||||
// If it wasn't for this, we could just use one of the first 3 columns
|
||||
// instead of
|
||||
// wasting an entire 4th column just for alpha.
|
||||
if (component == 3)
|
||||
{
|
||||
constants.konst[12][component] = 0;
|
||||
constants.konst[13][component] = 0;
|
||||
constants.konst[14][component] = 0;
|
||||
constants.konst[15][component] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
|
@ -99,6 +133,59 @@ void PixelShaderManager::SetConstants()
|
|||
dirty = true;
|
||||
s_bViewPortChanged = false;
|
||||
}
|
||||
|
||||
if (s_bIndirectDirty)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
constants.pack1[i][3] = 0;
|
||||
|
||||
for (u32 i = 0; i < (bpmem.genMode.numtevstages + 1); ++i)
|
||||
{
|
||||
u32 stage = bpmem.tevind[i].bt;
|
||||
if (stage < bpmem.genMode.numindstages)
|
||||
{
|
||||
// We set some extra bits so the ubershader can quickly check if these
|
||||
// features are in use.
|
||||
if (bpmem.tevind[i].IsActive())
|
||||
constants.pack1[stage][3] =
|
||||
bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16;
|
||||
// Note: a tevind of zero just happens to be a passthrough, so no need
|
||||
// to set an extra bit.
|
||||
constants.pack1[i][2] =
|
||||
bpmem.tevind[i].hex; // TODO: This match shadergen, but videosw will
|
||||
// always wrap.
|
||||
|
||||
// The ubershader uses tevind != 0 as a condition whether to calculate texcoords,
|
||||
// even when texture is disabled, instead of the stage < bpmem.genMode.numindstages.
|
||||
// We set an unused bit here to indicate that the stage is active, even if it
|
||||
// is just a pass-through.
|
||||
constants.pack1[i][2] |= 0x80000000;
|
||||
}
|
||||
else
|
||||
{
|
||||
constants.pack1[i][2] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
dirty = true;
|
||||
s_bIndirectDirty = false;
|
||||
}
|
||||
|
||||
if (s_bDestAlphaDirty)
|
||||
{
|
||||
// Destination alpha is only enabled if alpha writes are enabled. Force entire uniform to zero
|
||||
// when disabled.
|
||||
u32 dstalpha = bpmem.blendmode.alphaupdate && bpmem.dstalpha.enable &&
|
||||
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 ?
|
||||
bpmem.dstalpha.hex :
|
||||
0;
|
||||
|
||||
if (constants.dstalpha != dstalpha)
|
||||
{
|
||||
constants.dstalpha = dstalpha;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
|
||||
|
@ -116,20 +203,78 @@ void PixelShaderManager::SetTevKonstColor(int index, int component, s32 value)
|
|||
c[component] = value;
|
||||
dirty = true;
|
||||
|
||||
// Konst for ubershaders. We build the whole array on cpu so the gpu can do a single indirect
|
||||
// access.
|
||||
if (component != 3) // Alpha isn't included in the .rgb konsts
|
||||
constants.konst[index + 12][component] = value;
|
||||
|
||||
// .rrrr .gggg .bbbb .aaaa konsts
|
||||
constants.konst[index + 16 + component * 4][0] = value;
|
||||
constants.konst[index + 16 + component * 4][1] = value;
|
||||
constants.konst[index + 16 + component * 4][2] = value;
|
||||
constants.konst[index + 16 + component * 4][3] = value;
|
||||
|
||||
PRIM_LOG("tev konst color%d: %d %d %d %d", index, c[0], c[1], c[2], c[3]);
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTevOrder(int index, u32 order)
|
||||
{
|
||||
if (constants.pack2[index][0] != order)
|
||||
{
|
||||
constants.pack2[index][0] = order;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTevKSel(int index, u32 ksel)
|
||||
{
|
||||
if (constants.pack2[index][1] != ksel)
|
||||
{
|
||||
constants.pack2[index][1] = ksel;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTevCombiner(int index, int alpha, u32 combiner)
|
||||
{
|
||||
if (constants.pack1[index][alpha] != combiner)
|
||||
{
|
||||
constants.pack1[index][alpha] = combiner;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTevIndirectChanged()
|
||||
{
|
||||
s_bIndirectDirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetAlpha()
|
||||
{
|
||||
constants.alpha[0] = bpmem.alpha_test.ref0;
|
||||
constants.alpha[1] = bpmem.alpha_test.ref1;
|
||||
constants.alpha[3] = static_cast<s32>(bpmem.dstalpha.alpha);
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetDestAlpha()
|
||||
void PixelShaderManager::SetAlphaTestChanged()
|
||||
{
|
||||
constants.alpha[3] = bpmem.dstalpha.alpha;
|
||||
dirty = true;
|
||||
// Force alphaTest Uniform to zero if it will always pass.
|
||||
// (set an extra bit to distinguish from "never && never")
|
||||
// TODO: we could optimize this further and check the actual constants,
|
||||
// i.e. "a <= 0" and "a >= 255" will always pass.
|
||||
u32 alpha_test =
|
||||
bpmem.alpha_test.TestResult() != AlphaTest::PASS ? bpmem.alpha_test.hex | 1 << 31 : 0;
|
||||
if (constants.alphaTest != alpha_test)
|
||||
{
|
||||
constants.alphaTest = alpha_test;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetDestAlphaChanged()
|
||||
{
|
||||
s_bDestAlphaDirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height)
|
||||
|
@ -235,6 +380,12 @@ void PixelShaderManager::SetZTextureTypeChanged()
|
|||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetZTextureOpChanged()
|
||||
{
|
||||
constants.ztex_op = bpmem.ztex2.op;
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTexCoordChanged(u8 texmapid)
|
||||
{
|
||||
TCoordInfo& tc = bpmem.texcoords[texmapid];
|
||||
|
@ -262,6 +413,7 @@ void PixelShaderManager::SetFogParamChanged()
|
|||
constants.fogi[1] = bpmem.fog.b_magnitude;
|
||||
constants.fogf[1][2] = bpmem.fog.c_proj_fsel.GetC();
|
||||
constants.fogi[3] = bpmem.fog.b_shift;
|
||||
constants.fogParam3 = bpmem.fog.c_proj_fsel.hex;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -269,6 +421,7 @@ void PixelShaderManager::SetFogParamChanged()
|
|||
constants.fogi[1] = 1;
|
||||
constants.fogf[1][2] = 0.f;
|
||||
constants.fogi[3] = 1;
|
||||
constants.fogParam3 = 0;
|
||||
}
|
||||
dirty = true;
|
||||
}
|
||||
|
@ -279,12 +432,68 @@ void PixelShaderManager::SetFogRangeAdjustChanged()
|
|||
return;
|
||||
|
||||
s_bFogRangeAdjustChanged = true;
|
||||
|
||||
if (constants.fogRangeBase != bpmem.fogRange.Base.hex)
|
||||
{
|
||||
constants.fogRangeBase = bpmem.fogRange.Base.hex;
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetGenModeChanged()
|
||||
{
|
||||
constants.genmode = bpmem.genMode.hex;
|
||||
s_bIndirectDirty = true;
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetZControlChanged()
|
||||
{
|
||||
u32 early_ztest = bpmem.zcontrol.early_ztest ? 1 : 0;
|
||||
u32 rgba6_format =
|
||||
(bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor) ? 1 :
|
||||
0;
|
||||
u32 dither = rgba6_format && bpmem.blendmode.dither;
|
||||
if (constants.early_ztest != early_ztest || constants.rgba6_format != rgba6_format ||
|
||||
constants.dither != dither)
|
||||
{
|
||||
constants.early_ztest = early_ztest;
|
||||
constants.rgba6_format = rgba6_format;
|
||||
constants.dither = dither;
|
||||
dirty = true;
|
||||
}
|
||||
s_bDestAlphaDirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetBlendModeChanged()
|
||||
{
|
||||
u32 dither = constants.rgba6_format && bpmem.blendmode.dither;
|
||||
if (constants.dither != dither)
|
||||
{
|
||||
constants.dither = dither;
|
||||
dirty = true;
|
||||
}
|
||||
s_bDestAlphaDirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetBoundingBoxActive(bool active)
|
||||
{
|
||||
const bool enable =
|
||||
active && g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation();
|
||||
|
||||
if (enable == (constants.bounding_box != 0))
|
||||
return;
|
||||
|
||||
constants.bounding_box = active;
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::DoState(PointerWrap& p)
|
||||
{
|
||||
p.Do(s_bFogRangeAdjustChanged);
|
||||
p.Do(s_bViewPortChanged);
|
||||
p.Do(s_bIndirectDirty);
|
||||
p.Do(s_bDestAlphaDirty);
|
||||
|
||||
p.Do(constants);
|
||||
|
||||
|
|
|
@ -24,24 +24,36 @@ public:
|
|||
// so make sure to call them after memory is committed
|
||||
static void SetTevColor(int index, int component, s32 value);
|
||||
static void SetTevKonstColor(int index, int component, s32 value);
|
||||
static void SetTevOrder(int index, u32 order);
|
||||
static void SetTevKSel(int index, u32 ksel);
|
||||
static void SetTevCombiner(int index, int alpha, u32 combiner);
|
||||
static void SetAlpha();
|
||||
static void SetDestAlpha();
|
||||
static void SetAlphaTestChanged();
|
||||
static void SetDestAlphaChanged();
|
||||
static void SetTexDims(int texmapid, u32 width, u32 height);
|
||||
static void SetZTextureBias();
|
||||
static void SetViewportChanged();
|
||||
static void SetEfbScaleChanged(float scalex, float scaley);
|
||||
static void SetZSlope(float dfdx, float dfdy, float f0);
|
||||
static void SetIndMatrixChanged(int matrixidx);
|
||||
static void SetTevIndirectChanged();
|
||||
static void SetZTextureTypeChanged();
|
||||
static void SetZTextureOpChanged();
|
||||
static void SetIndTexScaleChanged(bool high);
|
||||
static void SetTexCoordChanged(u8 texmapid);
|
||||
static void SetFogColorChanged();
|
||||
static void SetFogParamChanged();
|
||||
static void SetFogRangeAdjustChanged();
|
||||
static void SetGenModeChanged();
|
||||
static void SetZControlChanged();
|
||||
static void SetBlendModeChanged();
|
||||
static void SetBoundingBoxActive(bool active);
|
||||
|
||||
static PixelShaderConstants constants;
|
||||
static bool dirty;
|
||||
|
||||
static bool s_bFogRangeAdjustChanged;
|
||||
static bool s_bViewPortChanged;
|
||||
static bool s_bIndirectDirty;
|
||||
static bool s_bDestAlphaDirty;
|
||||
};
|
||||
|
|
|
@ -29,6 +29,9 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
|
|||
bits.backend_atomics = g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics;
|
||||
bits.backend_depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp;
|
||||
bits.backend_reversed_depth_range = g_ActiveConfig.backend_info.bSupportsReversedDepthRange;
|
||||
bits.backend_bitfield = g_ActiveConfig.backend_info.bSupportsBitfield;
|
||||
bits.backend_dynamic_sampler_indexing =
|
||||
g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing;
|
||||
return bits;
|
||||
}
|
||||
|
||||
|
@ -65,7 +68,7 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
|
|||
|
||||
if (include_host_config)
|
||||
{
|
||||
// We're using 18 bits, so 5 hex characters.
|
||||
// We're using 20 bits, so 5 hex characters.
|
||||
ShaderHostConfig host_config = ShaderHostConfig::GetCurrent();
|
||||
filename += StringFromFormat("-%05X", host_config.bits);
|
||||
}
|
||||
|
|
|
@ -176,7 +176,9 @@ union ShaderHostConfig
|
|||
u32 backend_atomics : 1;
|
||||
u32 backend_depth_clamp : 1;
|
||||
u32 backend_reversed_depth_range : 1;
|
||||
u32 pad : 14;
|
||||
u32 backend_bitfield : 1;
|
||||
u32 backend_dynamic_sampler_indexing : 1;
|
||||
u32 pad : 12;
|
||||
};
|
||||
|
||||
static ShaderHostConfig GetCurrent();
|
||||
|
@ -316,7 +318,10 @@ inline const char* GetInterpolationQualifier(bool msaa, bool ssaa,
|
|||
#define I_LINEPTPARAMS "clinept"
|
||||
#define I_TEXOFFSET "ctexoffset"
|
||||
|
||||
static const char s_shader_uniforms[] = "\tfloat4 " I_POSNORMALMATRIX "[6];\n"
|
||||
static const char s_shader_uniforms[] = "\tuint components;\n"
|
||||
"\tuint xfmem_dualTexInfo;\n"
|
||||
"\tuint xfmem_numColorChans;\n"
|
||||
"\tfloat4 " I_POSNORMALMATRIX "[6];\n"
|
||||
"\tfloat4 " I_PROJECTION "[4];\n"
|
||||
"\tint4 " I_MATERIALS "[4];\n"
|
||||
"\tLight " I_LIGHTS "[8];\n"
|
||||
|
@ -325,4 +330,9 @@ static const char s_shader_uniforms[] = "\tfloat4 " I_POSNORMALMATRIX "[6];\n"
|
|||
"\tfloat4 " I_NORMALMATRICES "[32];\n"
|
||||
"\tfloat4 " I_POSTTRANSFORMMATRICES "[64];\n"
|
||||
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
|
||||
"\tfloat2 " I_VIEWPORT_SIZE ";\n";
|
||||
"\tfloat2 " I_VIEWPORT_SIZE ";\n"
|
||||
"\tuint4 xfmem_pack1[8];\n"
|
||||
"\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n"
|
||||
"\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
|
||||
"\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"
|
||||
"\t#define xfmem_alpha(i) (xfmem_pack1[(i)].w)\n";
|
||||
|
|
|
@ -0,0 +1,203 @@
|
|||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "VideoCommon/UberShaderCommon.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
#include "VideoCommon/XFMemory.h"
|
||||
|
||||
namespace UberShader
|
||||
{
|
||||
// Emits helper code shared by all ubershaders into `out`.
//
// Currently this is only a software fallback for bitfieldExtract(), written
// when host_config.backend_bitfield is not set. `api_type` is accepted but not
// consulted by this function.
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
                                 const ShaderHostConfig& host_config)
{
  // The backend provides bitfieldExtract natively, so there is nothing to emit.
  if (host_config.backend_bitfield)
    return;

  // ==============================================
  //  BitfieldExtract for APIs which don't have it
  // ==============================================
  out.Write("uint bitfieldExtract(uint val, int off, int size) {\n"
            " // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
            " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
            "instruction.\n"
            " uint mask = uint((1 << size) - 1);\n"
            " return uint(val >> off) & mask;\n"
            "}\n\n");
}
|
||||
|
||||
// Emits the shader-side helper
//   int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, float3 normal)
// into `out`.
//
// The generated function has two stages:
//   1. a switch on `attnfunc` that computes the light direction (ldir) and the
//      attenuation factor (attn) for light `index`;
//   2. a switch on `diffusefunc` that scales the light's colour by attn (and,
//      for the SIGN/CLAMP modes, by dot(ldir, normal)) and returns it as int4.
// The case labels are written with the numeric values of the LIGHTATTN_* and
// LIGHTDIF_* enumerators, so the emitted text tracks those constants.
void WriteLightingFunction(ShaderCode& out)
|
||||
{
|
||||
// ==============================================
|
||||
// Lighting channel calculation helper
|
||||
// ==============================================
|
||||
out.Write("int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, "
|
||||
"float3 normal) {\n"
|
||||
" float3 ldir, h, cosAttn, distAttn;\n"
|
||||
" float dist, dist2, attn;\n"
|
||||
"\n"
|
||||
" switch (attnfunc) {\n");
|
||||
// NONE and DIR share one body: the NONE label is emitted with no statements
// and falls through into the DIR case in the generated shader.
out.Write(" case %uu: // LIGNTATTN_NONE\n", LIGHTATTN_NONE);
|
||||
out.Write(" case %uu: // LIGHTATTN_DIR\n", LIGHTATTN_DIR);
|
||||
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
|
||||
" attn = 1.0;\n"
|
||||
" if (length(ldir) == 0.0)\n"
|
||||
" ldir = normal;\n"
|
||||
" break;\n\n");
|
||||
// SPEC: distAttn is normalised unless the diffuse function is LIGHTDIF_NONE.
out.Write(" case %uu: // LIGHTATTN_SPEC\n", LIGHTATTN_SPEC);
|
||||
out.Write(" ldir = normalize(" I_LIGHTS "[index].pos.xyz - pos.xyz);\n"
|
||||
" attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, " I_LIGHTS
|
||||
"[index].dir.xyz)) : 0.0;\n"
|
||||
" cosAttn = " I_LIGHTS "[index].cosatt.xyz;\n");
|
||||
out.Write(" if (diffusefunc == %uu) // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
|
||||
out.Write(" distAttn = " I_LIGHTS "[index].distatt.xyz;\n"
|
||||
" else\n"
|
||||
" distAttn = normalize(" I_LIGHTS "[index].distatt.xyz);\n"
|
||||
" attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, "
|
||||
"float3(1.0, attn, attn*attn));\n"
|
||||
" break;\n\n");
|
||||
// SPOT: angle attenuation (cosatt polynomial) divided by distance attenuation
// (distatt polynomial in dist and dist2).
out.Write(" case %uu: // LIGHTATTN_SPOT\n", LIGHTATTN_SPOT);
|
||||
out.Write(" ldir = " I_LIGHTS "[index].pos.xyz - pos.xyz;\n"
|
||||
" dist2 = dot(ldir, ldir);\n"
|
||||
" dist = sqrt(dist2);\n"
|
||||
" ldir = ldir / dist;\n"
|
||||
" attn = max(0.0, dot(ldir, " I_LIGHTS "[index].dir.xyz));\n"
|
||||
" attn = max(0.0, " I_LIGHTS "[index].cosatt.x + " I_LIGHTS
|
||||
"[index].cosatt.y * attn + " I_LIGHTS "[index].cosatt.z * attn * attn) / dot(" I_LIGHTS
|
||||
"[index].distatt.xyz, float3(1.0, dist, dist2));\n"
|
||||
" break;\n\n");
|
||||
// Closes the attenuation switch and opens the diffuse-function switch.
out.Write(" default:\n"
|
||||
" attn = 1.0;\n"
|
||||
" ldir = normal;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" switch (diffusefunc) {\n");
|
||||
out.Write(" case %uu: // LIGHTDIF_NONE\n", LIGHTDIF_NONE);
|
||||
out.Write(" return int4(round(attn * float4(" I_LIGHTS "[index].color)));\n\n");
|
||||
out.Write(" case %uu: // LIGHTDIF_SIGN\n", LIGHTDIF_SIGN);
|
||||
out.Write(" return int4(round(attn * dot(ldir, normal) * float4(" I_LIGHTS
|
||||
"[index].color)));\n\n");
|
||||
out.Write(" case %uu: // LIGHTDIF_CLAMP\n", LIGHTDIF_CLAMP);
|
||||
out.Write(" return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(" I_LIGHTS
|
||||
"[index].color)));\n\n");
|
||||
out.Write(" default:\n"
|
||||
" return int4(0, 0, 0, 0);\n"
|
||||
" }\n"
|
||||
"}\n\n");
|
||||
}
|
||||
|
||||
// Emits the per-channel hardware-lighting code into `out`.
//
// The generated shader loops over `chan` in [0, xfmem_numColorChans). For each
// channel it:
//   - reads the colour and alpha control words via xfmem_color/xfmem_alpha;
//   - picks the material colour and alpha from either the vertex colours or
//     I_MATERIALS[chan + 2];
//   - if lighting is enabled, seeds the accumulator (lacc) from the ambient
//     source and adds CalculateLighting() for every light set in the mask;
//   - clamps lacc to 0..255, combines it with the material and stores the
//     result in the channel's output variable.
// After the loop, output colour 1 is copied from output colour 0 when fewer
// than two channels are active and the vertex has no VB_HAS_COL1 component.
//
// Parameters:
//   out             - shader text sink.
//   api_type        - only checked against APIType::D3D, to prefix the channel
//                     loop with "[loop] ".
//   world_pos_var   - shader expression passed as `pos` to CalculateLighting.
//   normal_var      - shader expression passed as `normal` to CalculateLighting.
//   in_color_0_var  - shader expression for the first vertex colour.
//   in_color_1_var  - shader expression for the second vertex colour.
//   out_color_0_var - shader lvalue receiving channel 0's lit colour.
//   out_color_1_var - shader lvalue receiving channel 1's lit colour.
// The emitted code calls CalculateLighting(), so the shader must define it.
void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var,
|
||||
const char* normal_var, const char* in_color_0_var,
|
||||
const char* in_color_1_var, const char* out_color_0_var,
|
||||
const char* out_color_1_var)
|
||||
{
|
||||
out.Write("// Lighting\n");
|
||||
out.Write("%sfor (uint chan = 0u; chan < xfmem_numColorChans; chan++) {\n",
|
||||
api_type == APIType::D3D ? "[loop] " : "");
|
||||
out.Write(" uint colorreg = xfmem_color(chan);\n"
|
||||
" uint alphareg = xfmem_alpha(chan);\n"
|
||||
" int4 mat = " I_MATERIALS "[chan + 2u]; \n"
|
||||
" int4 lacc = int4(255, 255, 255, 255);\n"
|
||||
"\n");
|
||||
|
||||
// Material colour: when the matsource bit is set, take it from the vertex
// colour (falling back to colour 0, then to white).
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().matsource).c_str());
|
||||
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
|
||||
out.Write(" mat.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n",
|
||||
in_color_0_var, in_color_1_var);
|
||||
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
|
||||
out.Write(" mat.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var);
|
||||
out.Write(" else\n"
|
||||
" mat.xyz = int3(255, 255, 255);\n"
|
||||
" }\n"
|
||||
"\n");
|
||||
|
||||
// Material alpha: same selection as above, driven by the alpha control word.
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().matsource).c_str());
|
||||
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
|
||||
out.Write(" mat.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var,
|
||||
in_color_1_var);
|
||||
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
|
||||
out.Write(" mat.w = int(round(%s.w * 255.0));\n", in_color_0_var);
|
||||
out.Write(" else\n"
|
||||
" mat.w = 255;\n"
|
||||
" } else {\n"
|
||||
" mat.w = " I_MATERIALS " [chan + 2u].w;\n"
|
||||
" }\n"
|
||||
"\n");
|
||||
|
||||
// Colour lighting: seed lacc.xyz from the ambient source, then accumulate every
// light enabled in the 8-bit mask (lightMask0_3 | lightMask4_7 << 4).
out.Write(" if (%s != 0u) {\n",
|
||||
BitfieldExtract("colorreg", LitChannel().enablelighting).c_str());
|
||||
out.Write(" if (%s != 0u) {\n", BitfieldExtract("colorreg", LitChannel().ambsource).c_str());
|
||||
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
|
||||
out.Write(" lacc.xyz = int3(round(((chan == 0u) ? %s.xyz : %s.xyz) * 255.0));\n",
|
||||
in_color_0_var, in_color_1_var);
|
||||
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
|
||||
out.Write(" lacc.xyz = int3(round(%s.xyz * 255.0));\n", in_color_0_var);
|
||||
out.Write(" else\n"
|
||||
" lacc.xyz = int3(255, 255, 255);\n"
|
||||
" } else {\n"
|
||||
" lacc.xyz = " I_MATERIALS " [chan].xyz;\n"
|
||||
" }\n"
|
||||
"\n");
|
||||
out.Write(" uint light_mask = %s | (%s << 4u);\n",
|
||||
BitfieldExtract("colorreg", LitChannel().lightMask0_3).c_str(),
|
||||
BitfieldExtract("colorreg", LitChannel().lightMask4_7).c_str());
|
||||
out.Write(" uint attnfunc = %s;\n",
|
||||
BitfieldExtract("colorreg", LitChannel().attnfunc).c_str());
|
||||
out.Write(" uint diffusefunc = %s;\n",
|
||||
BitfieldExtract("colorreg", LitChannel().diffusefunc).c_str());
|
||||
out.Write(
|
||||
" for (uint light_index = 0u; light_index < 8u; light_index++) {\n"
|
||||
" if ((light_mask & (1u << light_index)) != 0u)\n"
|
||||
" lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).xyz;\n",
|
||||
world_pos_var, normal_var);
|
||||
out.Write(" }\n"
|
||||
" }\n"
|
||||
"\n");
|
||||
|
||||
// Alpha lighting: mirrors the colour path, using the alpha control word and
// accumulating into lacc.w.
out.Write(" if (%s != 0u) {\n",
|
||||
BitfieldExtract("alphareg", LitChannel().enablelighting).c_str());
|
||||
out.Write(" if (%s != 0u) {\n", BitfieldExtract("alphareg", LitChannel().ambsource).c_str());
|
||||
out.Write(" if ((components & (%uu << chan)) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
|
||||
out.Write(" lacc.w = int(round(((chan == 0u) ? %s.w : %s.w) * 255.0));\n", in_color_0_var,
|
||||
in_color_1_var);
|
||||
out.Write(" else if ((components & %uu) != 0u) // VB_HAS_COLO0\n", VB_HAS_COL0);
|
||||
out.Write(" lacc.w = int(round(%s.w * 255.0));\n", in_color_0_var);
|
||||
out.Write(" else\n"
|
||||
" lacc.w = 255;\n"
|
||||
" } else {\n"
|
||||
" lacc.w = " I_MATERIALS " [chan].w;\n"
|
||||
" }\n"
|
||||
"\n");
|
||||
out.Write(" uint light_mask = %s | (%s << 4u);\n",
|
||||
BitfieldExtract("alphareg", LitChannel().lightMask0_3).c_str(),
|
||||
BitfieldExtract("alphareg", LitChannel().lightMask4_7).c_str());
|
||||
out.Write(" uint attnfunc = %s;\n",
|
||||
BitfieldExtract("alphareg", LitChannel().attnfunc).c_str());
|
||||
out.Write(" uint diffusefunc = %s;\n",
|
||||
BitfieldExtract("alphareg", LitChannel().diffusefunc).c_str());
|
||||
out.Write(" for (uint light_index = 0u; light_index < 8u; light_index++) {\n\n"
|
||||
" if ((light_mask & (1u << light_index)) != 0u)\n\n"
|
||||
" lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, %s, %s).w;\n",
|
||||
world_pos_var, normal_var);
|
||||
out.Write(" }\n"
|
||||
" }\n"
|
||||
"\n");
|
||||
|
||||
// Combine: clamp the accumulator, modulate the material by it in integer
// arithmetic, and route the result to the output variable for this channel.
out.Write(" lacc = clamp(lacc, 0, 255);\n"
|
||||
"\n"
|
||||
" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
|
||||
" float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n"
|
||||
" switch (chan) {\n"
|
||||
" case 0u: %s = lit_color; break;\n",
|
||||
out_color_0_var);
|
||||
out.Write(" case 1u: %s = lit_color; break;\n", out_color_1_var);
|
||||
out.Write(" }\n"
|
||||
"}\n"
|
||||
"\n");
|
||||
|
||||
// With fewer than two channels and no second vertex colour, output colour 1
// mirrors output colour 0.
out.Write("if (xfmem_numColorChans < 2u && (components & %uu) == 0u)\n", VB_HAS_COL1);
|
||||
out.Write(" %s = %s;\n\n", out_color_1_var, out_color_0_var);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "VideoCommon/ShaderGenCommon.h"
|
||||
#include "VideoCommon/VideoCommon.h"
|
||||
|
||||
namespace UberShader
|
||||
{
|
||||
// Common functions across all ubershaders
|
||||
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
|
||||
const ShaderHostConfig& host_config);
|
||||
|
||||
// Vertex lighting
|
||||
void WriteLightingFunction(ShaderCode& out);
|
||||
void WriteVertexLighting(ShaderCode& out, APIType api_type, const char* world_pos_var,
|
||||
const char* normal_var, const char* in_color_0_var,
|
||||
const char* in_color_1_var, const char* out_color_0_var,
|
||||
const char* out_color_1_var);
|
||||
|
||||
// bitfieldExtract generator for BitField types
|
||||
template <typename T>
|
||||
std::string BitfieldExtract(const std::string& source, T type)
|
||||
{
|
||||
return StringFromFormat("bitfieldExtract(%s, %u, %u)", source.c_str(),
|
||||
static_cast<u32>(type.StartBit()), static_cast<u32>(type.NumBits()));
|
||||
}
|
||||
} // namespace UberShader
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2015 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
|
||||
namespace UberShader
|
||||
{
|
||||
#pragma pack(1)
|
||||
struct pixel_ubershader_uid_data
|
||||
{
|
||||
u32 num_texgens : 4;
|
||||
u32 early_depth : 1;
|
||||
u32 per_pixel_depth : 1;
|
||||
|
||||
u32 NumValues() const { return sizeof(pixel_ubershader_uid_data); }
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
typedef ShaderUid<pixel_ubershader_uid_data> PixelShaderUid;
|
||||
|
||||
PixelShaderUid GetPixelShaderUid();
|
||||
|
||||
ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
|
||||
const pixel_ubershader_uid_data* uid_data);
|
||||
|
||||
void EnumeratePixelShaderUids(const std::function<void(const PixelShaderUid&)>& callback);
|
||||
}
|
|
@ -0,0 +1,467 @@
|
|||
// Copyright 2015 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "VideoCommon/UberShaderVertex.h"
|
||||
#include "VideoCommon/DriverDetails.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
#include "VideoCommon/UberShaderCommon.h"
|
||||
#include "VideoCommon/VertexShaderGen.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
#include "VideoCommon/XFMemory.h"
|
||||
|
||||
namespace UberShader
|
||||
{
|
||||
// Builds the UID for the vertex ubershader matching the current XF state.
// The payload is zero-filled first, so only num_texgens (taken from
// xfmem.numTexGen) is non-zero in the returned UID.
VertexShaderUid GetVertexShaderUid()
{
  VertexShaderUid uid;

  vertex_ubershader_uid_data* const data = uid.GetUidData<vertex_ubershader_uid_data>();
  memset(data, 0, sizeof(*data));
  data->num_texgens = xfmem.numTexGen.numTexGens;

  return uid;
}
|
||||
|
||||
static void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out);
|
||||
|
||||
ShaderCode GenVertexShader(APIType ApiType, const ShaderHostConfig& host_config,
|
||||
const vertex_ubershader_uid_data* uid_data)
|
||||
{
|
||||
const bool msaa = host_config.msaa;
|
||||
const bool ssaa = host_config.ssaa;
|
||||
const bool per_pixel_lighting = host_config.per_pixel_lighting;
|
||||
const bool vertex_rounding = host_config.vertex_rounding;
|
||||
const u32 numTexgen = uid_data->num_texgens;
|
||||
ShaderCode out;
|
||||
|
||||
out.Write("// Vertex UberShader\n\n");
|
||||
out.Write("%s", s_lighting_struct);
|
||||
|
||||
// uniforms
|
||||
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
|
||||
out.Write("UBO_BINDING(std140, 2) uniform VSBlock {\n");
|
||||
else
|
||||
out.Write("cbuffer VSBlock {\n");
|
||||
out.Write(s_shader_uniforms);
|
||||
out.Write("};\n");
|
||||
|
||||
out.Write("struct VS_OUTPUT {\n");
|
||||
GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting, "");
|
||||
out.Write("};\n\n");
|
||||
|
||||
WriteUberShaderCommonHeader(out, ApiType, host_config);
|
||||
WriteLightingFunction(out);
|
||||
|
||||
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
|
||||
{
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm0;\n", SHADER_NORM0_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm1;\n", SHADER_NORM1_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
|
||||
for (int i = 0; i < 8; ++i)
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i, i);
|
||||
|
||||
// We need to always use output blocks for Vulkan, but geometry shaders are also optional.
|
||||
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
|
||||
{
|
||||
out.Write("VARYING_LOCATION(0) out VertexData {\n");
|
||||
GenerateVSOutputMembers(out, ApiType, numTexgen, per_pixel_lighting,
|
||||
GetInterpolationQualifier(msaa, ssaa, true, false));
|
||||
out.Write("} vs;\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Let's set up attributes
|
||||
for (u32 i = 0; i < numTexgen; ++i)
|
||||
out.Write("%s out float3 tex%u;\n", GetInterpolationQualifier(msaa, ssaa), i);
|
||||
|
||||
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
if (per_pixel_lighting)
|
||||
{
|
||||
out.Write("%s out float3 Normal;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
out.Write("%s out float3 WorldPos;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
}
|
||||
out.Write("%s out float4 colors_0;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
out.Write("%s out float4 colors_1;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
}
|
||||
|
||||
out.Write("void main()\n{\n");
|
||||
}
|
||||
else // D3D
|
||||
{
|
||||
out.Write("VS_OUTPUT main(\n");
|
||||
|
||||
// inputs
|
||||
out.Write(" float3 rawnorm0 : NORMAL0,\n");
|
||||
out.Write(" float3 rawnorm1 : NORMAL1,\n");
|
||||
out.Write(" float3 rawnorm2 : NORMAL2,\n");
|
||||
out.Write(" float4 rawcolor0 : COLOR0,\n");
|
||||
out.Write(" float4 rawcolor1 : COLOR1,\n");
|
||||
for (int i = 0; i < 8; ++i)
|
||||
out.Write(" float3 rawtex%d : TEXCOORD%d,\n", i, i);
|
||||
out.Write(" uint posmtx : BLENDINDICES,\n");
|
||||
out.Write(" float4 rawpos : POSITION) {\n");
|
||||
}
|
||||
|
||||
out.Write("VS_OUTPUT o;\n"
|
||||
"\n");
|
||||
|
||||
// Transforms
|
||||
out.Write("// Position matrix\n"
|
||||
"float4 P0;\n"
|
||||
"float4 P1;\n"
|
||||
"float4 P2;\n"
|
||||
"\n"
|
||||
"// Normal matrix\n"
|
||||
"float3 N0;\n"
|
||||
"float3 N1;\n"
|
||||
"float3 N2;\n"
|
||||
"\n"
|
||||
"if ((components & %uu) != 0u) {// VB_HAS_POSMTXIDX\n",
|
||||
VB_HAS_POSMTXIDX);
|
||||
out.Write(" // Vertex format has a per-vertex matrix\n"
|
||||
" int posidx = int(posmtx.r);\n"
|
||||
" P0 = " I_TRANSFORMMATRICES "[posidx];\n"
|
||||
" P1 = " I_TRANSFORMMATRICES "[posidx+1];\n"
|
||||
" P2 = " I_TRANSFORMMATRICES "[posidx+2];\n"
|
||||
"\n"
|
||||
" int normidx = posidx >= 32 ? (posidx - 32) : posidx;\n"
|
||||
" N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
|
||||
" N1 = " I_NORMALMATRICES "[normidx+1].xyz;\n"
|
||||
" N2 = " I_NORMALMATRICES "[normidx+2].xyz;\n"
|
||||
"} else {\n"
|
||||
" // One shared matrix\n"
|
||||
" P0 = " I_POSNORMALMATRIX "[0];\n"
|
||||
" P1 = " I_POSNORMALMATRIX "[1];\n"
|
||||
" P2 = " I_POSNORMALMATRIX "[2];\n"
|
||||
" N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
|
||||
" N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
|
||||
" N2 = " I_POSNORMALMATRIX "[5].xyz;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n"
|
||||
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
|
||||
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
|
||||
"\n"
|
||||
"// Only the first normal gets normalized (TODO: why?)\n"
|
||||
"float3 _norm0 = float3(0.0, 0.0, 0.0);\n"
|
||||
"if ((components & %uu) != 0u) // VB_HAS_NRM0\n",
|
||||
VB_HAS_NRM0);
|
||||
out.Write(
|
||||
" _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n"
|
||||
"\n"
|
||||
"float3 _norm1 = float3(0.0, 0.0, 0.0);\n"
|
||||
"if ((components & %uu) != 0u) // VB_HAS_NRM1\n",
|
||||
VB_HAS_NRM1);
|
||||
out.Write(" _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n"
|
||||
"\n"
|
||||
"float3 _norm2 = float3(0.0, 0.0, 0.0);\n"
|
||||
"if ((components & %uu) != 0u) // VB_HAS_NRM2\n",
|
||||
VB_HAS_NRM2);
|
||||
out.Write(" _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n"
|
||||
"\n");
|
||||
|
||||
// Hardware Lighting
|
||||
WriteVertexLighting(out, ApiType, "pos.xyz", "_norm0", "rawcolor0", "rawcolor1", "o.colors_0",
|
||||
"o.colors_1");
|
||||
|
||||
// Texture Coordinates
|
||||
if (numTexgen > 0)
|
||||
GenVertexShaderTexGens(ApiType, numTexgen, out);
|
||||
|
||||
// clipPos/w needs to be done in pixel shader, not here
|
||||
out.Write("o.clipPos = o.pos;\n");
|
||||
|
||||
if (per_pixel_lighting)
|
||||
{
|
||||
out.Write("o.Normal = _norm0;\n");
|
||||
out.Write("o.WorldPos = pos.xyz;\n");
|
||||
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL0\n", VB_HAS_COL0);
|
||||
out.Write(" o.colors_0 = rawcolor0;\n");
|
||||
out.Write("if ((components & %uu) != 0u) // VB_HAS_COL1\n", VB_HAS_COL1);
|
||||
out.Write(" o.colors_1 = rawcolor1;\n");
|
||||
}
|
||||
|
||||
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
|
||||
// our own depth clipping and calculate the depth range before the perspective divide if
|
||||
// necessary.
|
||||
if (host_config.backend_depth_clamp)
|
||||
{
|
||||
// Since we're adjusting z for the depth range before the perspective divide, we have to do our
|
||||
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
|
||||
// We adjust our depth value for clipping purposes to match the perspective projection in the
|
||||
// software backend, which is a hack to fix Sonic Adventure and Unleashed games.
|
||||
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n");
|
||||
out.Write("o.clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w
|
||||
out.Write("o.clipDist1 = -clipDepth;\n"); // Far: z > 0
|
||||
}
|
||||
|
||||
// Write the true depth value. If the game uses depth textures, then the pixel shader will
|
||||
// override it with the correct values; if not, then early z culling will improve speed.
|
||||
// There are two different ways to do this, when the depth range is oversized, we process
|
||||
// the depth range in the vertex shader, if not we let the host driver handle it.
|
||||
//
|
||||
// Adjust z for the depth range. We're using an equation which incorporates a depth inversion,
|
||||
// so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
|
||||
// We have to handle the depth range in the vertex shader instead of after the perspective
|
||||
// divide, because some games will use a depth range larger than what is allowed by the
|
||||
// graphics API. These large depth ranges will still be clipped to the 0..1 range, so these
|
||||
// games effectively add a depth bias to the values written to the depth buffer.
|
||||
out.Write("o.pos.z = o.pos.w * " I_PIXELCENTERCORRECTION ".w - "
|
||||
"o.pos.z * " I_PIXELCENTERCORRECTION ".z;\n");
|
||||
|
||||
if (!host_config.backend_clip_control)
|
||||
{
|
||||
// If the graphics API doesn't support a depth range of 0..1, then we need to map z to
|
||||
// the -1..1 range. Unfortunately we have to use a subtraction, which is a lossy floating-point
|
||||
// operation that can introduce a round-trip error.
|
||||
out.Write("o.pos.z = o.pos.z * 2.0 - o.pos.w;\n");
|
||||
}
|
||||
|
||||
// Correct for negative viewports by mirroring all vertices. We need to negate the height here,
|
||||
// since the viewport height is already negated by the render backend.
|
||||
out.Write("o.pos.xy *= sign(" I_PIXELCENTERCORRECTION ".xy * float2(1.0, -1.0));\n");
|
||||
|
||||
// The console GPU places the pixel center at 7/12 in screen space unless
|
||||
// antialiasing is enabled, while D3D and OpenGL place it at 0.5. This results
|
||||
// in some primitives being placed one pixel too far to the bottom-right,
|
||||
// which in turn can be critical if it happens for clear quads.
|
||||
// Hence, we compensate for this pixel center difference so that primitives
|
||||
// get rasterized correctly.
|
||||
out.Write("o.pos.xy = o.pos.xy - o.pos.w * " I_PIXELCENTERCORRECTION ".xy;\n");
|
||||
|
||||
if (vertex_rounding)
|
||||
{
|
||||
// By now our position is in clip space. However, higher resolutions than the Wii outputs
|
||||
// cause an additional pixel offset. Due to a higher pixel density we need to correct this
|
||||
// by converting our clip-space position into the Wii's screen-space.
|
||||
// Acquire the right pixel and then convert it back.
|
||||
out.Write("if (o.pos.w == 1.0f)\n");
|
||||
out.Write("{\n");
|
||||
|
||||
out.Write("\tfloat ss_pixel_x = ((o.pos.x + 1.0f) * (" I_VIEWPORT_SIZE ".x * 0.5f));\n");
|
||||
out.Write("\tfloat ss_pixel_y = ((o.pos.y + 1.0f) * (" I_VIEWPORT_SIZE ".y * 0.5f));\n");
|
||||
|
||||
out.Write("\tss_pixel_x = round(ss_pixel_x);\n");
|
||||
out.Write("\tss_pixel_y = round(ss_pixel_y);\n");
|
||||
|
||||
out.Write("\to.pos.x = ((ss_pixel_x / (" I_VIEWPORT_SIZE ".x * 0.5f)) - 1.0f);\n");
|
||||
out.Write("\to.pos.y = ((ss_pixel_y / (" I_VIEWPORT_SIZE ".y * 0.5f)) - 1.0f);\n");
|
||||
out.Write("}\n");
|
||||
}
|
||||
|
||||
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
|
||||
{
|
||||
if (host_config.backend_geometry_shaders || ApiType == APIType::Vulkan)
|
||||
{
|
||||
AssignVSOutputMembers(out, "vs", "o", numTexgen, per_pixel_lighting);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: Pass interface blocks between shader stages even if geometry shaders
|
||||
// are not supported, however that will require at least OpenGL 3.2 support.
|
||||
for (u32 i = 0; i < numTexgen; ++i)
|
||||
out.Write("tex%d.xyz = o.tex%d;\n", i, i);
|
||||
out.Write("clipPos = o.clipPos;\n");
|
||||
if (per_pixel_lighting)
|
||||
{
|
||||
out.Write("Normal = o.Normal;\n");
|
||||
out.Write("WorldPos = o.WorldPos;\n");
|
||||
}
|
||||
out.Write("colors_0 = o.colors_0;\n");
|
||||
out.Write("colors_1 = o.colors_1;\n");
|
||||
}
|
||||
|
||||
if (host_config.backend_depth_clamp)
|
||||
{
|
||||
out.Write("gl_ClipDistance[0] = o.clipDist0;\n");
|
||||
out.Write("gl_ClipDistance[1] = o.clipDist1;\n");
|
||||
}
|
||||
|
||||
// Vulkan NDC space has Y pointing down (right-handed NDC space).
|
||||
if (ApiType == APIType::Vulkan)
|
||||
out.Write("gl_Position = float4(o.pos.x, -o.pos.y, o.pos.z, o.pos.w);\n");
|
||||
else
|
||||
out.Write("gl_Position = o.pos;\n");
|
||||
}
|
||||
else // D3D
|
||||
{
|
||||
out.Write("return o;\n");
|
||||
}
|
||||
out.Write("}\n");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Appends the texture-coordinate-generation section of the vertex ubershader to `out`.
//
// ApiType   - only consulted to decide whether the emitted loop gets a "[loop] " prefix (D3D).
// numTexgen - number of texgens; controls how many o.texN outputs are initialised and how many
//             cases each emitted switch statement receives.
// out       - shader source accumulator; text is appended in emission order.
void GenVertexShaderTexGens(APIType ApiType, u32 numTexgen, ShaderCode& out)
{
  // Clear every output coordinate first. Per the original note, the HLSL compiler complains
  // that the output texture coordinates are uninitialized when they are dynamically indexed.
  for (u32 coord_index = 0; coord_index < numTexgen; coord_index++)
    out.Write("o.tex%u = float3(0.0, 0.0, 0.0);\n", coord_index);

  out.Write("// Texture coordinate generation\n");

  // A single texgen is emitted as a plain scope with a constant index; otherwise a loop is
  // emitted over all texgens.
  if (numTexgen != 1)
  {
    const char* loop_prefix = "";
    if (ApiType == APIType::D3D)
      loop_prefix = "[loop] ";
    out.Write("%sfor (uint texgen = 0u; texgen < %uu; texgen++) {\n", loop_prefix, numTexgen);
  }
  else
  {
    out.Write("{ const uint texgen = 0u;\n");
  }

  out.Write(" // Texcoord transforms\n"
            " float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n"
            " uint texMtxInfo = xfmem_texMtxInfo(texgen);\n");

  // Select the source row feeding this texgen.
  const auto source_row_expr = BitfieldExtract("texMtxInfo", TexMtxInfo().sourcerow);
  out.Write(" switch (%s) {\n", source_row_expr.c_str());

  out.Write(" case %uu: // XF_SRCGEOM_INROW\n", XF_SRCGEOM_INROW);
  out.Write(" coord.xyz = rawpos.xyz;\n"
            " break;\n\n");

  out.Write(" case %uu: // XF_SRCNORMAL_INROW\n", XF_SRCNORMAL_INROW);
  out.Write(
      " coord.xyz = ((components & %uu /* VB_HAS_NRM0 */) != 0u) ? rawnorm0.xyz : coord.xyz;"
      " break;\n\n",
      VB_HAS_NRM0);

  out.Write(" case %uu: // XF_SRCBINORMAL_T_INROW\n", XF_SRCBINORMAL_T_INROW);
  out.Write(
      " coord.xyz = ((components & %uu /* VB_HAS_NRM1 */) != 0u) ? rawnorm1.xyz : coord.xyz;"
      " break;\n\n",
      VB_HAS_NRM1);

  out.Write(" case %uu: // XF_SRCBINORMAL_B_INROW\n", XF_SRCBINORMAL_B_INROW);
  out.Write(
      " coord.xyz = ((components & %uu /* VB_HAS_NRM2 */) != 0u) ? rawnorm2.xyz : coord.xyz;"
      " break;\n\n",
      VB_HAS_NRM2);

  // One case per raw texture coordinate input (eight cases are always emitted).
  for (u32 uv_index = 0; uv_index < 8; uv_index++)
  {
    out.Write(" case %uu: // XF_SRCTEX%u_INROW\n", XF_SRCTEX0_INROW + uv_index, uv_index);
    out.Write(
        " coord = ((components & %uu /* VB_HAS_UV%u */) != 0u) ? float4(rawtex%u.x, rawtex%u.y, "
        "1.0, 1.0) : coord;\n",
        VB_HAS_UV0 << uv_index, uv_index, uv_index, uv_index);
    out.Write(" break;\n\n");
  }
  out.Write(" }\n"
            "\n");

  out.Write(" // Input form of AB11 sets z element to 1.0\n");
  const auto input_form_expr = BitfieldExtract("texMtxInfo", TexMtxInfo().inputform);
  out.Write(" if (%s == %uu) // inputform == XF_TEXINPUT_AB11\n", input_form_expr.c_str(),
            XF_TEXINPUT_AB11);
  out.Write(" coord.z = 1.0f;\n"
            "\n");

  out.Write(" // first transformation\n");
  const auto texgen_type_expr = BitfieldExtract("texMtxInfo", TexMtxInfo().texgentype);
  out.Write(" uint texgentype = %s;\n", texgen_type_expr.c_str());
  out.Write(" float3 output_tex;\n"
            " switch (texgentype)\n"
            " {\n");

  // Emboss mapping: start from an already generated coordinate and add a light term.
  out.Write(" case %uu: // XF_TEXGEN_EMBOSS_MAP\n", XF_TEXGEN_EMBOSS_MAP);
  out.Write(" {\n");
  const auto emboss_light_expr = BitfieldExtract("texMtxInfo", TexMtxInfo().embosslightshift);
  out.Write(" uint light = %s;\n", emboss_light_expr.c_str());
  const auto emboss_source_expr = BitfieldExtract("texMtxInfo", TexMtxInfo().embosssourceshift);
  out.Write(" uint source = %s;\n", emboss_source_expr.c_str());
  out.Write(" switch (source) {\n");
  for (u32 source_index = 0; source_index < numTexgen; source_index++)
    out.Write(" case %uu: output_tex.xyz = o.tex%u; break;\n", source_index, source_index);
  out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
            " }\n");
  // Open question carried over from the original: should this be VB_HAS_NRM1 | VB_HAS_NRM2?
  out.Write(" if ((components & %uu) != 0u) { // VB_HAS_NRM1 | VB_HAS_NRM2\n",
            VB_HAS_NRM1 | VB_HAS_NRM2);
  out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
            " output_tex.xyz += float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n"
            " }\n"
            " }\n"
            " break;\n\n");

  // Colour-sourced texgens read the s/t pair from the lit colour outputs.
  out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC0\n", XF_TEXGEN_COLOR_STRGBC0);
  out.Write(" output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);\n"
            " break;\n\n");
  out.Write(" case %uu: // XF_TEXGEN_COLOR_STRGBC1\n", XF_TEXGEN_COLOR_STRGBC1);
  out.Write(" output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);\n"
            " break;\n\n");

  // Regular texgen: transform by either a per-vertex indexed matrix or the texgen's own matrix.
  out.Write(" default: // Also XF_TEXGEN_REGULAR\n"
            " {\n");
  out.Write(" if ((components & (%uu /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {\n",
            VB_HAS_TEXMTXIDX0);
  out.Write(" // This is messy, due to dynamic indexing of the input texture coordinates.\n"
            " // Hopefully the compiler will unroll this whole loop anyway and the switch.\n"
            " int tmp = 0;\n"
            " switch (texgen) {\n");
  for (u32 mtx_index = 0; mtx_index < numTexgen; mtx_index++)
    out.Write(" case %uu: tmp = int(rawtex%u.z); break;\n", mtx_index, mtx_index);
  out.Write(" }\n"
            "\n");

  // The same projection test is emitted in both matrix branches.
  const auto projection_expr = BitfieldExtract("texMtxInfo", TexMtxInfo().projection);
  out.Write(" if (%s == %uu) {\n", projection_expr.c_str(), XF_TEXPROJ_STQ);
  out.Write(" output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
            " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
            " dot(coord, " I_TRANSFORMMATRICES "[tmp + 2]));\n"
            " } else {\n"
            " output_tex.xyz = float3(dot(coord, " I_TRANSFORMMATRICES "[tmp]),\n"
            " dot(coord, " I_TRANSFORMMATRICES "[tmp + 1]),\n"
            " 1.0);\n"
            " }\n"
            " } else {\n");
  out.Write(" if (%s == %uu) {\n", projection_expr.c_str(), XF_TEXPROJ_STQ);
  out.Write(" output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
            " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
            " dot(coord, " I_TEXMATRICES "[3u * texgen + 2u]));\n"
            " } else {\n"
            " output_tex.xyz = float3(dot(coord, " I_TEXMATRICES "[3u * texgen]),\n"
            " dot(coord, " I_TEXMATRICES "[3u * texgen + 1u]),\n"
            " 1.0);\n"
            " }\n"
            " }\n"
            " }\n"
            " break;\n\n"
            " }\n"
            "\n");

  // Optional post-transform stage, emitted under a runtime check of xfmem_dualTexInfo.
  out.Write(" if (xfmem_dualTexInfo != 0u) {\n"
            " uint postMtxInfo = xfmem_postMtxInfo(texgen);");
  const auto post_index_expr = BitfieldExtract("postMtxInfo", PostMtxInfo().index);
  out.Write(" uint base_index = %s;\n", post_index_expr.c_str());
  out.Write(" float4 P0 = " I_POSTTRANSFORMMATRICES "[base_index & 0x3fu];\n"
            " float4 P1 = " I_POSTTRANSFORMMATRICES "[(base_index + 1u) & 0x3fu];\n"
            " float4 P2 = " I_POSTTRANSFORMMATRICES "[(base_index + 2u) & 0x3fu];\n"
            "\n");
  const auto post_normalize_expr = BitfieldExtract("postMtxInfo", PostMtxInfo().normalize);
  out.Write(" if (%s != 0u)\n", post_normalize_expr.c_str());
  out.Write(" output_tex.xyz = normalize(output_tex.xyz);\n"
            "\n"
            " // multiply by postmatrix\n"
            " output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,\n"
            " dot(P1.xyz, output_tex.xyz) + P1.w,\n"
            " dot(P2.xyz, output_tex.xyz) + P2.w);\n"
            " }\n\n");

  // When q is 0, the GameCube appears to have a special case.
  // This can be seen in devkitPro's neheGX Lesson08 example for Wii.
  // Makes differences in Rogue Squadron 3 (Hoth sky) and The Last Story (shadow culling).
  out.Write(" if (texgentype == %uu && output_tex.z == 0.0) // XF_TEXGEN_REGULAR\n",
            XF_TEXGEN_REGULAR);
  out.Write(
      " output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));\n"
      "\n");

  // Store the result into the matching o.texN output through a switch on the texgen index.
  out.Write(" // Hopefully GPUs that can support dynamic indexing will optimize this.\n"
            " switch (texgen) {\n");
  for (u32 dest_index = 0; dest_index < numTexgen; dest_index++)
    out.Write(" case %uu: o.tex%u = output_tex; break;\n", dest_index, dest_index);
  out.Write(" }\n"
            "}\n");
}
|
||||
|
||||
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback)
|
||||
{
|
||||
VertexShaderUid uid;
|
||||
std::memset(&uid, 0, sizeof(uid));
|
||||
|
||||
for (u32 texgens = 0; texgens <= 8; texgens++)
|
||||
{
|
||||
auto* vuid = uid.GetUidData<UberShader::vertex_ubershader_uid_data>();
|
||||
vuid->num_texgens = texgens;
|
||||
callback(uid);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2015 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include "VideoCommon/PixelShaderGen.h"
|
||||
|
||||
namespace UberShader
|
||||
{
|
||||
#pragma pack(1)
|
||||
struct vertex_ubershader_uid_data
|
||||
{
|
||||
u32 num_texgens : 4;
|
||||
|
||||
u32 NumValues() const { return sizeof(vertex_ubershader_uid_data); }
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
typedef ShaderUid<vertex_ubershader_uid_data> VertexShaderUid;
|
||||
|
||||
VertexShaderUid GetVertexShaderUid();
|
||||
|
||||
ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config,
|
||||
const vertex_ubershader_uid_data* uid_data);
|
||||
void EnumerateVertexShaderUids(const std::function<void(const VertexShaderUid&)>& callback);
|
||||
}
|
|
@ -44,13 +44,6 @@ static VertexLoaderMap s_vertex_loader_map;
|
|||
|
||||
u8* cached_arraybases[12];
|
||||
|
||||
// Used in the Vulkan backend
|
||||
|
||||
// Returns a pointer to the file-level map of native vertex formats.
NativeVertexFormatMap* GetNativeVertexFormatMap()
{
  NativeVertexFormatMap* const format_map = &s_native_vertex_map;
  return format_map;
}
|
||||
|
||||
void Init()
|
||||
{
|
||||
MarkAllDirty();
|
||||
|
@ -133,6 +126,75 @@ void MarkAllDirty()
|
|||
g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8);
|
||||
}
|
||||
|
||||
// Looks `decl` up in the native vertex format map and returns the stored format. When no
// entry exists yet, a format is created through g_vertex_manager, stored in the map, and
// returned. The map keeps ownership of the format.
NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl)
{
  const auto existing = s_native_vertex_map.find(decl);
  if (existing != s_native_vertex_map.end())
    return existing->second.get();

  // Not cached yet: build the format and hand ownership to the map.
  std::unique_ptr<NativeVertexFormat> created = g_vertex_manager->CreateNativeVertexFormat(decl);
  const auto insertion = s_native_vertex_map.emplace(decl, std::move(created));
  return insertion.first->second.get();
}
|
||||
|
||||
// Builds a declaration in which every attribute is enabled and returns the matching native
// vertex format. Attributes enabled in `decl` are copied field by field; disabled ones are
// replaced with an enabled stub at offset 0. The stride is taken from `decl`.
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl)
{
  // The padding in the structs can cause the memcmp() in the map to create duplicates.
  // Avoid this by zeroing the whole declaration before filling in any field.
  PortableVertexDeclaration uber_decl;
  std::memset(&uber_decl, 0, sizeof(uber_decl));
  uber_decl.stride = decl.stride;

  // Copies `src` into `dst` when `src` is enabled, otherwise writes an enabled stub attribute
  // with the given type, component count and integer flag. Fields are assigned one at a time
  // so the zeroed padding in `dst` is left untouched.
  const auto copy_or_stub = [](AttributeFormat& dst, const AttributeFormat& src,
                               VarType stub_type, int stub_components, bool stub_integer) {
    if (src.enable)
    {
      dst.type = src.type;
      dst.components = src.components;
      dst.offset = src.offset;
      dst.enable = src.enable;
      dst.integer = src.integer;
    }
    else
    {
      dst.type = stub_type;
      dst.components = stub_components;
      dst.offset = 0;
      dst.enable = true;
      dst.integer = stub_integer;
    }
  };

  copy_or_stub(uber_decl.position, decl.position, VAR_FLOAT, 1, false);

  for (size_t n = 0; n < ArraySize(uber_decl.normals); n++)
    copy_or_stub(uber_decl.normals[n], decl.normals[n], VAR_FLOAT, 1, false);

  for (size_t c = 0; c < ArraySize(uber_decl.colors); c++)
    copy_or_stub(uber_decl.colors[c], decl.colors[c], VAR_UNSIGNED_BYTE, 4, false);

  for (size_t t = 0; t < ArraySize(uber_decl.texcoords); t++)
    copy_or_stub(uber_decl.texcoords[t], decl.texcoords[t], VAR_FLOAT, 1, false);

  copy_or_stub(uber_decl.posmtx, decl.posmtx, VAR_UNSIGNED_BYTE, 1, true);

  return GetOrCreateMatchingFormat(uber_decl);
}
|
||||
|
||||
static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = false)
|
||||
{
|
||||
CPState* state = preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
|
||||
|
@ -208,6 +270,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
|
|||
}
|
||||
s_current_vtx_fmt = loader->m_native_vertex_format;
|
||||
g_current_components = loader->m_native_components;
|
||||
VertexShaderManager::SetVertexFormat(loader->m_native_components);
|
||||
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze reference
|
||||
|
|
|
@ -24,7 +24,15 @@ void Clear();
|
|||
|
||||
void MarkAllDirty();
|
||||
|
||||
NativeVertexFormatMap* GetNativeVertexFormatMap();
|
||||
// Creates or obtains a pointer to a VertexFormat representing decl.
|
||||
// If this results in a VertexFormat being created and the game later uses a matching vertex
|
||||
// declaration, the one that was previously created will be used.
|
||||
NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& decl);
|
||||
|
||||
// For vertex ubershaders, all attributes need to be present, even when the vertex
|
||||
// format does not contain them. This function returns a vertex format with dummy
|
||||
// offsets set to the unused attributes.
|
||||
NativeVertexFormat* GetUberVertexFormat(const PortableVertexDeclaration& decl);
|
||||
|
||||
// Returns -1 if buf_size is insufficient, else the amount of bytes consumed
|
||||
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool is_preprocess);
|
||||
|
|
|
@ -193,22 +193,24 @@ void VertexManagerBase::Flush()
|
|||
g_video_backend->CheckInvalidState();
|
||||
|
||||
#if defined(_DEBUG) || defined(DEBUGFAST)
|
||||
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d",
|
||||
PRIM_LOG("frame%d:\n texgen=%u, numchan=%u, dualtex=%u, ztex=%u, cole=%u, alpe=%u, ze=%u",
|
||||
g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens, xfmem.numChan.numColorChans,
|
||||
xfmem.dualTexTrans.enabled, bpmem.ztex2.op, (int)bpmem.blendmode.colorupdate,
|
||||
(int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);
|
||||
xfmem.dualTexTrans.enabled, bpmem.ztex2.op.Value(), bpmem.blendmode.colorupdate.Value(),
|
||||
bpmem.blendmode.alphaupdate.Value(), bpmem.zmode.updateenable.Value());
|
||||
|
||||
for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
|
||||
for (u32 i = 0; i < xfmem.numChan.numColorChans; ++i)
|
||||
{
|
||||
LitChannel* ch = &xfmem.color[i];
|
||||
PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i,
|
||||
ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
|
||||
PRIM_LOG("colchan%u: matsrc=%u, light=0x%x, ambsrc=%u, diffunc=%u, attfunc=%u", i,
|
||||
ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
|
||||
ch->diffusefunc.Value(), ch->attnfunc.Value());
|
||||
ch = &xfmem.alpha[i];
|
||||
PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i,
|
||||
ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
|
||||
PRIM_LOG("alpchan%u: matsrc=%u, light=0x%x, ambsrc=%u, diffunc=%u, attfunc=%u", i,
|
||||
ch->matsource.Value(), ch->GetFullLightMask(), ch->ambsource.Value(),
|
||||
ch->diffusefunc.Value(), ch->attnfunc.Value());
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
|
||||
for (u32 i = 0; i < xfmem.numTexGen.numTexGens; ++i)
|
||||
{
|
||||
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
|
||||
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
|
||||
|
@ -216,16 +218,17 @@ void VertexManagerBase::Flush()
|
|||
if (tinfo.texgentype != XF_TEXGEN_REGULAR)
|
||||
tinfo.projection = 0;
|
||||
|
||||
PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, "
|
||||
"postmtx=%d, postnorm=%d",
|
||||
i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow,
|
||||
tinfo.embosssourceshift, tinfo.embosslightshift, xfmem.postMtxInfo[i].index,
|
||||
xfmem.postMtxInfo[i].normalize);
|
||||
PRIM_LOG("txgen%u: proj=%u, input=%u, gentype=%u, srcrow=%u, embsrc=%u, emblght=%u, "
|
||||
"postmtx=%u, postnorm=%u",
|
||||
i, tinfo.projection.Value(), tinfo.inputform.Value(), tinfo.texgentype.Value(),
|
||||
tinfo.sourcerow.Value(), tinfo.embosssourceshift.Value(),
|
||||
tinfo.embosslightshift.Value(), xfmem.postMtxInfo[i].index.Value(),
|
||||
xfmem.postMtxInfo[i].normalize.Value());
|
||||
}
|
||||
|
||||
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x",
|
||||
(int)bpmem.genMode.numtevstages + 1, (int)bpmem.genMode.numindstages,
|
||||
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable,
|
||||
PRIM_LOG("pixel: tev=%u, ind=%u, texgen=%u, dstalpha=%u, alphatest=0x%x",
|
||||
bpmem.genMode.numtevstages.Value() + 1, bpmem.genMode.numindstages.Value(),
|
||||
bpmem.genMode.numtexgens.Value(), bpmem.dstalpha.enable.Value(),
|
||||
(bpmem.alpha_test.hex >> 16) & 0xff);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -114,16 +114,16 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
out.Write("ATTRIBUTE_LOCATION(%d) in float3 rawnorm2;\n", SHADER_NORM2_ATTRIB);
|
||||
|
||||
if (uid_data->components & VB_HAS_COL0)
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 color0;\n", SHADER_COLOR0_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
|
||||
if (uid_data->components & VB_HAS_COL1)
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 color1;\n", SHADER_COLOR1_ATTRIB);
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
|
||||
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
|
||||
if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx)
|
||||
{
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float%d tex%d;\n", SHADER_TEXTURE0_ATTRIB + i,
|
||||
out.Write("ATTRIBUTE_LOCATION(%d) in float%d rawtex%d;\n", SHADER_TEXTURE0_ATTRIB + i,
|
||||
hastexmtx ? 3 : 2, i);
|
||||
}
|
||||
}
|
||||
|
@ -143,7 +143,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
{
|
||||
if (i < uid_data->numTexGens)
|
||||
{
|
||||
out.Write("%s out float3 uv%u;\n", GetInterpolationQualifier(msaa, ssaa), i);
|
||||
out.Write("%s out float3 tex%u;\n", GetInterpolationQualifier(msaa, ssaa), i);
|
||||
}
|
||||
}
|
||||
out.Write("%s out float4 clipPos;\n", GetInterpolationQualifier(msaa, ssaa));
|
||||
|
@ -170,14 +170,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
if (uid_data->components & VB_HAS_NRM2)
|
||||
out.Write(" float3 rawnorm2 : NORMAL2,\n");
|
||||
if (uid_data->components & VB_HAS_COL0)
|
||||
out.Write(" float4 color0 : COLOR0,\n");
|
||||
out.Write(" float4 rawcolor0 : COLOR0,\n");
|
||||
if (uid_data->components & VB_HAS_COL1)
|
||||
out.Write(" float4 color1 : COLOR1,\n");
|
||||
out.Write(" float4 rawcolor1 : COLOR1,\n");
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
u32 hastexmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
|
||||
if ((uid_data->components & (VB_HAS_UV0 << i)) || hastexmtx)
|
||||
out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
|
||||
out.Write(" float%d rawtex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
|
||||
}
|
||||
if (uid_data->components & VB_HAS_POSMTXIDX)
|
||||
out.Write(" uint4 posmtx : BLENDINDICES,\n");
|
||||
|
@ -242,18 +242,18 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
if (uid_data->numColorChans == 0)
|
||||
{
|
||||
if (uid_data->components & VB_HAS_COL0)
|
||||
out.Write("o.colors_0 = color0;\n");
|
||||
out.Write("o.colors_0 = rawcolor0;\n");
|
||||
else
|
||||
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
|
||||
}
|
||||
|
||||
GenerateLightingShaderCode(out, uid_data->lighting, uid_data->components, uid_data->numColorChans,
|
||||
"color", "o.colors_");
|
||||
"rawcolor", "o.colors_");
|
||||
|
||||
if (uid_data->numColorChans < 2)
|
||||
{
|
||||
if (uid_data->components & VB_HAS_COL1)
|
||||
out.Write("o.colors_1 = color1;\n");
|
||||
out.Write("o.colors_1 = rawcolor1;\n");
|
||||
else
|
||||
out.Write("o.colors_1 = o.colors_0;\n");
|
||||
}
|
||||
|
@ -296,7 +296,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
default:
|
||||
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
|
||||
if (uid_data->components & (VB_HAS_UV0 << (texinfo.sourcerow - XF_SRCTEX0_INROW)))
|
||||
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n",
|
||||
out.Write("coord = float4(rawtex%d.x, rawtex%d.y, 1.0, 1.0);\n",
|
||||
texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
|
||||
break;
|
||||
}
|
||||
|
@ -338,7 +338,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
default:
|
||||
if (uid_data->components & (VB_HAS_TEXMTXIDX0 << i))
|
||||
{
|
||||
out.Write("int tmp = int(tex%d.z);\n", i);
|
||||
out.Write("int tmp = int(rawtex%d.z);\n", i);
|
||||
if (((uid_data->texMtxInfo_n_projection >> i) & 1) == XF_TEXPROJ_STQ)
|
||||
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES
|
||||
"[tmp]), dot(coord, " I_TRANSFORMMATRICES
|
||||
|
@ -407,10 +407,10 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
out.Write("o.WorldPos = pos.xyz;\n");
|
||||
|
||||
if (uid_data->components & VB_HAS_COL0)
|
||||
out.Write("o.colors_0 = color0;\n");
|
||||
out.Write("o.colors_0 = rawcolor0;\n");
|
||||
|
||||
if (uid_data->components & VB_HAS_COL1)
|
||||
out.Write("o.colors_1 = color1;\n");
|
||||
out.Write("o.colors_1 = rawcolor1;\n");
|
||||
}
|
||||
|
||||
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
|
||||
|
@ -495,7 +495,7 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
|
|||
// TODO: Pass interface blocks between shader stages even if geometry shaders
|
||||
// are not supported, however that will require at least OpenGL 3.2 support.
|
||||
for (unsigned int i = 0; i < uid_data->numTexGens; ++i)
|
||||
out.Write("uv%d.xyz = o.tex%d;\n", i, i);
|
||||
out.Write("tex%d.xyz = o.tex%d;\n", i, i);
|
||||
out.Write("clipPos = o.clipPos;\n");
|
||||
if (per_pixel_lighting)
|
||||
{
|
||||
|
|
|
@ -30,6 +30,7 @@ alignas(16) static float g_fProjectionMatrix[16];
|
|||
|
||||
// track changes
|
||||
static bool bTexMatricesChanged[2], bPosNormalMatrixChanged, bProjectionChanged, bViewportChanged;
|
||||
static bool bTexMtxInfoChanged, bLightingConfigChanged;
|
||||
static BitSet32 nMaterialsChanged;
|
||||
static int nTransformMatricesChanged[2]; // min,max
|
||||
static int nNormalMatricesChanged[2]; // min,max
|
||||
|
@ -193,8 +194,10 @@ void VertexShaderManager::Init()
|
|||
bPosNormalMatrixChanged = false;
|
||||
bProjectionChanged = true;
|
||||
bViewportChanged = false;
|
||||
bTexMtxInfoChanged = false;
|
||||
bLightingConfigChanged = false;
|
||||
|
||||
xfmem = {};
|
||||
std::memset(&xfmem, 0, sizeof(xfmem));
|
||||
constants = {};
|
||||
ResetView();
|
||||
|
||||
|
@ -561,6 +564,32 @@ void VertexShaderManager::SetConstants()
|
|||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
if (bTexMtxInfoChanged)
|
||||
{
|
||||
bTexMtxInfoChanged = false;
|
||||
constants.xfmem_dualTexInfo = xfmem.dualTexTrans.enabled;
|
||||
for (size_t i = 0; i < ArraySize(xfmem.texMtxInfo); i++)
|
||||
constants.xfmem_pack1[i][0] = xfmem.texMtxInfo[i].hex;
|
||||
for (size_t i = 0; i < ArraySize(xfmem.postMtxInfo); i++)
|
||||
constants.xfmem_pack1[i][1] = xfmem.postMtxInfo[i].hex;
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
if (bLightingConfigChanged)
|
||||
{
|
||||
bLightingConfigChanged = false;
|
||||
|
||||
for (size_t i = 0; i < 2; i++)
|
||||
{
|
||||
constants.xfmem_pack1[i][2] = xfmem.color[i].hex;
|
||||
constants.xfmem_pack1[i][3] = xfmem.alpha[i].hex;
|
||||
}
|
||||
constants.xfmem_numColorChans = xfmem.numChan.numColorChans;
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexShaderManager::InvalidateXFRange(int start, int end)
|
||||
|
@ -758,6 +787,27 @@ void VertexShaderManager::ResetView()
|
|||
bProjectionChanged = true;
|
||||
}
|
||||
|
||||
// Stores `components` in the vertex shader constants and marks them dirty. Does nothing when
// the stored value already equals `components`.
void VertexShaderManager::SetVertexFormat(u32 components)
{
  if (constants.components == components)
    return;

  constants.components = components;
  dirty = true;
}
|
||||
|
||||
void VertexShaderManager::SetTexMatrixInfoChanged(int index)
|
||||
{
|
||||
// TODO: Should we track this with more precision, like which indices changed?
|
||||
// The whole vertex constants are probably going to be uploaded regardless.
|
||||
bTexMtxInfoChanged = true;
|
||||
}
|
||||
|
||||
void VertexShaderManager::SetLightingConfigChanged()
|
||||
{
|
||||
bLightingConfigChanged = true;
|
||||
}
|
||||
|
||||
void VertexShaderManager::TransformToClipSpace(const float* data, float* out, u32 MtxIdx)
|
||||
{
|
||||
const float* world_matrix = &xfmem.posMatrices[(MtxIdx & 0x3f) * 4];
|
||||
|
@ -800,6 +850,8 @@ void VertexShaderManager::DoState(PointerWrap& p)
|
|||
p.Do(bPosNormalMatrixChanged);
|
||||
p.Do(bProjectionChanged);
|
||||
p.Do(bViewportChanged);
|
||||
p.Do(bTexMtxInfoChanged);
|
||||
p.Do(bLightingConfigChanged);
|
||||
|
||||
p.Do(constants);
|
||||
|
||||
|
|
|
@ -36,6 +36,10 @@ public:
|
|||
static void RotateView(float x, float y);
|
||||
static void ResetView();
|
||||
|
||||
static void SetVertexFormat(u32 components);
|
||||
static void SetTexMatrixInfoChanged(int index);
|
||||
static void SetLightingConfigChanged();
|
||||
|
||||
// data: 3 floats representing the X, Y and Z vertex model coordinates and the posmatrix index.
|
||||
// out: 4 floats which will be initialized with the corresponding clip space coordinates
|
||||
// NOTE: g_fProjectionMatrix must be up to date when this is called
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
<ItemGroup>
|
||||
<ClCompile Include="AbstractTexture.cpp" />
|
||||
<ClCompile Include="AsyncRequests.cpp" />
|
||||
<ClCompile Include="AsyncShaderCompiler.cpp" />
|
||||
<ClCompile Include="AVIDump.cpp" />
|
||||
<ClCompile Include="BoundingBox.cpp" />
|
||||
<ClCompile Include="BPFunctions.cpp" />
|
||||
|
@ -66,12 +67,15 @@
|
|||
<ClCompile Include="RenderState.cpp" />
|
||||
<ClCompile Include="LightingShaderGen.cpp" />
|
||||
<ClCompile Include="ShaderGenCommon.cpp" />
|
||||
<ClCompile Include="UberShaderCommon.cpp" />
|
||||
<ClCompile Include="UberShaderPixel.cpp" />
|
||||
<ClCompile Include="Statistics.cpp" />
|
||||
<ClCompile Include="GeometryShaderGen.cpp" />
|
||||
<ClCompile Include="GeometryShaderManager.cpp" />
|
||||
<ClCompile Include="TextureCacheBase.cpp" />
|
||||
<ClCompile Include="TextureConfig.cpp" />
|
||||
<ClCompile Include="TextureConversionShader.cpp" />
|
||||
<ClCompile Include="UberShaderVertex.cpp" />
|
||||
<ClCompile Include="VertexLoader.cpp" />
|
||||
<ClCompile Include="VertexLoaderBase.cpp" />
|
||||
<ClCompile Include="VertexLoaderX64.cpp" />
|
||||
|
@ -94,6 +98,7 @@
|
|||
<ItemGroup>
|
||||
<ClInclude Include="AbstractTexture.h" />
|
||||
<ClInclude Include="AsyncRequests.h" />
|
||||
<ClInclude Include="AsyncShaderCompiler.h" />
|
||||
<ClInclude Include="AVIDump.h" />
|
||||
<ClInclude Include="BoundingBox.h" />
|
||||
<ClInclude Include="BPFunctions.h" />
|
||||
|
@ -107,6 +112,8 @@
|
|||
<ClInclude Include="Fifo.h" />
|
||||
<ClInclude Include="FPSCounter.h" />
|
||||
<ClInclude Include="FramebufferManagerBase.h" />
|
||||
<ClInclude Include="UberShaderCommon.h" />
|
||||
<ClInclude Include="UberShaderPixel.h" />
|
||||
<ClInclude Include="HiresTextures.h" />
|
||||
<ClInclude Include="ImageWrite.h" />
|
||||
<ClInclude Include="IndexGenerator.h" />
|
||||
|
@ -131,6 +138,7 @@
|
|||
<ClInclude Include="TextureConfig.h" />
|
||||
<ClInclude Include="TextureConversionShader.h" />
|
||||
<ClInclude Include="TextureDecoder.h" />
|
||||
<ClInclude Include="UberShaderVertex.h" />
|
||||
<ClInclude Include="VertexLoader.h" />
|
||||
<ClInclude Include="VertexLoaderBase.h" />
|
||||
<ClInclude Include="VertexLoaderManager.h" />
|
||||
|
@ -172,4 +180,4 @@
|
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -176,6 +176,18 @@
|
|||
<ClCompile Include="ShaderGenCommon.cpp">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="AsyncShaderCompiler.cpp">
|
||||
<Filter>Util</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="UberShaderPixel.cpp">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="UberShaderCommon.cpp">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="UberShaderVertex.cpp">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="CommandProcessor.h" />
|
||||
|
@ -332,8 +344,20 @@
|
|||
<ClInclude Include="AbstractTexture.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="AsyncShaderCompiler.h">
|
||||
<Filter>Util</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="UberShaderPixel.h">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="UberShaderCommon.h">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="UberShaderVertex.h">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="CMakeLists.txt" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/StringUtil.h"
|
||||
#include "Core/Config/GraphicsSettings.h"
|
||||
|
@ -93,6 +94,13 @@ void VideoConfig::Refresh()
|
|||
bBackendMultithreading = Config::Get(Config::GFX_BACKEND_MULTITHREADING);
|
||||
iCommandBufferExecuteInterval = Config::Get(Config::GFX_COMMAND_BUFFER_EXECUTE_INTERVAL);
|
||||
bShaderCache = Config::Get(Config::GFX_SHADER_CACHE);
|
||||
bBackgroundShaderCompiling = Config::Get(Config::GFX_BACKGROUND_SHADER_COMPILING);
|
||||
bDisableSpecializedShaders = Config::Get(Config::GFX_DISABLE_SPECIALIZED_SHADERS);
|
||||
bPrecompileUberShaders = Config::Get(Config::GFX_PRECOMPILE_UBER_SHADERS);
|
||||
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
|
||||
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
|
||||
bForceVertexUberShaders = Config::Get(Config::GFX_FORCE_VERTEX_UBER_SHADERS);
|
||||
bForcePixelUberShaders = Config::Get(Config::GFX_FORCE_PIXEL_UBER_SHADERS);
|
||||
|
||||
bZComploc = Config::Get(Config::GFX_SW_ZCOMPLOC);
|
||||
bZFreeze = Config::Get(Config::GFX_SW_ZFREEZE);
|
||||
|
@ -188,3 +196,37 @@ bool VideoConfig::IsVSync()
|
|||
{
|
||||
return bVSync && !Core::GetIsThrottlerTempDisabled();
|
||||
}
|
||||
|
||||
// Returns the thread count used for the "automatic" shader compiler setting.
// The result is the detected core count minus three, clamped to the range [1, 4].
static u32 GetNumAutoShaderCompilerThreads()
{
  const int spare_cores = cpu_info.num_cores - 3;
  const int at_least_one = std::max(spare_cores, 1);
  return static_cast<u32>(std::min(at_least_one, 4));
}
|
||||
|
||||
// Resolves the configured shader compiler thread count.
// A negative setting selects the automatic count; any other value is returned as-is.
u32 VideoConfig::GetShaderCompilerThreads() const
{
  const bool use_automatic = iShaderCompilerThreads < 0;
  return use_automatic ? GetNumAutoShaderCompilerThreads() :
                         static_cast<u32>(iShaderCompilerThreads);
}
|
||||
|
||||
// Resolves the configured shader precompiler thread count.
// A negative setting selects the automatic count; any other value is returned as-is.
u32 VideoConfig::GetShaderPrecompilerThreads() const
{
  const bool use_automatic = iShaderPrecompilerThreads < 0;
  return use_automatic ? GetNumAutoShaderCompilerThreads() :
                         static_cast<u32>(iShaderPrecompilerThreads);
}
|
||||
|
||||
bool VideoConfig::CanPrecompileUberShaders() const
|
||||
{
|
||||
// We don't want to precompile ubershaders if they're never going to be used.
|
||||
return bPrecompileUberShaders && (bBackgroundShaderCompiling || bDisableSpecializedShaders);
|
||||
}
|
||||
|
||||
bool VideoConfig::CanBackgroundCompileShaders() const
|
||||
{
|
||||
// We require precompiled ubershaders to background compile shaders.
|
||||
return bBackgroundShaderCompiling && bPrecompileUberShaders;
|
||||
}
|
||||
|
|
|
@ -168,6 +168,36 @@ struct VideoConfig final
|
|||
// Currently only supported with Vulkan.
|
||||
int iCommandBufferExecuteInterval;
|
||||
|
||||
// The following options determine the ubershader mode:
|
||||
// No ubershaders:
|
||||
// - bBackgroundShaderCompiling = false
|
||||
// - bDisableSpecializedShaders = false
|
||||
// Hybrid/background compiling:
|
||||
// - bBackgroundShaderCompiling = true
|
||||
// - bDisableSpecializedShaders = false
|
||||
// Ubershaders only:
|
||||
// - bBackgroundShaderCompiling = false
|
||||
// - bDisableSpecializedShaders = true
|
||||
|
||||
// Enable background shader compiling, use ubershaders while waiting.
|
||||
bool bBackgroundShaderCompiling;
|
||||
|
||||
// Use ubershaders only, don't compile specialized shaders.
|
||||
bool bDisableSpecializedShaders;
|
||||
|
||||
// Precompile ubershader variants at boot/config reload time.
|
||||
bool bPrecompileUberShaders;
|
||||
|
||||
// Number of shader compiler threads.
|
||||
// 0 disables background compilation.
|
||||
// -1 uses an automatic number based on the CPU threads.
|
||||
int iShaderCompilerThreads;
|
||||
int iShaderPrecompilerThreads;
|
||||
|
||||
// Temporary toggling of ubershaders, for debugging
|
||||
bool bForceVertexUberShaders;
|
||||
bool bForcePixelUberShaders;
|
||||
|
||||
// Static config per API
|
||||
// TODO: Move this out of VideoConfig
|
||||
struct
|
||||
|
@ -204,6 +234,8 @@ struct VideoConfig final
|
|||
bool bSupportsInternalResolutionFrameDumps;
|
||||
bool bSupportsGPUTextureDecoding;
|
||||
bool bSupportsST3CTextures;
|
||||
bool bSupportsBitfield; // Needed by UberShaders, so must stay in VideoCommon
|
||||
bool bSupportsDynamicSamplerIndexing; // Needed by UberShaders, so must stay in VideoCommon
|
||||
} backend_info;
|
||||
|
||||
// Utility
|
||||
|
@ -224,6 +256,10 @@ struct VideoConfig final
|
|||
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;
|
||||
}
|
||||
bool UseVertexRounding() const { return bVertexRounding && iEFBScale != SCALE_1X; }
|
||||
u32 GetShaderCompilerThreads() const;
|
||||
u32 GetShaderPrecompilerThreads() const;
|
||||
bool CanPrecompileUberShaders() const;
|
||||
bool CanBackgroundCompileShaders() const;
|
||||
};
|
||||
|
||||
extern VideoConfig g_Config;
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "Common/BitField.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/CPMemory.h"
|
||||
|
||||
|
@ -132,27 +133,15 @@ enum
|
|||
|
||||
union LitChannel
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 matsource : 1;
|
||||
u32 enablelighting : 1;
|
||||
u32 lightMask0_3 : 4;
|
||||
u32 ambsource : 1;
|
||||
u32 diffusefunc : 2; // LIGHTDIF_X
|
||||
u32 attnfunc : 2; // LIGHTATTN_X
|
||||
u32 lightMask4_7 : 4;
|
||||
};
|
||||
struct
|
||||
{
|
||||
u32 hex : 15;
|
||||
u32 unused : 17;
|
||||
};
|
||||
struct
|
||||
{
|
||||
u32 dummy0 : 7;
|
||||
u32 lightparams : 4;
|
||||
u32 dummy1 : 21;
|
||||
};
|
||||
BitField<0, 1, u32> matsource;
|
||||
BitField<1, 1, u32> enablelighting;
|
||||
BitField<2, 4, u32> lightMask0_3;
|
||||
BitField<6, 1, u32> ambsource;
|
||||
BitField<7, 2, u32> diffusefunc; // LIGHTDIF_X
|
||||
BitField<9, 2, u32> attnfunc; // LIGHTATTN_X
|
||||
BitField<11, 4, u32> lightMask4_7;
|
||||
u32 hex;
|
||||
|
||||
unsigned int GetFullLightMask() const
|
||||
{
|
||||
return enablelighting ? (lightMask0_3 | (lightMask4_7 << 4)) : 0;
|
||||
|
@ -173,28 +162,22 @@ union INVTXSPEC
|
|||
|
||||
union TexMtxInfo
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 unknown : 1;
|
||||
u32 projection : 1; // XF_TEXPROJ_X
|
||||
u32 inputform : 1; // XF_TEXINPUT_X
|
||||
u32 unknown2 : 1;
|
||||
u32 texgentype : 3; // XF_TEXGEN_X
|
||||
u32 sourcerow : 5; // XF_SRCGEOM_X
|
||||
u32 embosssourceshift : 3; // what generated texcoord to use
|
||||
u32 embosslightshift : 3; // light index that is used
|
||||
};
|
||||
BitField<0, 1, u32> unknown; //
|
||||
BitField<1, 1, u32> projection; // XF_TEXPROJ_X
|
||||
BitField<2, 1, u32> inputform; // XF_TEXINPUT_X
|
||||
BitField<3, 1, u32> unknown2; //
|
||||
BitField<4, 3, u32> texgentype; // XF_TEXGEN_X
|
||||
BitField<7, 5, u32> sourcerow; // XF_SRCGEOM_X
|
||||
BitField<12, 3, u32> embosssourceshift; // what generated texcoord to use
|
||||
BitField<15, 3, u32> embosslightshift; // light index that is used
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
union PostMtxInfo
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 index : 6; // base row of dual transform matrix
|
||||
u32 unused : 2;
|
||||
u32 normalize : 1; // normalize before send operation
|
||||
};
|
||||
BitField<0, 6, u32> index; // base row of dual transform matrix
|
||||
BitField<6, 2, u32> unused; //
|
||||
BitField<8, 1, u32> normalize; // normalize before send operation
|
||||
u32 hex;
|
||||
};
|
||||
|
||||
|
|
|
@ -56,6 +56,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
|
|||
case XFMEM_SETNUMCHAN:
|
||||
if (xfmem.numChan.numColorChans != (newValue & 3))
|
||||
g_vertex_manager->Flush();
|
||||
VertexShaderManager::SetLightingConfigChanged();
|
||||
break;
|
||||
|
||||
case XFMEM_SETCHAN0_AMBCOLOR: // Channel Ambient Color
|
||||
|
@ -88,11 +89,13 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
|
|||
case XFMEM_SETCHAN1_ALPHA:
|
||||
if (((u32*)&xfmem)[address] != (newValue & 0x7fff))
|
||||
g_vertex_manager->Flush();
|
||||
VertexShaderManager::SetLightingConfigChanged();
|
||||
break;
|
||||
|
||||
case XFMEM_DUALTEX:
|
||||
if (xfmem.dualTexTrans.enabled != (newValue & 1))
|
||||
g_vertex_manager->Flush();
|
||||
VertexShaderManager::SetTexMatrixInfoChanged(-1);
|
||||
break;
|
||||
|
||||
case XFMEM_SETMATRIXINDA:
|
||||
|
@ -146,6 +149,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
|
|||
case XFMEM_SETTEXMTXINFO + 6:
|
||||
case XFMEM_SETTEXMTXINFO + 7:
|
||||
g_vertex_manager->Flush();
|
||||
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETTEXMTXINFO);
|
||||
|
||||
nextAddress = XFMEM_SETTEXMTXINFO + 8;
|
||||
break;
|
||||
|
@ -159,6 +163,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
|
|||
case XFMEM_SETPOSMTXINFO + 6:
|
||||
case XFMEM_SETPOSMTXINFO + 7:
|
||||
g_vertex_manager->Flush();
|
||||
VertexShaderManager::SetTexMatrixInfoChanged(address - XFMEM_SETPOSMTXINFO);
|
||||
|
||||
nextAddress = XFMEM_SETPOSMTXINFO + 8;
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue