GS/OpenGL: Use programs instead of separate pipelines

This commit is contained in:
Connor McLaughlin 2021-12-20 23:48:13 +10:00 committed by refractionpcsx2
parent 69ff12645c
commit 6e064da536
15 changed files with 210 additions and 896 deletions

View File

@ -45,7 +45,7 @@ out gl_PerVertex {
// even if only one was updated.
#ifdef FRAGMENT_SHADER
layout(std140, binding = 15) uniform cb15
layout(std140, binding = 6) uniform cb15
{
int EMODA;
int EMODC;
@ -54,7 +54,7 @@ layout(std140, binding = 15) uniform cb15
#endif
#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER)
layout(std140, binding = 20) uniform cb20
layout(std140, binding = 1) uniform cb20
{
vec2 VertexScale;
vec2 VertexOffset;
@ -69,7 +69,7 @@ layout(std140, binding = 20) uniform cb20
#endif
#if defined(VERTEX_SHADER) || defined(FRAGMENT_SHADER)
layout(std140, binding = 21) uniform cb21
layout(std140, binding = 2) uniform cb21
{
vec3 FogColor;
float AREF;

View File

@ -9,7 +9,7 @@ in SHADER
#ifdef FRAGMENT_SHADER
layout(std140, binding = 11) uniform cb11
layout(std140, binding = 4) uniform cb11
{
vec2 ZrH;
float hH;

View File

@ -9,7 +9,7 @@ in SHADER
#ifdef FRAGMENT_SHADER
layout(std140, binding = 10) uniform cb10
layout(std140, binding = 3) uniform cb10
{
vec4 BGColor;
};

View File

@ -629,7 +629,6 @@ set(pcsx2GSSources
GS/Renderers/OpenGL/GLLoader.cpp
GS/Renderers/OpenGL/GLState.cpp
GS/Renderers/OpenGL/GSDeviceOGL.cpp
GS/Renderers/OpenGL/GSShaderOGL.cpp
GS/Renderers/OpenGL/GSTextureOGL.cpp
GS/Window/GSSetting.cpp
GS/Window/GSwxDialog.cpp
@ -699,7 +698,6 @@ set(pcsx2GSHeaders
GS/Renderers/OpenGL/GLLoader.h
GS/Renderers/OpenGL/GLState.h
GS/Renderers/OpenGL/GSDeviceOGL.h
GS/Renderers/OpenGL/GSShaderOGL.h
GS/Renderers/OpenGL/GSTextureOGL.h
GS/Renderers/OpenGL/GSUniformBufferOGL.h
GS/Window/GSCaptureDlg.h

View File

@ -252,14 +252,6 @@ int _GSopen(const WindowInfo& wi, const char* title, GSRendererType renderer, in
return -1;
}
if (renderer == GSRendererType::OGL_HW && theApp.GetConfigI("debug_glsl_shader") == 2)
{
printf("GS: test OpenGL shader. Please wait...\n\n");
static_cast<GSDeviceOGL*>(s_gs->m_dev)->SelfShaderTest();
printf("\nGS: test OpenGL shader done. It will now exit\n");
return -1;
}
return 0;
}

View File

@ -145,7 +145,6 @@ namespace GLLoader
bool vendor_id_intel = false;
bool mesa_driver = false;
bool in_replayer = false;
bool buggy_sso_dual_src = false;
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
bool found_GL_ARB_clear_texture = false;
@ -213,8 +212,6 @@ namespace GLLoader
// On linux assumes the free driver if it isn't nvidia or amd pro driver
mesa_driver = !vendor_id_nvidia && !vendor_id_amd;
#endif
// As of 2019 SSO is still broken on intel (Kaby Lake confirmed).
buggy_sso_dual_src = vendor_id_intel || vendor_id_amd;
if (theApp.GetConfigI("override_geometry_shader") != -1)
{

View File

@ -36,7 +36,6 @@ namespace GLLoader
extern bool vendor_id_nvidia;
extern bool vendor_id_intel;
extern bool mesa_driver;
extern bool buggy_sso_dual_src;
extern bool in_replayer;
// GL

View File

@ -44,12 +44,6 @@ namespace GLState
GLuint tex_unit[8];
GLuint64 tex_handle[8];
GLuint ps;
GLuint gs;
GLuint vs;
GLuint program;
GLuint pipeline;
s64 available_vram;
void Clear()
@ -80,12 +74,6 @@ namespace GLState
std::fill(std::begin(tex_unit), std::end(tex_unit), 0);
std::fill(std::begin(tex_handle), std::end(tex_handle), 0);
ps = 0;
gs = 0;
vs = 0;
program = 0;
pipeline = 0;
// Set a max vram limit for texture allocation
// (256MB are reserved for PBO/IBO/VBO/UBO buffers)
available_vram = (4096u - 256u) * 1024u * 1024u;

View File

@ -46,12 +46,6 @@ namespace GLState
extern GLuint tex_unit[8]; // shader input texture
extern GLuint64 tex_handle[8]; // shader input texture
extern GLuint ps;
extern GLuint gs;
extern GLuint vs;
extern GLuint program;
extern GLuint pipeline;
extern s64 available_vram;
extern void Clear();

View File

@ -31,12 +31,12 @@ u64 g_vertex_upload_byte = 0;
u64 g_uniform_upload_byte = 0;
#endif
static constexpr u32 g_merge_cb_index = 10;
static constexpr u32 g_interlace_cb_index = 11;
static constexpr u32 g_merge_cb_index = 3;
static constexpr u32 g_interlace_cb_index = 4;
static constexpr u32 g_fx_cb_index = 14;
static constexpr u32 g_convert_index = 15;
static constexpr u32 g_vs_cb_index = 20;
static constexpr u32 g_ps_cb_index = 21;
static constexpr u32 g_convert_index = 5;
static constexpr u32 g_vs_cb_index = 1;
static constexpr u32 g_ps_cb_index = 2;
static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
@ -51,15 +51,15 @@ FILE* GSDeviceOGL::m_debug_gl_file = NULL;
GSDeviceOGL::GSDeviceOGL()
: m_fbo(0)
, m_fbo_read(0)
, m_apitrace(0)
, m_palette_ss(0)
, m_shader(NULL)
{
memset(&m_merge_obj, 0, sizeof(m_merge_obj));
memset(&m_interlace, 0, sizeof(m_interlace));
memset(&m_convert, 0, sizeof(m_convert));
memset(&m_fxaa, 0, sizeof(m_fxaa));
#ifndef PCSX2_CORE
memset(&m_shaderfx, 0, sizeof(m_shaderfx));
#endif
memset(&m_date, 0, sizeof(m_date));
memset(&m_shadeboost, 0, sizeof(m_shadeboost));
memset(&m_om_dss, 0, sizeof(m_om_dss));
@ -101,10 +101,6 @@ GSDeviceOGL::~GSDeviceOGL()
}
#endif
// If the create function wasn't called nothing to do.
if (m_shader == NULL)
return;
GL_PUSH("GSDeviceOGL destructor");
// Clean vertex buffer state
@ -127,8 +123,10 @@ GSDeviceOGL::~GSDeviceOGL()
// Clean m_fxaa
delete m_fxaa.cb;
#ifndef PCSX2_CORE
// Clean m_shaderfx
delete m_shaderfx.cb;
#endif
// Clean m_date
delete m_date.dss;
@ -142,7 +140,7 @@ GSDeviceOGL::~GSDeviceOGL()
m_fragment_uniform_stream_buffer.reset();
glDeleteSamplers(1, &m_palette_ss);
m_ps.clear();
m_programs.clear();
glDeleteSamplers(std::size(m_ps_ss), m_ps_ss);
@ -150,10 +148,6 @@ GSDeviceOGL::~GSDeviceOGL()
delete ds;
PboPool::Destroy();
// Must be done after the destruction of all shader/program objects
delete m_shader;
m_shader = NULL;
}
void GSDeviceOGL::GenerateProfilerData()
@ -301,8 +295,6 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
{
GL_PUSH("GSDeviceOGL::Various");
m_shader = new GSShaderOGL(theApp.GetConfigB("debug_glsl_shader"));
glGenFramebuffers(1, &m_fbo);
// Always write to the first buffer
OMSetFBO(m_fbo);
@ -376,29 +368,28 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
// ****************************************************************
// convert
// ****************************************************************
GLuint vs = 0;
GLuint ps = 0;
{
GL_PUSH("GSDeviceOGL::Convert");
m_convert.cb = new GSUniformBufferOGL("Misc UBO", g_convert_index, sizeof(MiscConstantBuffer));
// these all share the same vertex shader
const auto shader = Host::ReadResourceFileToString("shaders/opengl/convert.glsl");
if (!shader.has_value())
return false;
vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, m_shader_common_header, shader->c_str());
m_convert.vs = GetShaderSource("vs_main", GL_VERTEX_SHADER, m_shader_common_header, *shader, {});
m_convert.vs = vs;
for (size_t i = 0; i < std::size(m_convert.ps); i++)
{
const char* name = shaderName(static_cast<ShaderConvert>(i));
const std::string macro_sel = (static_cast<ShaderConvert>(i) == ShaderConvert::RGBA_TO_8I) ?
format("#define PS_SCALE_FACTOR %d\n", m_upscale_multiplier) :
std::string();
ps = m_shader->Compile("convert.glsl", name, GL_FRAGMENT_SHADER, m_shader_common_header, shader->c_str(), macro_sel);
std::string pretty_name = std::string("Convert pipe ") + name;
m_convert.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *shader, macro_sel));
if (!m_convert.ps[i].Compile(m_convert.vs, {}, ps) || !m_convert.ps[i].Link())
return false;
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
}
PSSamplerSelector point;
@ -420,7 +411,9 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
{
GL_PUSH("GSDeviceOGL::Merge");
static const float default_merge_cb[4] = {};
m_merge_obj.cb = new GSUniformBufferOGL("Merge UBO", g_merge_cb_index, sizeof(MergeConstantBuffer));
m_merge_obj.cb->upload(default_merge_cb);
const auto shader = Host::ReadResourceFileToString("shaders/opengl/merge.glsl");
if (!shader.has_value())
@ -428,9 +421,10 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
for (size_t i = 0; i < std::size(m_merge_obj.ps); i++)
{
ps = m_shader->Compile("merge.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, m_shader_common_header, shader->c_str());
std::string pretty_name = "Merge pipe " + std::to_string(i);
m_merge_obj.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
const std::string ps(GetShaderSource(format("ps_main%d", i), GL_FRAGMENT_SHADER, m_shader_common_header, *shader, {}));
if (!m_merge_obj.ps[i].Compile(m_convert.vs, {}, ps) || !m_merge_obj.ps[i].Link())
return false;
m_merge_obj.ps[i].SetFormattedName("Merge pipe %zu", i);
}
}
@ -441,6 +435,8 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
GL_PUSH("GSDeviceOGL::Interlace");
m_interlace.cb = new GSUniformBufferOGL("Interlace UBO", g_interlace_cb_index, sizeof(InterlaceConstantBuffer));
InterlaceConstantBuffer interlace_cb;
m_interlace.cb->upload(&interlace_cb);
const auto shader = Host::ReadResourceFileToString("shaders/opengl/interlace.glsl");
if (!shader.has_value())
@ -448,9 +444,10 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
// Build one program per interlace fragment entry point (ps_main0, ps_main1, ...).
// Every program is compiled against the vertex shader source stored in m_convert.vs.
for (size_t i = 0; i < std::size(m_interlace.ps); i++)
{
	// "%d" expects an int, so narrow the size_t index explicitly instead of
	// passing a mismatched type to the printf-style formatter.
	const std::string ps(GetShaderSource(format("ps_main%d", static_cast<int>(i)), GL_FRAGMENT_SHADER, m_shader_common_header, *shader, {}));
	if (!m_interlace.ps[i].Compile(m_convert.vs, {}, ps) || !m_interlace.ps[i].Link())
		return false;

	// Label the program as an interlace program; it used to be labelled
	// "Merge pipe", which made it indistinguishable from the merge programs.
	m_interlace.ps[i].SetFormattedName("Interlace pipe %zu", i);
}
}
@ -471,8 +468,10 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
if (!shader.has_value())
return false;
ps = m_shader->Compile("shadeboost.glsl", "ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, shader->c_str(), shade_macro);
m_shadeboost.ps = m_shader->LinkPipeline("ShadeBoost pipe", vs, 0, ps);
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, *shader, shade_macro));
if (!m_shadeboost.ps.Compile(m_convert.vs, {}, ps) || !m_shadeboost.ps.Link())
return false;
m_shadeboost.ps.SetName("Shadeboost pipe");
}
// ****************************************************************
@ -608,16 +607,6 @@ bool GSDeviceOGL::CreateTextureFX()
m_palette_ss = CreateSampler(PSSamplerSelector(0));
glBindSampler(1, m_palette_ss);
// Pre compile the (remaining) Geometry & Vertex Shader
// One-Hot encoding
memset(m_gs, 0, sizeof(m_gs));
m_gs[1] = CompileGS(GSSelector(1));
m_gs[2] = CompileGS(GSSelector(2));
m_gs[4] = CompileGS(GSSelector(4));
for (u32 key = 0; key < std::size(m_vs); key++)
m_vs[key] = CompileVS(VSSelector(key));
// Enable all bits for stencil operations. Technically 1 bit is
// enough but buffer is polluted with noise. Clear will be limited
// to the mask.
@ -627,8 +616,6 @@ bool GSDeviceOGL::CreateTextureFX()
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
}
// Help to debug FS in apitrace
m_apitrace = CompilePS(PSSelector());
return true;
}
@ -946,29 +933,102 @@ void GSDeviceOGL::Barrier(GLbitfield b)
glMemoryBarrier(b);
}
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
// Assembles the complete GLSL source text for a single shader stage.
//
// The result is the header produced by GenGlslHeader() (which receives
// `entry`, `type` and `macro_sel`), followed by `common_header`, followed by
// `glsl_h_code`.
//
//   entry         - function name forwarded to GenGlslHeader() as the entry point
//   type          - GL shader stage enum forwarded to GenGlslHeader()
//   common_header - shared declarations placed directly after the generated header
//   glsl_h_code   - contents of the shader file itself
//   macro_sel     - extra preprocessor lines forwarded to GenGlslHeader()
std::string GSDeviceOGL::GetShaderSource(const std::string_view& entry, GLenum type, const std::string_view& common_header, const std::string_view& glsl_h_code, const std::string_view& macro_sel)
{
	std::string src = GenGlslHeader(entry, type, macro_sel);

	// Honour the header supplied by the caller. The parameter used to be
	// ignored and m_shader_common_header was appended unconditionally.
	src += common_header;
	src += glsl_h_code;
	return src;
}
GLuint GSDeviceOGL::CompileGS(GSSelector sel)
// Generates the preamble placed in front of every GLSL source.
//
// The preamble contains, in this order: the #version line, the required
// extension directives, driver/feature defines, a define identifying the
// shader stage, a define renaming `entry` to `main`, and finally `macro`.
//
//   entry - name of the function that becomes the shader's main()
//   type  - GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER
//   macro - additional preprocessor text appended verbatim at the end
std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro)
{
	// Fixed part: GLSL 3.30 plus the extensions that are always required.
	// 420pack needs GL 4.2, separate_shader_objects needs GL 4.1.
	std::string header(
		"#version 330 core\n"
		"#extension GL_ARB_shading_language_420pack: require\n"
		"#extension GL_ARB_separate_shader_objects: require\n");

	// Image load/store (GL 4.2) is optional; tell the shader when it is missing.
	header += GLLoader::found_GL_ARB_shader_image_load_store ?
				  "#extension GL_ARB_shader_image_load_store: require\n" :
				  "#define DISABLE_GL42_image\n";

	if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
		header += "#define BROKEN_DRIVER as_usual\n";

	// GL_ES itself cannot be relied on (per the original author: AMD/nvidia
	// define it to 0, Intel on Windows does not define it, Intel on Linux
	// refuses to define it), so a private pGL_ES symbol is provided instead.
	header += "#define pGL_ES 0\n";

	// Several stages live in one file; this define selects the stage being built.
	const char* stage_define = nullptr;
	switch (type)
	{
		case GL_VERTEX_SHADER:
			stage_define = "#define VERTEX_SHADER 1\n";
			break;
		case GL_GEOMETRY_SHADER:
			stage_define = "#define GEOMETRY_SHADER 1\n";
			break;
		case GL_FRAGMENT_SHADER:
			stage_define = "#define FRAGMENT_SHADER 1\n";
			break;
		default:
			ASSERT(0);
			break;
	}
	if (stage_define != nullptr)
		header += stage_define;

	// Rename the requested entry point to main.
	header += "#define ";
	header += entry;
	header += " main\n";

	// Caller-supplied defines go last.
	header += macro;

	return header;
}
// Returns the GLSL source of the vertex shader for selector `sel`.
//
// The source is the generated header for entry point "vs_main" (carrying the
// VS_INT_FST define taken from sel.int_fst), followed by
// m_shader_common_header and m_shader_tfx_vgs.
std::string GSDeviceOGL::GetVSSource(VSSelector sel)
{
#ifdef PCSX2_DEVBUILD
	// NOTE(review): PRIX64 assumes sel.key is a 64-bit value - confirm against VSSelector.
	Console.WriteLn("Compiling new vertex shader with selector 0x%" PRIX64, sel.key);
#endif

	const std::string defines(format("#define VS_INT_FST %d\n", sel.int_fst));

	// GetShaderSource() performs the same header + common header + file body
	// concatenation that was previously spelled out here.
	return GetShaderSource("vs_main", GL_VERTEX_SHADER, m_shader_common_header, m_shader_tfx_vgs, defines);
}
// Returns the GLSL source of the geometry shader for selector `sel`.
//
// The source is the generated header for entry point "gs_main" (carrying the
// GS_POINT and GS_LINE defines taken from the selector), followed by
// m_shader_common_header and m_shader_tfx_vgs.
std::string GSDeviceOGL::GetGSSource(GSSelector sel)
{
#ifdef PCSX2_DEVBUILD
	// NOTE(review): PRIX64 assumes sel.key is a 64-bit value - confirm against GSSelector.
	Console.WriteLn("Compiling new geometry shader with selector 0x%" PRIX64, sel.key);
#endif

	std::string defines(format("#define GS_POINT %d\n", sel.point));
	defines += format("#define GS_LINE %d\n", sel.line);

	// GetShaderSource() performs the same header + common header + file body
	// concatenation that was previously spelled out here.
	return GetShaderSource("gs_main", GL_GEOMETRY_SHADER, m_shader_common_header, m_shader_tfx_vgs, defines);
}
GLuint GSDeviceOGL::CompilePS(PSSelector sel)
std::string GSDeviceOGL::GetPSSource(PSSelector sel)
{
#ifdef PCSX2_DEVBUILD
Console.WriteLn("Compiling new pixel shader with selector 0x%" PRIX64, sel.key);
#endif
std::string macro = format("#define PS_FST %d\n", sel.fst)
+ format("#define PS_WMS %d\n", sel.wms)
+ format("#define PS_WMT %d\n", sel.wmt)
@ -1012,226 +1072,10 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
+ format("#define PS_SCALE_FACTOR %d\n", m_upscale_multiplier)
;
if (GLLoader::buggy_sso_dual_src)
return m_shader->CompileShader("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, m_shader_tfx_fs.data(), macro);
else
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, m_shader_tfx_fs.data(), macro);
}
void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& file, const PSSelector& sel, int& nb_shader)
{
#ifdef __unix__
std::string out = "/tmp/GS_Shader/";
GSmkdir(out.c_str());
out += dir + "/";
GSmkdir(out.c_str());
out += file;
#else
std::string out = file;
#endif
#ifdef __linux__
// Nouveau actually
if (GLLoader::mesa_driver)
{
if (freopen(out.c_str(), "w", stderr) == NULL)
fprintf(stderr, "Failed to redirect stderr\n");
}
#endif
const GLuint p = CompilePS(sel);
nb_shader++;
m_shader_inst += m_shader->DumpAsm(out, p);
#ifdef __linux__
// Nouveau actually
if (GLLoader::mesa_driver)
{
if (freopen("/dev/tty", "w", stderr) == NULL)
fprintf(stderr, "Failed to restore stderr\n");
}
#endif
}
void GSDeviceOGL::SelfShaderTestPrint(const std::string& test, int& nb_shader)
{
fprintf(stderr, "%-25s\t\t%d shaders:\t%d instructions (M %4.2f)\t%d registers (M %4.2f)\n",
test.c_str(), nb_shader,
m_shader_inst, (float)m_shader_inst / (float)nb_shader,
m_shader_reg, (float)m_shader_reg / (float)nb_shader);
m_shader_inst = 0;
m_shader_reg = 0;
nb_shader = 0;
}
void GSDeviceOGL::SelfShaderTest()
{
std::string out;
#ifdef __unix__
setenv("NV50_PROG_DEBUG", "1", 1);
#endif
std::string test;
m_shader_inst = 0;
m_shader_reg = 0;
int nb_shader = 0;
test = "SW_Blending";
for (int colclip = 0; colclip < 2; colclip++)
{
for (int fmt = 0; fmt < 3; fmt++)
{
for (int i = 0; i < 3; i++)
{
PSSelector sel;
sel.tfx = 4;
const int ib = (i + 1) % 3;
sel.blend_a = i;
sel.blend_b = ib;
sel.blend_c = i;
sel.blend_d = i;
sel.colclip = colclip;
sel.dfmt = fmt;
std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm",
i, ib, i, i, colclip, fmt);
SelfShaderTestRun(test, file, sel, nb_shader);
}
}
}
SelfShaderTestPrint(test, nb_shader);
test = "Alpha_Test";
for (int atst = 0; atst < 5; atst++)
{
PSSelector sel;
sel.tfx = 4;
sel.atst = atst;
std::string file = format("Shader_Atst_%d.glsl.asm", atst);
SelfShaderTestRun(test, file, sel, nb_shader);
}
SelfShaderTestPrint(test, nb_shader);
test = "Fbmask__Fog__Shuffle__Read_ba";
for (int read_ba = 0; read_ba < 2; read_ba++)
{
PSSelector sel;
sel.tfx = 4;
sel.fog = 1;
sel.fbmask = 1;
sel.shuffle = 1;
sel.read_ba = read_ba;
std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba);
SelfShaderTestRun(test, file, sel, nb_shader);
}
SelfShaderTestPrint(test, nb_shader);
test = "Date";
for (int date = 1; date < 7; date++)
{
PSSelector sel;
sel.tfx = 4;
sel.date = date;
std::string file = format("Shader_Date_%d.glsl.asm", date);
SelfShaderTestRun(test, file, sel, nb_shader);
}
SelfShaderTestPrint(test, nb_shader);
test = "FBA";
for (int fmt = 0; fmt < 3; fmt++)
{
PSSelector sel;
sel.tfx = 4;
sel.fba = 1;
sel.dfmt = fmt;
sel.clr1 = 1;
std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt);
SelfShaderTestRun(test, file, sel, nb_shader);
}
SelfShaderTestPrint(test, nb_shader);
test = "Fst__Tc__IIP";
{
PSSelector sel;
sel.tfx = 1;
sel.fst = 0;
sel.iip = 1;
sel.tcoffsethack = 1;
std::string file = format("Shader_Fst__TC__Iip.glsl.asm");
SelfShaderTestRun(test, file, sel, nb_shader);
}
SelfShaderTestPrint(test, nb_shader);
test = "Tfx__Tcc";
for (int channel = 0; channel < 5; channel++)
{
for (int tfx = 0; tfx < 5; tfx++)
{
for (int tcc = 0; tcc < 2; tcc++)
{
PSSelector sel;
sel.fst = 1;
sel.channel = channel;
sel.tfx = tfx;
sel.tcc = tcc;
std::string file = format("Shader_Tfx_%d__Tcc_%d__Channel_%d.glsl.asm", tfx, tcc, channel);
SelfShaderTestRun(test, file, sel, nb_shader);
}
}
}
SelfShaderTestPrint(test, nb_shader);
test = "Texture_Sampling";
for (int depth = 0; depth < 4; depth++)
{
for (int fmt = 0; fmt < 16; fmt++)
{
if ((fmt & 3) == 3)
continue;
for (int ltf = 0; ltf < 2; ltf++)
{
for (int aem = 0; aem < 2; aem++)
{
for (int wms = 1; wms < 4; wms++)
{
for (int wmt = 1; wmt < 4; wmt++)
{
PSSelector sel;
sel.tfx = 1;
sel.tcc = 1;
sel.fst = 1;
sel.depth_fmt = depth;
sel.ltf = ltf;
sel.aem = aem;
sel.aem_fmt = fmt & 3;
sel.pal_fmt = fmt >> 2;
sel.wms = wms;
sel.wmt = wmt;
std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d__DepthFmt_%d.glsl.asm",
ltf, aem, fmt, wms, wmt, depth);
SelfShaderTestRun(test, file, sel, nb_shader);
}
}
}
}
}
}
SelfShaderTestPrint(test, nb_shader);
std::string src = GenGlslHeader("ps_main", GL_FRAGMENT_SHADER, macro);
src += m_shader_common_header;
src += m_shader_tfx_fs;
return src;
}
bool GSDeviceOGL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
@ -1256,7 +1100,7 @@ void GSDeviceOGL::BlitRect(GSTexture* sTex, const GSVector4i& r, const GSVector2
const GSVector4 float_r(r);
BeginScene();
m_shader->BindPipeline(m_convert.ps[static_cast<int>(ShaderConvert::COPY)]);
m_convert.ps[static_cast<int>(ShaderConvert::COPY)].Bind();
OMSetDepthStencilState(m_convert.dss);
OMSetBlendState();
OMSetColorMaskState();
@ -1299,7 +1143,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)shader], linear);
}
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear)
// Convenience overload: forwards to the full StretchRect() overload using the
// m_NO_BLEND blend state and a default-constructed colour mask selector.
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, bool linear)
{
	const OMColorMaskSelector default_mask = OMColorMaskSelector();
	StretchRect(sTex, sRect, dTex, dRect, ps, m_NO_BLEND, default_mask, linear);
}
@ -1316,7 +1160,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[(int)ShaderConvert::COPY], m_NO_BLEND, cms, false);
}
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, OMColorMaskSelector cms, bool linear)
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, int bs, OMColorMaskSelector cms, bool linear)
{
if (!sTex || !dTex)
{
@ -1343,7 +1187,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
GSVector2i ds = dTex->GetSize();
m_shader->BindPipeline(ps);
ps.Bind();
// ************************************
// om
@ -1431,8 +1275,7 @@ void GSDeviceOGL::RenderOsd(GSTexture* dt)
{
BeginScene();
m_shader->BindPipeline(m_convert.ps[static_cast<int>(ShaderConvert::OSD)]);
m_convert.ps[static_cast<int>(ShaderConvert::OSD)].Bind();
OMSetDepthStencilState(m_convert.dss);
OMSetBlendState((u8)GSDeviceOGL::m_MERGE_BLEND);
@ -1547,7 +1390,7 @@ void GSDeviceOGL::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool
void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
{
// Lazy compile
if (!m_fxaa.ps)
if (!m_fxaa.ps.IsValid())
{
if (!GLLoader::found_GL_ARB_gpu_shader5) // GL4.0 extension
{
@ -1561,8 +1404,9 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
if (!shader.has_value())
return;
GLuint ps = m_shader->Compile("fxaa.fx", "ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, shader->c_str(), fxaa_macro);
m_fxaa.ps = m_shader->LinkPipeline("FXAA pipe", m_convert.vs, 0, ps);
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, shader->c_str(), fxaa_macro));
if (!m_fxaa.ps.Compile(m_convert.vs, {}, ps) || !m_fxaa.ps.Link())
return;
}
GL_PUSH("DoFxaa");
@ -1579,8 +1423,9 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
{
#ifndef PCSX2_CORE
// Lazy compile
if (!m_shaderfx.ps)
if (!m_shaderfx.ps.IsValid())
{
if (!GLLoader::found_GL_ARB_gpu_shader5) // GL4.0 extension
{
@ -1608,8 +1453,9 @@ void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
m_shaderfx.cb = new GSUniformBufferOGL("eFX UBO", g_fx_cb_index, sizeof(ExternalFXConstantBuffer));
GLuint ps = m_shader->Compile("Extra", "ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, shader.str().c_str(), config.str());
m_shaderfx.ps = m_shader->LinkPipeline("eFX pipie", m_convert.vs, 0, ps);
const std::string ps(GetShaderSource("ps_main", GL_FRAGMENT_SHADER, m_shader_common_header, shader.str(), config.str()));
if (!m_shaderfx.ps.Compile(m_convert.vs, {}, ps) || !m_shaderfx.ps.Link())
return;
}
GL_PUSH("DoExternalFX");
@ -1630,6 +1476,7 @@ void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex)
m_shaderfx.cb->cache_upload(&cb);
StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, true);
#endif
}
void GSDeviceOGL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex)
@ -1656,7 +1503,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver
ClearStencil(ds, 0);
m_shader->BindPipeline(m_convert.ps[static_cast<int>(datm ? ShaderConvert::DATM_1 : ShaderConvert::DATM_0)]);
m_convert.ps[static_cast<int>(datm ? ShaderConvert::DATM_1 : ShaderConvert::DATM_0)].Bind();
// om
@ -1908,78 +1755,25 @@ __fi static void WriteToStreamBuffer(GL::StreamBuffer* sb, u32 index, u32 align,
glBindBufferRange(GL_UNIFORM_BUFFER, index, sb->GetGLBufferId(), res.buffer_offset, size);
}
void GSDeviceOGL::SetupPipeline(const VSSelector& vsel, const GSSelector& gsel, const PSSelector& psel)
void GSDeviceOGL::SetupPipeline(const ProgramSelector& psel)
{
auto i = m_ps.find(psel.key);
GLuint ps;
if (i == m_ps.end())
auto it = m_programs.find(psel);
if (it != m_programs.end())
{
ps = CompilePS(psel);
m_ps[psel.key] = ps;
}
else
{
ps = i->second;
it->second.Bind();
return;
}
{
#if defined(_DEBUG) && 0
// Toggling Shader is bad for the perf. Let's trace parameter that often toggle to detect
// potential uber shader possibilities.
static PSSelector old_psel;
static GLuint old_ps = 0;
std::string msg("");
#define CHECK_STATE(p) \
if (psel.p != old_psel.p) \
msg.append(" ").append(#p);
const std::string vs(GetVSSource(psel.vs));
const std::string ps(GetPSSource(psel.ps));
const std::string gs((psel.gs.key != 0) ? GetGSSource(psel.gs) : std::string());
if (old_ps != ps)
{
GL::Program prog;
if (prog.Compile(vs, gs, ps))
prog.Link();
CHECK_STATE(tex_fmt);
CHECK_STATE(dfmt);
CHECK_STATE(depth_fmt);
CHECK_STATE(aem);
CHECK_STATE(fba);
CHECK_STATE(fog);
CHECK_STATE(iip);
CHECK_STATE(date);
CHECK_STATE(atst);
CHECK_STATE(fst);
CHECK_STATE(tfx);
CHECK_STATE(tcc);
CHECK_STATE(wms);
CHECK_STATE(wmt);
CHECK_STATE(ltf);
CHECK_STATE(shuffle);
CHECK_STATE(read_ba);
CHECK_STATE(write_rg);
CHECK_STATE(fbmask);
CHECK_STATE(blend_a);
CHECK_STATE(blend_b);
CHECK_STATE(blend_c);
CHECK_STATE(blend_d);
CHECK_STATE(clr1);
CHECK_STATE(pabe);
CHECK_STATE(hdr);
CHECK_STATE(colclip);
// CHECK_STATE(channel);
// CHECK_STATE(tcoffsethack);
// CHECK_STATE(urban_chaos_hle);
// CHECK_STATE(tales_of_abyss_hle);
GL_PERF("New PS :%s", msg.c_str());
}
old_psel.key = psel.key;
old_ps = ps;
#endif
}
if (GLLoader::buggy_sso_dual_src)
m_shader->BindProgram(m_vs[vsel], m_gs[gsel], ps);
else
m_shader->BindPipeline(m_vs[vsel], m_gs[gsel], ps);
it = m_programs.emplace(psel, std::move(prog)).first;
it->second.Bind();
}
void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel)
@ -2091,20 +1885,22 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
m_uniform_buffer_alignment, &config.cb_ps, sizeof(config.cb_ps));
}
GSSelector gssel;
ProgramSelector psel;
psel.vs = convertSel(config.vs);
psel.ps = config.ps;
psel.gs.key = 0;
if (config.gs.expand)
{
switch (config.gs.topology)
{
case GSHWDrawConfig::GSTopology::Point: gssel.point = 1; break;
case GSHWDrawConfig::GSTopology::Line: gssel.line = 1; break;
case GSHWDrawConfig::GSTopology::Sprite: gssel.sprite = 1; break;
case GSHWDrawConfig::GSTopology::Triangle: ASSERT(0); break;
case GSHWDrawConfig::GSTopology::Point: psel.gs.point = 1; break;
case GSHWDrawConfig::GSTopology::Line: psel.gs.line = 1; break;
case GSHWDrawConfig::GSTopology::Sprite: psel.gs.sprite = 1; break;
case GSHWDrawConfig::GSTopology::Triangle: ASSERT(0); break;
}
}
const VSSelector vssel = convertSel(config.vs);
SetupPipeline(vssel, gssel, config.ps);
SetupPipeline(psel);
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
{
@ -2129,9 +1925,9 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
// Ask PS to discard shader above the primitiveID max
glDepthMask(GLState::depth_mask);
config.ps.date = 3;
psel.ps.date = 3;
config.alpha_second_pass.ps.date = 3;
SetupPipeline(vssel, gssel, config.ps);
SetupPipeline(psel);
// Be sure that first pass is finished !
Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
@ -2151,7 +1947,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
m_uniform_buffer_alignment, &config.cb_ps, sizeof(config.cb_ps));
}
SetupPipeline(vssel, gssel, config.alpha_second_pass.ps);
psel.ps = config.alpha_second_pass.ps;
SetupPipeline(psel);
OMSetColorMaskState(config.alpha_second_pass.colormask);
SetupOM(config.alpha_second_pass.depth);
@ -2274,7 +2071,7 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
// Don't spam noisy information on the terminal
if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION)
{
fprintf(stderr, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
Console.Error("T:%s\tID:%d\tS:%s\t=> %s", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
}
#else
// Print nouveau shader compiler info

View File

@ -17,10 +17,11 @@
#include "common/GL/Context.h"
#include "common/GL/StreamBuffer.h"
#include "common/GL/Program.h"
#include "common/HashCombine.h"
#include "GS/Renderers/Common/GSDevice.h"
#include "GSTextureOGL.h"
#include "GSUniformBufferOGL.h"
#include "GSShaderOGL.h"
#include "GLState.h"
#include "GS/GS.h"
@ -188,6 +189,26 @@ public:
MiscConstantBuffer() { memset(this, 0, sizeof(*this)); }
};
// Combined selector identifying one complete program: the vertex, geometry
// and pixel stage selectors together. Two selectors are equal when the `key`
// of every stage matches.
struct ProgramSelector
{
	VSSelector vs;
	GSSelector gs;
	PSSelector ps;

	__fi bool operator==(const ProgramSelector& p) const
	{
		return vs.key == p.vs.key && gs.key == p.gs.key && ps.key == p.ps.key;
	}

	// Defined as the exact negation of operator==.
	__fi bool operator!=(const ProgramSelector& p) const
	{
		return !(*this == p);
	}
};
// Hash functor for ProgramSelector, for use with unordered containers.
// Feeds the three stage keys into HashCombine starting from a zero seed.
struct ProgramSelectorHash
{
	__fi std::size_t operator()(const ProgramSelector& p) const noexcept
	{
		std::size_t seed = 0;
		HashCombine(seed, p.vs.key, p.gs.key, p.ps.key);
		return seed;
	}
};
static int m_shader_inst;
static int m_shader_reg;
@ -221,20 +242,20 @@ private:
struct
{
GLuint ps[2]; // program object
GL::Program ps[2]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
} m_merge_obj;
struct
{
GLuint ps[4]; // program object
GL::Program ps[4]; // program object
GSUniformBufferOGL* cb; // uniform buffer object
} m_interlace;
struct
{
GLuint vs; // program object
GLuint ps[(int)ShaderConvert::Count]; // program object
std::string vs;
GL::Program ps[static_cast<int>(ShaderConvert::Count)]; // program object
GLuint ln; // sampler object
GLuint pt; // sampler object
GSDepthStencilOGL* dss;
@ -244,15 +265,17 @@ private:
struct
{
GLuint ps;
GL::Program ps;
GSUniformBufferOGL* cb;
} m_fxaa;
#ifndef PCSX2_CORE
struct
{
GLuint ps;
GL::Program ps;
GSUniformBufferOGL* cb;
} m_shaderfx;
#endif
struct
{
@ -262,7 +285,7 @@ private:
struct
{
GLuint ps;
GL::Program ps;
} m_shadeboost;
struct
@ -273,12 +296,9 @@ private:
GLuint timer() { return timer_query[last_query]; }
} m_profiler;
GLuint m_vs[1 << 1];
GLuint m_gs[1 << 3];
GLuint m_ps_ss[1 << 7];
GSDepthStencilOGL* m_om_dss[1 << 5];
std::unordered_map<u64, GLuint> m_ps;
GLuint m_apitrace;
std::unordered_map<ProgramSelector, GL::Program, ProgramSelectorHash> m_programs;
GLuint m_palette_ss;
@ -307,8 +327,6 @@ private:
void DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds);
public:
GSShaderOGL* m_shader;
GSDeviceOGL();
virtual ~GSDeviceOGL();
@ -342,9 +360,9 @@ public:
void BlitRect(GSTexture* sTex, const GSVector4i& r, const GSVector2i& dsize, bool at_origin, bool linear);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) final;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear = true);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, bool linear = true);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) final;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, OMColorMaskSelector cms, bool linear = true);
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, int bs, OMColorMaskSelector cms, bool linear = true);
void RenderHW(GSHWDrawConfig& config) final;
void SendHWDraw(const GSHWDrawConfig& config);
@ -368,17 +386,15 @@ public:
// Reports the value of the GLLoader::found_compatible_sparse_depth flag.
bool HasDepthSparse() final { return GLLoader::found_compatible_sparse_depth; }
bool CreateTextureFX();
GLuint CompileVS(VSSelector sel);
GLuint CompileGS(GSSelector sel);
GLuint CompilePS(PSSelector sel);
std::string GetShaderSource(const std::string_view& entry, GLenum type, const std::string_view& common_header, const std::string_view& glsl_h_code, const std::string_view& macro_sel);
std::string GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro);
std::string GetVSSource(VSSelector sel);
std::string GetGSSource(GSSelector sel);
std::string GetPSSource(PSSelector sel);
GLuint CreateSampler(PSSamplerSelector sel);
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
void SelfShaderTestPrint(const std::string& test, int& nb_shader);
void SelfShaderTestRun(const std::string& dir, const std::string& file, const PSSelector& sel, int& nb_shader);
void SelfShaderTest();
void SetupPipeline(const VSSelector& vsel, const GSSelector& gsel, const PSSelector& psel);
void SetupPipeline(const ProgramSelector& psel);
void SetupSampler(PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel);
GLuint GetSamplerID(PSSamplerSelector ssel);

View File

@ -1,404 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "GSShaderOGL.h"
#include "GLState.h"
#include "GS/GS.h"
// Stores the debug flag, then creates and binds the default "HW pipe" pipeline.
// When `debug` is false the Validate* helpers return true without querying GL.
GSShaderOGL::GSShaderOGL(bool debug)
	: m_pipeline(0)
	, m_debug_shader(debug)
{
	// The default pipeline is created with no stage attached (all three stage ids are 0);
	// BindPipeline(vs, gs, ps) swaps stages into it later.
	const GLuint default_pipe = LinkPipeline("HW pipe", 0, 0, 0);
	m_pipeline = default_pipe;

	BindPipeline(m_pipeline);
}
// Prints how many GL objects were recorded, then deletes every shader, program and
// pipeline that this object registered in its *_to_delete lists.
GSShaderOGL::~GSShaderOGL()
{
	printf("Delete %zu Shaders, %zu Programs, %zu Pipelines\n",
		m_shad_to_delete.size(), m_prog_to_delete.size(), m_pipe_to_delete.size());

	for (const GLuint shader : m_shad_to_delete)
		glDeleteShader(shader);

	for (const GLuint program : m_prog_to_delete)
		glDeleteProgram(program);

	// Guard the empty case: taking the address of element 0 of an empty vector is undefined.
	// The count is cast explicitly because the GL entry point takes a GLsizei, not a size_t.
	if (!m_pipe_to_delete.empty())
		glDeleteProgramPipelines(static_cast<GLsizei>(m_pipe_to_delete.size()), m_pipe_to_delete.data());
}
// Creates a program pipeline, assigns the given programs to its vertex, geometry and
// fragment stages, labels it with `pretty_print`, and registers it for deletion in the
// destructor. Returns the new pipeline id.
GLuint GSShaderOGL::LinkPipeline(const std::string& pretty_print, GLuint vs, GLuint gs, GLuint ps)
{
	GLuint pipe;
	glCreateProgramPipelines(1, &pipe);

	glUseProgramStages(pipe, GL_VERTEX_SHADER_BIT, vs);
	glUseProgramStages(pipe, GL_GEOMETRY_SHADER_BIT, gs);
	glUseProgramStages(pipe, GL_FRAGMENT_SHADER_BIT, ps);

	// Debug label attached to the pipeline object.
	glObjectLabel(GL_PROGRAM_PIPELINE, pipe, pretty_print.size(), pretty_print.c_str());

	m_pipe_to_delete.push_back(pipe);

	return pipe;
}
// Returns a linked program made of the given shader objects, reusing a cached program
// when one is already stored under the same key. A shader id of 0 means "stage absent".
//
// NOTE(review): the cache key is a lossy 32-bit mix of the three ids. `vs ^ gs` is
// symmetric and the shift discards high bits, so distinct (vs, gs, ps) triples can map
// to the same key and receive the wrong cached program. Fixing this needs a wider key
// type for m_program.
GLuint GSShaderOGL::LinkProgram(GLuint vs, GLuint gs, GLuint ps)
{
	const u32 key = ((vs ^ gs) << 24) ^ ps;

	const auto cached = m_program.find(key);
	if (cached != m_program.end())
		return cached->second;

	const GLuint prog = glCreateProgram();

	// Attach order matches the original: vertex, fragment, geometry.
	const auto attach_if_present = [prog](GLuint shader) {
		if (shader)
			glAttachShader(prog, shader);
	};
	attach_if_present(vs);
	attach_if_present(ps);
	attach_if_present(gs);

	glLinkProgram(prog);

	// Only reports link errors; the program is cached and returned regardless of the result.
	ValidateProgram(prog);

	m_prog_to_delete.push_back(prog);
	m_program.emplace(key, prog);

	return prog;
}
// Links (or fetches from the cache) the program for the given shader objects and makes
// it current, skipping the GL call when GLState already tracks it as bound.
void GSShaderOGL::BindProgram(GLuint vs, GLuint gs, GLuint ps)
{
	// The single-argument overload performs the same GLState check and glUseProgram call.
	BindProgram(LinkProgram(vs, gs, ps));
}
// Makes program `p` current unless GLState already records it as the bound program.
void GSShaderOGL::BindProgram(GLuint p)
{
	if (GLState::program == p)
		return;

	GLState::program = p;
	glUseProgram(p);
}
// Binds the default pipeline and updates each of its stages whose program differs from
// the one tracked in GLState.
void GSShaderOGL::BindPipeline(GLuint vs, GLuint gs, GLuint ps)
{
	BindPipeline(m_pipeline);

	if (GLState::vs != vs)
	{
		GLState::vs = vs;
		glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, vs);
	}

	if (GLState::gs != gs)
	{
		GLState::gs = gs;
		glUseProgramStages(m_pipeline, GL_GEOMETRY_SHADER_BIT, gs);
	}

#ifdef _DEBUG
	// Debug builds always re-set the fragment stage; this makes it easy to replace the
	// program in apitrace.
	const bool update_ps = true;
#else
	const bool update_ps = (GLState::ps != ps);
#endif

	if (update_ps)
	{
		GLState::ps = ps;
		glUseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, ps);
	}
}
// Binds pipeline `pipe` if GLState does not already track it, then unbinds any program
// that GLState records as set through glUseProgram.
void GSShaderOGL::BindPipeline(GLuint pipe)
{
	const bool pipeline_changed = (GLState::pipeline != pipe);
	if (pipeline_changed)
	{
		GLState::pipeline = pipe;
		glBindProgramPipeline(pipe);
	}

	// NOTE(review): presumably required because a program set with glUseProgram takes
	// precedence over the bound pipeline -- confirm against the GL reference.
	if (GLState::program != 0)
	{
		GLState::program = 0;
		glUseProgram(0);
	}
}
// Checks the compile status of shader `s`.
// Returns true when the shader compiled, or unconditionally when m_debug_shader is
// false (no GL query is made in that case). On failure the info log, if any, is written
// to stderr followed by a newline, and false is returned.
bool GSShaderOGL::ValidateShader(GLuint s)
{
	if (!m_debug_shader)
		return true;

	GLint status = 0;
	glGetShaderiv(s, GL_COMPILE_STATUS, &status);
	if (status)
		return true;

	GLint log_length = 0;
	glGetShaderiv(s, GL_INFO_LOG_LENGTH, &log_length);
	if (log_length > 0)
	{
		// Owning, zero-filled buffer: freed automatically and always NUL-terminated for "%s".
		std::vector<char> log(static_cast<size_t>(log_length), '\0');
		glGetShaderInfoLog(s, log_length, nullptr, log.data());
		fprintf(stderr, "%s", log.data());
	}
	fprintf(stderr, "\n");

	return false;
}
// Checks the link status of program `p`.
// Returns true when the program linked, or unconditionally when m_debug_shader is
// false (no GL query is made in that case). On failure the info log, if any, is written
// to stderr followed by a newline, and false is returned.
bool GSShaderOGL::ValidateProgram(GLuint p)
{
	if (!m_debug_shader)
		return true;

	GLint status = 0;
	glGetProgramiv(p, GL_LINK_STATUS, &status);
	if (status)
		return true;

	GLint log_length = 0;
	glGetProgramiv(p, GL_INFO_LOG_LENGTH, &log_length);
	if (log_length > 0)
	{
		// Owning, zero-filled buffer: freed automatically and always NUL-terminated for "%s".
		std::vector<char> log(static_cast<size_t>(log_length), '\0');
		glGetProgramInfoLog(p, log_length, nullptr, log.data());
		fprintf(stderr, "%s", log.data());
	}
	fprintf(stderr, "\n");

	return false;
}
// Validates program pipeline `p` and checks the resulting validate status.
// Returns true when validation succeeded, or unconditionally when m_debug_shader is
// false (the pipeline is not validated at all in that case). On failure the info log,
// if any, is written to stderr followed by a newline, and false is returned.
bool GSShaderOGL::ValidatePipeline(GLuint p)
{
	if (!m_debug_shader)
		return true;

	// FIXME: might be mandatory to validate the pipeline
	glValidateProgramPipeline(p);

	GLint status = 0;
	glGetProgramPipelineiv(p, GL_VALIDATE_STATUS, &status);
	if (status)
		return true;

	GLint log_length = 0;
	glGetProgramPipelineiv(p, GL_INFO_LOG_LENGTH, &log_length);
	if (log_length > 0)
	{
		// Owning, zero-filled buffer: freed automatically and always NUL-terminated for "%s".
		std::vector<char> log(static_cast<size_t>(log_length), '\0');
		glGetProgramPipelineInfoLog(p, log_length, nullptr, log.data());
		fprintf(stderr, "%s", log.data());
	}
	fprintf(stderr, "\n");

	return false;
}
// Builds the GLSL preamble placed in front of every shader source:
// version/extension directives, driver workaround defines, a define naming the shader
// stage, a define that maps `entry` to `main`, and finally the caller-supplied `macro` text.
std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro)
{
	std::string header("#version 330 core\n");

	// Core in GL 4.20
	header += "#extension GL_ARB_shading_language_420pack: require\n";
	// Core in GL 4.10
	header += "#extension GL_ARB_separate_shader_objects: require\n";

	// Image load/store (core in GL 4.20) is optional; shaders get a define when it is missing.
	header += GLLoader::found_GL_ARB_shader_image_load_store
		? "#extension GL_ARB_shader_image_load_store: require\n"
		: "#define DISABLE_GL42_image\n";

	if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
		header += "#define BROKEN_DRIVER as_usual\n";

	// GL_ES cannot be relied on across drivers (AMD/Nvidia define it to 0, Intel on
	// Windows leaves it undefined, Intel on Linux refuses to define it), so a private
	// replacement is defined instead.
	header += "#define pGL_ES 0\n";

	// One define per stage lets a single file contain several shaders.
	if (type == GL_VERTEX_SHADER)
	{
		header += "#define VERTEX_SHADER 1\n";
	}
	else if (type == GL_GEOMETRY_SHADER)
	{
		header += "#define GEOMETRY_SHADER 1\n";
	}
	else if (type == GL_FRAGMENT_SHADER)
	{
		header += "#define FRAGMENT_SHADER 1\n";
	}
	else
	{
		ASSERT(0);
	}

	// Rename the requested entry point to main.
	header += format("#define %s main\n", entry.c_str());

	header += macro;

	return header;
}
// Compiles a single-stage program of kind `type` with glCreateShaderProgramv from three
// source strings: the generated header, `common_header`, and `glsl_h_code`.
// `glsl_file` is only used in the error report. The program id is registered for
// deletion in the destructor and returned even when validation fails.
GLuint GSShaderOGL::Compile(const char* glsl_file, const std::string& entry, GLenum type, const std::string& common_header, const char* glsl_h_code, const std::string& macro_sel /* = "" */)
{
	ASSERT(glsl_h_code != NULL);

	// Header and body are passed as separate strings so that the line numbers in the
	// GLSL compiler report match the source file.
	const std::string header = GenGlslHeader(entry, type, macro_sel);

	const int source_count = 3;
	const char* sources[source_count] = {
		header.c_str(),
		common_header.c_str(),
		glsl_h_code,
	};

	const GLuint program = glCreateShaderProgramv(type, source_count, sources);

	if (!ValidateProgram(program))
	{
		// Extra context to identify which shader variant failed.
		fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file, entry.c_str(), program);
		fprintf(stderr, "\n%s", macro_sel.c_str());
		fprintf(stderr, "\n");
	}

	m_prog_to_delete.push_back(program);

	return program;
}
// Counterpart of Compile() for the non-separated build: creates and compiles a shader
// object (not a program) of kind `type` from the generated header, `common_header`, and
// `glsl_h_code`. `glsl_file` is only used in the error report. The shader id is
// registered for deletion in the destructor and returned even when validation fails.
GLuint GSShaderOGL::CompileShader(const char* glsl_file, const std::string& entry, GLenum type, const std::string& common_header, const char* glsl_h_code, const std::string& macro_sel /* = "" */)
{
	ASSERT(glsl_h_code != NULL);

	// Header and body are passed as separate strings so that the line numbers in the
	// GLSL compiler report match the source file.
	const std::string header = GenGlslHeader(entry, type, macro_sel);

	const int source_count = 3;
	const char* sources[source_count] = {
		header.c_str(),
		common_header.data(),
		glsl_h_code,
	};

	const GLuint shader = glCreateShader(type);
	glShaderSource(shader, source_count, sources, NULL);
	glCompileShader(shader);

	if (!ValidateShader(shader))
	{
		// Extra context to identify which shader variant failed.
		fprintf(stderr, "%s (entry %s, prog %d) :", glsl_file, entry.c_str(), shader);
		fprintf(stderr, "\n%s", macro_sel.c_str());
		fprintf(stderr, "\n");
	}

	m_shad_to_delete.push_back(shader);

	return shader;
}
// Fetches the binary of program `p` and writes the assembly text embedded in it to `file`.
// A program binary would normally be used for caching, but Nvidia also embeds an ASM
// dump in it. The ASM is useful because the instruction count gives an overview of the
// program's cost.
// Note: the cg offline compiler was used initially, but it does not support the latest
// GLSL improvements.
//
// Returns 0 immediately on non-Nvidia drivers. Otherwise returns the number of
// instruction lines found, or -1 when the output file cannot be opened or no
// instruction section was found (in the latter case the raw binary is written to
// `file` instead and ASSERT(0) fires).
int GSShaderOGL::DumpAsm(const std::string& file, GLuint p)
{
	if (!GLLoader::vendor_id_nvidia)
		return 0;

	// Initialised so that a failed query does not leave an indeterminate length.
	GLint binaryLength = 0;
	glGetProgramiv(p, GL_PROGRAM_BINARY_LENGTH, &binaryLength);
	if (binaryLength < 0)
		binaryLength = 0;

	// Zero-filled with 4 bytes of padding: the "!!" scan may read one byte past the
	// data, and the padding guarantees a terminating NUL for the string functions.
	std::vector<char> binary(static_cast<size_t>(binaryLength) + 4, '\0');
	GLenum binaryFormat = 0;
	glGetProgramBinary(p, binaryLength, nullptr, &binaryFormat, binary.data());

	FILE* outfile = fopen(file.c_str(), "w");
	ASSERT(outfile);
	if (!outfile)
	{
		// ASSERT may compile to nothing; never hand a null FILE* to fprintf.
		fprintf(stderr, "Error: failed to open %s for writing\n", file.c_str());
		return -1;
	}

	// Search the magic number "!!"
	int asm_ = 0;
	while (asm_ < binaryLength && (binary[asm_] != '!' || binary[asm_ + 1] != '!'))
	{
		asm_ += 1;
	}

	int instructions = -1;
	if (asm_ < binaryLength)
	{
		// strtok writes NULs into its input, so tokenize a copy and keep `binary`
		// intact for the raw fallback dump below.
		std::vector<char> text(binary.begin() + asm_, binary.end());

		// Print the asm as text, stopping at the END line (but not at ENDIF).
		char* asm_txt = strtok(text.data(), "\n");
		while (asm_txt != nullptr && (strncmp(asm_txt, "END", 3) || !strncmp(asm_txt, "ENDIF", 5)))
		{
			if (!strncmp(asm_txt, "OUT", 3) || !strncmp(asm_txt, "TEMP", 4) || !strncmp(asm_txt, "LONG", 4))
			{
				// Declaration line: (re)start the instruction count.
				instructions = 0;
			}
			else if (instructions >= 0)
			{
				// Blank line between the declarations and the first instruction.
				if (instructions == 0)
					fprintf(outfile, "\n");
				instructions++;
			}

			fprintf(outfile, "%s\n", asm_txt);
			asm_txt = strtok(nullptr, "\n");
		}
		fprintf(outfile, "\nFound %d instructions\n", instructions);
	}
	fclose(outfile);

	if (instructions < 0)
	{
		// RAW dump in case of error
		fprintf(stderr, "Error: failed to find the number of instructions!\n");
		outfile = fopen(file.c_str(), "wb");
		if (outfile)
		{
			fwrite(binary.data(), static_cast<size_t>(binaryLength), 1, outfile);
			fclose(outfile);
		}
		ASSERT(0);
	}

	return instructions;
}

View File

@ -1,55 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GS/GSGL.h"
#include <unordered_map>
// Helper that compiles, links, caches and binds GL shader objects, either as
// single-stage programs attached to a program pipeline or as conventionally linked
// programs. Every GL object it creates is recorded and deleted in the destructor.
class GSShaderOGL
{
// Default pipeline created in the constructor; BindPipeline(vs, gs, ps) swaps stages on it.
GLuint m_pipeline;
// Cache of linked programs, keyed by the 32-bit value LinkProgram derives from (vs, gs, ps).
std::unordered_map<u32, GLuint> m_program;
// When false, the Validate* helpers return true without querying GL.
const bool m_debug_shader;
// GL objects to delete in the destructor.
std::vector<GLuint> m_shad_to_delete;
std::vector<GLuint> m_prog_to_delete;
std::vector<GLuint> m_pipe_to_delete;
// Each returns true on success (or when m_debug_shader is false); otherwise prints
// the object's info log to stderr and returns false.
bool ValidateShader(GLuint s);
bool ValidateProgram(GLuint p);
bool ValidatePipeline(GLuint p);
// Builds the GLSL preamble (version, extensions, stage define, entry-point define, macros).
std::string GenGlslHeader(const std::string& entry, GLenum type, const std::string& macro);
public:
// Creates and binds the default pipeline; `debug` enables the Validate* checks.
GSShaderOGL(bool debug);
~GSShaderOGL();
// Separated build: single-stage programs bound through a program pipeline.
void BindPipeline(GLuint vs, GLuint gs, GLuint ps);
void BindPipeline(GLuint pipe);
GLuint Compile(const char* glsl_file, const std::string& entry, GLenum type, const std::string& common_header, const char* glsl_h_code, const std::string& macro_sel = "");
GLuint LinkPipeline(const std::string& pretty_print, GLuint vs, GLuint gs, GLuint ps);
// Non-separated build: the same operations using shader objects linked into one program.
void BindProgram(GLuint vs, GLuint gs, GLuint ps);
void BindProgram(GLuint p);
GLuint CompileShader(const char* glsl_file, const std::string& entry, GLenum type, const std::string& common_header, const char* glsl_h_code, const std::string& macro_sel = "");
GLuint LinkProgram(GLuint vs, GLuint gs, GLuint ps);
// Writes the assembly embedded in the binary of program `p` to `file` and returns the
// instruction count; returns 0 without doing anything on non-Nvidia drivers.
int DumpAsm(const std::string& file, GLuint p);
};

View File

@ -493,7 +493,6 @@
<ClCompile Include="GS\Window\GSSetting.cpp" />
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.cpp" />
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.all.cpp" />
<ClCompile Include="GS\Renderers\OpenGL\GSShaderOGL.cpp" />
<ClCompile Include="GS\GSState.cpp" />
<ClCompile Include="GS\GSTables.cpp" />
<ClCompile Include="GS\Renderers\Common\GSTexture.cpp" />
@ -854,7 +853,6 @@
<ClInclude Include="GS\Window\GSSetting.h" />
<ClInclude Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.h" />
<ClInclude Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.all.h" />
<ClInclude Include="GS\Renderers\OpenGL\GSShaderOGL.h" />
<ClInclude Include="GS\GSState.h" />
<ClInclude Include="GS\GSTables.h" />
<ClInclude Include="GS\Renderers\Common\GSTexture.h" />

View File

@ -1490,9 +1490,6 @@
<ClCompile Include="GS\Renderers\OpenGL\GSDeviceOGL.cpp">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\OpenGL\GSShaderOGL.cpp">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\OpenGL\GSTextureOGL.cpp">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClCompile>
@ -2581,9 +2578,6 @@
<ClInclude Include="GS\Renderers\OpenGL\GSDeviceOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\OpenGL\GSShaderOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\OpenGL\GSTextureOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>