diff --git a/linux_various/glsl2h.pl b/linux_various/glsl2h.pl index 3af99197f2..cd6dd009f0 100755 --- a/linux_various/glsl2h.pl +++ b/linux_various/glsl2h.pl @@ -42,7 +42,7 @@ my $gsdx_out = File::Spec->catdir($gsdx_path, "glsl_source.h"); # Just a hack to reuse glsl2h function easily $gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res", "glsl"); -my @tfx_res = qw/tfx_fs.glsl tfx_fs_subroutine.glsl/; +my @tfx_res = qw/tfx_fs.glsl/; my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl"); my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/; diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index c8bffcfb6f..e16d2a206c 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -87,7 +87,6 @@ PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange = NU PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL; PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL; // GL4.0 -PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL; // GL4.1 PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL; PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL; @@ -339,13 +338,12 @@ namespace GLLoader { bool found_GL_ARB_draw_buffers_blend = false; // DX10 GPU limited driver on windows! // Note: except Apple, all drivers support explicit uniform location - bool found_GL_ARB_explicit_uniform_location = false; // need by subroutine and bindless texture + bool found_GL_ARB_explicit_uniform_location = false; // need by bindless texture // GL4 hardware bool found_GL_ARB_buffer_storage = false; bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it bool found_GL_ARB_gpu_shader5 = false; bool found_GL_ARB_shader_image_load_store = false; // GLES3.1 - bool found_GL_ARB_shader_subroutine = false; bool found_GL_ARB_bindless_texture = false; // GL5 GPU? bool found_GL_ARB_texture_barrier = false; // Well maybe supported by older hardware I don't know @@ -450,19 +448,6 @@ namespace GLLoader { if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true; else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n"); } -#if 0 - // Erratum: on nvidia implementation, gain is very nice : 42.5 fps => 46.5 fps - // - // Strangely it doesn't provide the speed boost as expected. - // Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on - // colin mcrae 3 by 2100 uniform, but code is slower! - // - // Current hypothesis: the validation of useprogram is done in the "driver thread" whereas the extra function calls - // are done on the overloaded main threads. - // Apitrace profiling shows faster GPU draw times - - if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true; -#endif // GL4.2 if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true; if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true; @@ -495,7 +480,6 @@ namespace GLLoader { status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend"); // GL4.1 status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects"); - status &= status_and_override(found_GL_ARB_shader_subroutine, "GL_ARB_shader_subroutine"); // GL4.2 status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store"); status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true); diff --git a/plugins/GSdx/GLLoader.h b/plugins/GSdx/GLLoader.h index ea3d5a43df..0f0cf4f410 100644 --- a/plugins/GSdx/GLLoader.h +++ b/plugins/GSdx/GLLoader.h @@ -270,7 +270,6 @@ extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange; extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate; extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate; // GL4.0 -extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv; // GL4.1 extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline; extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines; @@ -361,7 +360,6 @@ namespace GLLoader { extern bool found_GL_ARB_shader_image_load_store; extern bool found_GL_ARB_clear_texture; extern bool found_GL_ARB_buffer_storage; - extern bool found_GL_ARB_shader_subroutine; extern bool found_GL_ARB_bindless_texture; extern bool found_GL_ARB_explicit_uniform_location; extern bool found_GL_ARB_clip_control; diff --git a/plugins/GSdx/GLState.cpp b/plugins/GSdx/GLState.cpp index 8925ce6845..25028d1ce9 100644 --- a/plugins/GSdx/GLState.cpp +++ b/plugins/GSdx/GLState.cpp @@ -58,8 +58,6 @@ namespace GLState { GLuint vs; GLuint program; bool dirty_prog; - bool dirty_subroutine_vs; - bool dirty_subroutine_ps; #if 0 struct { GSVertexBufferStateOGL* vb; @@ -105,8 +103,6 @@ namespace GLState { vs = 0; program = 0; dirty_prog = false; - dirty_subroutine_vs = false; - dirty_subroutine_ps = false; dirty_ressources = false; } } diff --git a/plugins/GSdx/GLState.h b/plugins/GSdx/GLState.h index 0ce22acc1c..43c7836a2e 100644 --- a/plugins/GSdx/GLState.h +++ b/plugins/GSdx/GLState.h @@ -58,8 +58,6 @@ namespace GLState { extern GLuint vs; extern GLuint program; // monolith program (when sso isn't supported) extern bool dirty_prog; - extern bool dirty_subroutine_vs; - extern bool dirty_subroutine_ps; extern bool dirty_ressources; extern void Clear(); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 9276b5a972..cd6fd12994 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -229,7 +229,6 @@ class GSDeviceOGL : public GSDevice { uint32 wildhack:1; uint32 bppz:2; - // Next param will be handle by subroutine uint32 tme:1; uint32 fst:1; @@ -338,7 +337,6 @@ class GSDeviceOGL : public GSDevice uint32 tcoffsethack:1; //uint32 point_sampler:1; Not tested, so keep the bit for blend uint32 iip:1; - // Next param will be handle by subroutine (broken currently) uint32 colclip:2; uint32 atst:3; diff --git a/plugins/GSdx/GSShaderOGL.cpp b/plugins/GSdx/GSShaderOGL.cpp index 19759c05f8..dd7abc6f9e 100644 --- a/plugins/GSdx/GSShaderOGL.cpp +++ b/plugins/GSdx/GSShaderOGL.cpp @@ -24,14 +24,8 @@ #include "GLState.h" GSShaderOGL::GSShaderOGL(bool debug) : - m_debug_shader(debug), - m_vs_sub_count(0), - m_ps_sub_count(0) + m_debug_shader(debug) { - - memset(&m_vs_sub, 0, countof(m_vs_sub)*sizeof(m_vs_sub[0])); - memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(m_ps_sub[0])); - m_single_prog.clear(); if (GLLoader::found_GL_ARB_separate_shader_objects) { gl_GenProgramPipelines(1, &m_pipeline); @@ -48,41 +42,17 @@ GSShaderOGL::~GSShaderOGL() m_single_prog.clear(); } -void GSShaderOGL::VS(GLuint s, GLuint sub_count) +void GSShaderOGL::VS(GLuint s) { if (GLState::vs != s) { - m_vs_sub_count = sub_count; - GLState::vs = s; GLState::dirty_prog = true; - GLState::dirty_subroutine_vs = true; if (GLLoader::found_GL_ARB_separate_shader_objects) gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s); } } -void GSShaderOGL::VS_subroutine(GLuint *sub) -{ - if (!(m_vs_sub[0] == sub[0])) { - m_vs_sub[0] = sub[0]; - GLState::dirty_subroutine_vs = true; - } -} - -void GSShaderOGL::PS_subroutine(GLuint *sub) -{ - // FIXME could be more efficient with GSvector - if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1] && m_ps_sub[2] == sub[2] && m_ps_sub[3] == sub[3] && m_ps_sub[4] == sub[4])) { - m_ps_sub[0] = sub[0]; - m_ps_sub[1] = sub[1]; - m_ps_sub[2] = sub[2]; - m_ps_sub[3] = sub[3]; - m_ps_sub[4] = sub[4]; - GLState::dirty_subroutine_ps = true; - } -} - void GSShaderOGL::PS_ressources(GLuint64 handle[2]) { if (handle[0] != GLState::tex_handle[0] || handle[1] != GLState::tex_handle[1]) { @@ -92,7 +62,7 @@ void GSShaderOGL::PS_ressources(GLuint64 handle[2]) } } -void GSShaderOGL::PS(GLuint s, GLuint sub_count) +void GSShaderOGL::PS(GLuint s) { #ifdef _DEBUG if (true) @@ -100,12 +70,9 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count) if (GLState::ps != s) #endif { - m_ps_sub_count = sub_count; - // In debug always sets the program. It allow to replace the program in apitrace easily. GLState::ps = s; GLState::dirty_prog = true; - GLState::dirty_subroutine_ps = true; GLState::dirty_ressources = true; if (GLLoader::found_GL_ARB_separate_shader_objects) { gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s); @@ -142,21 +109,6 @@ void GSShaderOGL::SetupRessources() } } -void GSShaderOGL::SetupSubroutineUniform() -{ - if (!GLLoader::found_GL_ARB_shader_subroutine) return; - - if (GLState::dirty_subroutine_vs && m_vs_sub_count) { - gl_UniformSubroutinesuiv(GL_VERTEX_SHADER, m_vs_sub_count, m_vs_sub); - GLState::dirty_subroutine_vs = false; - } - - if (GLState::dirty_subroutine_ps && m_ps_sub_count) { - gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_ps_sub_count, m_ps_sub); - GLState::dirty_subroutine_ps = false; - } -} - bool GSShaderOGL::ValidateShader(GLuint s) { if (!m_debug_shader) return true; @@ -243,8 +195,6 @@ void GSShaderOGL::UseProgram() if (GLState::dirty_prog) { if (!GLLoader::found_GL_ARB_separate_shader_objects) { - GLState::dirty_subroutine_vs = true; - GLState::dirty_subroutine_ps = true; GLState::dirty_ressources = true; hash_map::iterator it; @@ -277,8 +227,6 @@ void GSShaderOGL::UseProgram() SetupRessources(); - SetupSubroutineUniform(); - GLState::dirty_prog = false; GL_POP(); @@ -294,11 +242,6 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co // Need GL version 410 header += "#extension GL_ARB_separate_shader_objects: require\n"; } - if (GLLoader::found_GL_ARB_shader_subroutine && GLLoader::found_GL_ARB_explicit_uniform_location) { - // Need GL version 400 - header += "#define SUBROUTINE_GL40 1\n"; - header += "#extension GL_ARB_shader_subroutine: require\n"; - } if (GLLoader::found_GL_ARB_explicit_uniform_location) { // Need GL version 430 header += "#extension GL_ARB_explicit_uniform_location: require\n"; diff --git a/plugins/GSdx/GSShaderOGL.h b/plugins/GSdx/GSShaderOGL.h index 92e781dabc..53b98e6b41 100644 --- a/plugins/GSdx/GSShaderOGL.h +++ b/plugins/GSdx/GSShaderOGL.h @@ -25,13 +25,7 @@ class GSShaderOGL { GLuint m_pipeline; hash_map m_single_prog; const bool m_debug_shader; - GLuint m_vs_sub_count; - GLuint m_ps_sub_count; - GLuint m_vs_sub[1]; - GLuint m_ps_sub[5]; - - void SetupSubroutineUniform(); void SetupRessources(); bool ValidateShader(GLuint p); @@ -46,11 +40,9 @@ class GSShaderOGL { ~GSShaderOGL(); void GS(GLuint s); - void PS(GLuint s, GLuint sub_count = 0); - void PS_subroutine(GLuint *sub); + void PS(GLuint s); void PS_ressources(GLuint64 handle[2]); - void VS(GLuint s, GLuint sub_count = 0); - void VS_subroutine(GLuint *sub); + void VS(GLuint s); void UseProgram(); diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 03942e397d..052fe57aee 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -143,16 +143,7 @@ void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* void GSDeviceOGL::SetupVS(VSSelector sel) { - if (GLLoader::found_GL_ARB_shader_subroutine) { - GLuint sub[1]; - sub[0] = sel.tme ? 1 + (uint32)sel.fst : 0; - m_shader->VS_subroutine(sub); - // Handle by subroutine useless now - sel.tme = 0; - sel.fst = 0; - } - - m_shader->VS(m_vs[sel], 1); + m_shader->VS(m_vs[sel]); } void GSDeviceOGL::SetupGS(GSSelector sel) @@ -162,38 +153,6 @@ void GSDeviceOGL::SetupGS(GSSelector sel) void GSDeviceOGL::SetupPS(PSSelector sel) { - if (GLLoader::found_GL_ARB_shader_subroutine) { - GLuint tfx = sel.tfx > 3 ? 19 : 11 + (uint32)sel.tfx + (uint32)sel.tcc*4; - - GLuint colclip = 8 + (uint32)sel.colclip; - - GLuint clamp = - (sel.wms == 2 && sel.wmt == 2) ? 20 : - (sel.wms == 2) ? 21 : - (sel.wmt == 2) ? 22 : 23; - - GLuint wrap = - (sel.wms == 2 && sel.wmt == 2) ? 24 : - (sel.wms == 3 && sel.wmt == 3) ? 25 : - (sel.wms == 2 && sel.wmt == 3) ? 26 : - (sel.wms == 3 && sel.wmt == 2) ? 27 : - (sel.wms == 2) ? 28 : - (sel.wmt == 3) ? 29 : - (sel.wms == 3) ? 30 : - (sel.wmt == 2) ? 31 : 32; - - GLuint sub[5] = {sel.atst, colclip, tfx, clamp, wrap}; - - m_shader->PS_subroutine(sub); - // Handle by subroutine useless now - sel.atst = 0; - sel.colclip = 0; - sel.tfx = 0; - sel.tcc = 0; - // sel.wms = 0; - // sel.wmt = 0; - } - // ************************************************************* // Static // ************************************************************* @@ -210,7 +169,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel) // ************************************************************* // Dynamic // ************************************************************* - m_shader->PS(ps, 3); + m_shader->PS(ps); } void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel) diff --git a/plugins/GSdx/GSWnd.cpp b/plugins/GSdx/GSWnd.cpp index abcf4ed42f..14a77b6ca1 100644 --- a/plugins/GSdx/GSWnd.cpp +++ b/plugins/GSdx/GSWnd.cpp @@ -80,7 +80,6 @@ void GSWndGL::PopulateGlFunction() *(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync"); *(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange"); // GL4.0 - *(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv", true); *(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB", true); *(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB", true); // GL4.1 diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 8682f96a88..7f7cf189f6 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -80,19 +80,6 @@ layout(std140, binding = 21) uniform cb21 vec2 TC_OffsetHack; }; -#ifdef SUBROUTINE_GL40 -// Function pointer type + the functionn pointer variable -subroutine void AlphaTestType(vec4 c); -layout(location = 0) subroutine uniform AlphaTestType atst; - -subroutine vec4 TfxType(vec4 t, vec4 c); -layout(location = 2) subroutine uniform TfxType tfx; - -subroutine void ColClipType(inout vec4 c); -layout(location = 1) subroutine uniform ColClipType colclip; -#endif - - vec4 sample_c(vec2 uv) { // FIXME: check the issue on openGL @@ -291,7 +278,6 @@ vec4 sample_color(vec2 st, float q) } // FIXME Precompute the factor 255/128 in VS -#ifndef SUBROUTINE_GL40 vec4 tfx(vec4 t, vec4 c) { vec4 c_out = c; @@ -319,9 +305,7 @@ vec4 tfx(vec4 t, vec4 c) return c_out; } -#endif -#ifndef SUBROUTINE_GL40 void atst(vec4 c) { float a = trunc(c.a * 255.0 + 0.01); @@ -350,9 +334,7 @@ void atst(vec4 c) discard; #endif } -#endif -#ifndef SUBROUTINE_GL40 void colclip(inout vec4 c) { #if (PS_COLCLIP == 2) @@ -363,7 +345,6 @@ void colclip(inout vec4 c) c.rgb *= vec3(factor); #endif } -#endif void fog(inout vec4 c, float f) { diff --git a/plugins/GSdx/res/glsl/tfx_fs_subroutine.glsl b/plugins/GSdx/res/glsl/tfx_fs_subroutine.glsl deleted file mode 100644 index ce3d4bac58..0000000000 --- a/plugins/GSdx/res/glsl/tfx_fs_subroutine.glsl +++ /dev/null @@ -1,285 +0,0 @@ -//#version 420 // Keep it for text editor detection - -// Subroutine of standard fs function (I don't know if it will be ever used one day) - -// FIXME crash nvidia -#if 0 -// Function pointer type -subroutine vec4 WrapType(vec4 uv); - -// a function pointer variable -layout(location = 4) subroutine uniform WrapType wrapuv; - -layout(index = 24) subroutine(WrapType) -vec4 wrapuv_wms_wmt_2(vec4 uv) -{ - vec4 uv_out = uv; - uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); - return uv_out; -} - -layout(index = 25) subroutine(WrapType) -vec4 wrapuv_wms_wmt3(vec4 uv) -{ - vec4 uv_out = uv; - uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; - return uv_out; -} - -layout(index = 26) subroutine(WrapType) -vec4 wrapuv_wms2_wmt3(vec4 uv) -{ - vec4 uv_out = uv; - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; - return uv_out; -} - -layout(index = 27) subroutine(WrapType) -vec4 wrapuv_wms3_wmt2(vec4 uv) -{ - vec4 uv_out = uv; - uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; - uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - return uv_out; -} - -layout(index = 28) subroutine(WrapType) -vec4 wrapuv_wms2_wmtx(vec4 uv) -{ - vec4 uv_out = uv; - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - return uv_out; -} - -layout(index = 29) subroutine(WrapType) -vec4 wrapuv_wmsx_wmt3(vec4 uv) -{ - vec4 uv_out = uv; - uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; - return uv_out; -} - -layout(index = 30) subroutine(WrapType) -vec4 wrapuv_wms3_wmtx(vec4 uv) -{ - vec4 uv_out = uv; - uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; - return uv_out; -} - -layout(index = 31) subroutine(WrapType) -vec4 wrapuv_wmsx_wmt2(vec4 uv) -{ - vec4 uv_out = uv; - uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - return uv_out; -} - -layout(index = 32) subroutine(WrapType) -vec4 wrapuv_dummy(vec4 uv) -{ - return uv; -} -#endif - -// FIXME crash nvidia -#if 0 -// Function pointer type -subroutine vec2 ClampType(vec2 uv); - -// a function pointer variable -layout(location = 3) subroutine uniform ClampType clampuv; - -layout(index = 20) subroutine(ClampType) -vec2 clampuv_wms2_wmt2(vec2 uv) -{ - return clamp(uv, MinF, MinMax.zw); -} - -layout(index = 21) subroutine(ClampType) -vec2 clampuv_wms2(vec2 uv) -{ - vec2 uv_out = uv; - uv_out.x = clamp(uv.x, MinF.x, MinMax.z); - return uv_out; -} - -layout(index = 22) subroutine(ClampType) -vec2 clampuv_wmt2(vec2 uv) -{ - vec2 uv_out = uv; - uv_out.y = clamp(uv.y, MinF.y, MinMax.w); - return uv_out; -} - -layout(index = 23) subroutine(ClampType) -vec2 clampuv_dummy(vec2 uv) -{ - return uv; -} -#endif - -#ifdef SUBROUTINE_GL40 -layout(index = 11) subroutine(TfxType) -vec4 tfx_0_tcc_0(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f; - return c_out; -} - -layout(index = 12) subroutine(TfxType) -vec4 tfx_1_tcc_0(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out.rgb = t.rgb; - return c_out; -} - -layout(index = 13) subroutine(TfxType) -vec4 tfx_2_tcc_0(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; - return c_out; -} - -layout(index = 14) subroutine(TfxType) -vec4 tfx_3_tcc_0(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; - return c_out; -} - -layout(index = 15) subroutine(TfxType) -vec4 tfx_0_tcc_1(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out = c * t * 255.0f / 128.0f; - return c_out; -} - -layout(index = 16) subroutine(TfxType) -vec4 tfx_1_tcc_1(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out = t; - return c_out; -} - -layout(index = 17) subroutine(TfxType) -vec4 tfx_2_tcc_1(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; - c_out.a += t.a; - return c_out; -} - -layout(index = 18) subroutine(TfxType) -vec4 tfx_3_tcc_1(vec4 t, vec4 c) -{ - vec4 c_out = c; - c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; - c_out.a = t.a; - return c_out; -} - -layout(index = 19) subroutine(TfxType) -vec4 tfx_dummy(vec4 t, vec4 c) -{ - return c; -} -#endif - -#ifdef SUBROUTINE_GL40 -layout(index = 0) subroutine(AlphaTestType) -void atest_never(vec4 c) -{ - discard; -} - -layout(index = 1) subroutine(AlphaTestType) -void atest_always(vec4 c) -{ - // Nothing to do -} - -layout(index = 2) subroutine(AlphaTestType) -void atest_l(vec4 c) -{ - float a = trunc(c.a * 255.0 + 0.01); - if (PS_SPRITEHACK == 0) - if ((AREF - a - 0.5f) < 0.0f) - discard; -} - -layout(index = 3) subroutine(AlphaTestType) -void atest_le(vec4 c) -{ - float a = trunc(c.a * 255.0 + 0.01); - if ((AREF - a + 0.5f) < 0.0f) - discard; -} - -layout(index = 4) subroutine(AlphaTestType) -void atest_e(vec4 c) -{ - float a = trunc(c.a * 255.0 + 0.01); - if ((0.5f - abs(a - AREF)) < 0.0f) - discard; -} - -layout(index = 5) subroutine(AlphaTestType) -void atest_ge(vec4 c) -{ - float a = trunc(c.a * 255.0 + 0.01); - if ((a-AREF + 0.5f) < 0.0f) - discard; -} - -layout(index = 6) subroutine(AlphaTestType) -void atest_g(vec4 c) -{ - float a = trunc(c.a * 255.0 + 0.01); - if ((a-AREF - 0.5f) < 0.0f) - discard; -} - -layout(index = 7) subroutine(AlphaTestType) -void atest_ne(vec4 c) -{ - float a = trunc(c.a * 255.0 + 0.01); - if ((abs(a - AREF) - 0.5f) < 0.0f) - discard; -} -#endif - -#ifdef SUBROUTINE_GL40 -layout(index = 8) subroutine(ColClipType) -void colclip_0(inout vec4 c) -{ - // nothing to do -} - -layout(index = 9) subroutine(ColClipType) -void colclip_1(inout vec4 c) -{ - // FIXME !!!! - //c.rgb *= c.rgb < 128./255; - bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f); - c.rgb *= vec3(factor); -} - -layout(index = 10) subroutine(ColClipType) -void colclip_2(inout vec4 c) -{ - c.rgb = 256.0f/255.0f - c.rgb; - // FIXME !!!! - //c.rgb *= c.rgb < 128./255; - bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f); - c.rgb *= vec3(factor); -} -#endif diff --git a/plugins/GSdx/res/glsl/tfx_vgs.glsl b/plugins/GSdx/res/glsl/tfx_vgs.glsl index 1273f5d6cf..8bbb183d46 100644 --- a/plugins/GSdx/res/glsl/tfx_vgs.glsl +++ b/plugins/GSdx/res/glsl/tfx_vgs.glsl @@ -42,36 +42,6 @@ const float exp_min32 = exp2(-32.0f); const float exp_min31 = exp2(-31.0f); #endif -#ifdef SUBROUTINE_GL40 -// Function pointer type -subroutine void TextureCoordType(void); - -// a function pointer variable -layout(location = 0) subroutine uniform TextureCoordType texture_coord; - -layout(index = 0) subroutine(TextureCoordType) -void tme_0() -{ - VSout_t.xy = vec2(0.0f, 0.0f); - VSout_t.w = 1.0f; -} - -layout(index = 1) subroutine(TextureCoordType) -void tme_1_fst_0() -{ - VSout_t.xy = i_st; - VSout_t.w = i_q; -} - -layout(index = 2) subroutine(TextureCoordType) -void tme_1_fst_1() -{ - VSout_t.xy = vec2(i_uv) * TextureScale; - VSout_t.w = 1.0f; -} - -#else - void texture_coord() { if(VS_TME != 0) @@ -98,8 +68,6 @@ void texture_coord() } } -#endif - void vs_main() { highp uint z; diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 2f9331ebc6..3132f3b9c1 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -650,36 +650,6 @@ static const char* tfx_vgs_glsl = "const float exp_min31 = exp2(-31.0f);\n" "#endif\n" "\n" - "#ifdef SUBROUTINE_GL40\n" - "// Function pointer type\n" - "subroutine void TextureCoordType(void);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n" - "\n" - "layout(index = 0) subroutine(TextureCoordType)\n" - "void tme_0()\n" - "{\n" - " VSout_t.xy = vec2(0.0f, 0.0f);\n" - " VSout_t.w = 1.0f;\n" - "}\n" - "\n" - "layout(index = 1) subroutine(TextureCoordType)\n" - "void tme_1_fst_0()\n" - "{\n" - " VSout_t.xy = i_st;\n" - " VSout_t.w = i_q;\n" - "}\n" - "\n" - "layout(index = 2) subroutine(TextureCoordType)\n" - "void tme_1_fst_1()\n" - "{\n" - " VSout_t.xy = vec2(i_uv) * TextureScale;\n" - " VSout_t.w = 1.0f;\n" - "}\n" - "\n" - "#else\n" - "\n" "void texture_coord()\n" "{\n" " if(VS_TME != 0)\n" @@ -706,8 +676,6 @@ static const char* tfx_vgs_glsl = " }\n" "}\n" "\n" - "#endif\n" - "\n" "void vs_main()\n" "{\n" " highp uint z;\n" @@ -973,19 +941,6 @@ static const char* tfx_fs_all_glsl = " vec2 TC_OffsetHack;\n" "};\n" "\n" - "#ifdef SUBROUTINE_GL40\n" - "// Function pointer type + the functionn pointer variable\n" - "subroutine void AlphaTestType(vec4 c);\n" - "layout(location = 0) subroutine uniform AlphaTestType atst;\n" - "\n" - "subroutine vec4 TfxType(vec4 t, vec4 c);\n" - "layout(location = 2) subroutine uniform TfxType tfx;\n" - "\n" - "subroutine void ColClipType(inout vec4 c);\n" - "layout(location = 1) subroutine uniform ColClipType colclip;\n" - "#endif\n" - "\n" - "\n" "vec4 sample_c(vec2 uv)\n" "{\n" " // FIXME: check the issue on openGL\n" @@ -1184,7 +1139,6 @@ static const char* tfx_fs_all_glsl = "}\n" "\n" "// FIXME Precompute the factor 255/128 in VS\n" - "#ifndef SUBROUTINE_GL40\n" "vec4 tfx(vec4 t, vec4 c)\n" "{\n" " vec4 c_out = c;\n" @@ -1212,9 +1166,7 @@ static const char* tfx_fs_all_glsl = "\n" " return c_out;\n" "}\n" - "#endif\n" "\n" - "#ifndef SUBROUTINE_GL40\n" "void atst(vec4 c)\n" "{\n" " float a = trunc(c.a * 255.0 + 0.01);\n" @@ -1243,9 +1195,7 @@ static const char* tfx_fs_all_glsl = " discard;\n" "#endif\n" "}\n" - "#endif\n" "\n" - "#ifndef SUBROUTINE_GL40\n" "void colclip(inout vec4 c)\n" "{\n" "#if (PS_COLCLIP == 2)\n" @@ -1256,7 +1206,6 @@ static const char* tfx_fs_all_glsl = " c.rgb *= vec3(factor);\n" "#endif\n" "}\n" - "#endif\n" "\n" "void fog(inout vec4 c, float f)\n" "{\n" @@ -1516,291 +1465,6 @@ static const char* tfx_fs_all_glsl = "}\n" "\n" "#endif\n" - "//#version 420 // Keep it for text editor detection\n" - "\n" - "// Subroutine of standard fs function (I don't know if it will be ever used one day)\n" - "\n" - "// FIXME crash nvidia\n" - "#if 0\n" - "// Function pointer type\n" - "subroutine vec4 WrapType(vec4 uv);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 4) subroutine uniform WrapType wrapuv;\n" - "\n" - "layout(index = 24) subroutine(WrapType)\n" - "vec4 wrapuv_wms_wmt_2(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 25) subroutine(WrapType)\n" - "vec4 wrapuv_wms_wmt3(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 26) subroutine(WrapType)\n" - "vec4 wrapuv_wms2_wmt3(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" - " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 27) subroutine(WrapType)\n" - "vec4 wrapuv_wms3_wmt2(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" - " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 28) subroutine(WrapType)\n" - "vec4 wrapuv_wms2_wmtx(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 29) subroutine(WrapType)\n" - "vec4 wrapuv_wmsx_wmt3(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 30) subroutine(WrapType)\n" - "vec4 wrapuv_wms3_wmtx(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 31) subroutine(WrapType)\n" - "vec4 wrapuv_wmsx_wmt2(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 32) subroutine(WrapType)\n" - "vec4 wrapuv_dummy(vec4 uv)\n" - "{\n" - " return uv;\n" - "}\n" - "#endif\n" - "\n" - "// FIXME crash nvidia\n" - "#if 0\n" - "// Function pointer type\n" - "subroutine vec2 ClampType(vec2 uv);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 3) subroutine uniform ClampType clampuv;\n" - "\n" - "layout(index = 20) subroutine(ClampType)\n" - "vec2 clampuv_wms2_wmt2(vec2 uv)\n" - "{\n" - " return clamp(uv, MinF, MinMax.zw);\n" - "}\n" - "\n" - "layout(index = 21) subroutine(ClampType)\n" - "vec2 clampuv_wms2(vec2 uv)\n" - "{\n" - " vec2 uv_out = uv;\n" - " uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 22) subroutine(ClampType)\n" - "vec2 clampuv_wmt2(vec2 uv)\n" - "{\n" - " vec2 uv_out = uv;\n" - " uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 23) subroutine(ClampType)\n" - "vec2 clampuv_dummy(vec2 uv)\n" - "{\n" - " return uv;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef SUBROUTINE_GL40\n" - "layout(index = 11) subroutine(TfxType)\n" - "vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 12) subroutine(TfxType)\n" - "vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = t.rgb;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 13) subroutine(TfxType)\n" - "vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 14) subroutine(TfxType)\n" - "vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 15) subroutine(TfxType)\n" - "vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out = c * t * 255.0f / 128.0f;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 16) subroutine(TfxType)\n" - "vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out = t;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 17) subroutine(TfxType)\n" - "vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " c_out.a += t.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 18) subroutine(TfxType)\n" - "vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " c_out.a = t.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 19) subroutine(TfxType)\n" - "vec4 tfx_dummy(vec4 t, vec4 c)\n" - "{\n" - " return c;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef SUBROUTINE_GL40\n" - "layout(index = 0) subroutine(AlphaTestType)\n" - "void atest_never(vec4 c)\n" - "{\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 1) subroutine(AlphaTestType)\n" - "void atest_always(vec4 c)\n" - "{\n" - " // Nothing to do\n" - "}\n" - "\n" - "layout(index = 2) subroutine(AlphaTestType)\n" - "void atest_l(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if (PS_SPRITEHACK == 0)\n" - " if ((AREF - a - 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 3) subroutine(AlphaTestType)\n" - "void atest_le(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((AREF - a + 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 4) subroutine(AlphaTestType)\n" - "void atest_e(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((0.5f - abs(a - AREF)) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 5) subroutine(AlphaTestType)\n" - "void atest_ge(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((a-AREF + 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 6) subroutine(AlphaTestType)\n" - "void atest_g(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((a-AREF - 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 7) subroutine(AlphaTestType)\n" - "void atest_ne(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "#endif\n" - "\n" - "#ifdef SUBROUTINE_GL40\n" - "layout(index = 8) subroutine(ColClipType)\n" - "void colclip_0(inout vec4 c)\n" - "{\n" - " // nothing to do\n" - "}\n" - "\n" - "layout(index = 9) subroutine(ColClipType)\n" - "void colclip_1(inout vec4 c)\n" - "{\n" - " // FIXME !!!!\n" - " //c.rgb *= c.rgb < 128./255;\n" - " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" - " c.rgb *= vec3(factor);\n" - "}\n" - "\n" - "layout(index = 10) subroutine(ColClipType)\n" - "void colclip_2(inout vec4 c)\n" - "{\n" - " c.rgb = 256.0f/255.0f - c.rgb;\n" - " // FIXME !!!!\n" - " //c.rgb *= c.rgb < 128./255;\n" - " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" - " c.rgb *= vec3(factor);\n" - "}\n" - "#endif\n" ; static const char* fxaa_fx =