gsdx-ogl: wipeout subroutine code

Code was completely bitrotten
Code was a partial test (and yet 500 lines already)
Shaders are more and more complex, and multithreading support greatly
reduces the cost of shader switches
This commit is contained in:
Gregory Hainaut 2015-07-17 18:16:35 +02:00
parent e3751f6cd9
commit b4c04ed00a
14 changed files with 9 additions and 814 deletions

View File

@ -42,7 +42,7 @@ my $gsdx_out = File::Spec->catdir($gsdx_path, "glsl_source.h");
# Just a hack to reuse glsl2h function easily # Just a hack to reuse glsl2h function easily
$gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res", "glsl"); $gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res", "glsl");
my @tfx_res = qw/tfx_fs.glsl tfx_fs_subroutine.glsl/; my @tfx_res = qw/tfx_fs.glsl/;
my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl"); my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl");
my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/; my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/;

View File

@ -87,7 +87,6 @@ PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange = NU
PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL; PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL;
PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL; PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL;
// GL4.0 // GL4.0
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
// GL4.1 // GL4.1
PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL; PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL;
PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL; PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL;
@ -339,13 +338,12 @@ namespace GLLoader {
bool found_GL_ARB_draw_buffers_blend = false; // DX10 GPU limited driver on windows! bool found_GL_ARB_draw_buffers_blend = false; // DX10 GPU limited driver on windows!
// Note: except Apple, all drivers support explicit uniform location // Note: except Apple, all drivers support explicit uniform location
bool found_GL_ARB_explicit_uniform_location = false; // need by subroutine and bindless texture bool found_GL_ARB_explicit_uniform_location = false; // need by bindless texture
// GL4 hardware // GL4 hardware
bool found_GL_ARB_buffer_storage = false; bool found_GL_ARB_buffer_storage = false;
bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it
bool found_GL_ARB_gpu_shader5 = false; bool found_GL_ARB_gpu_shader5 = false;
bool found_GL_ARB_shader_image_load_store = false; // GLES3.1 bool found_GL_ARB_shader_image_load_store = false; // GLES3.1
bool found_GL_ARB_shader_subroutine = false;
bool found_GL_ARB_bindless_texture = false; // GL5 GPU? bool found_GL_ARB_bindless_texture = false; // GL5 GPU?
bool found_GL_ARB_texture_barrier = false; // Well maybe supported by older hardware I don't know bool found_GL_ARB_texture_barrier = false; // Well maybe supported by older hardware I don't know
@ -450,19 +448,6 @@ namespace GLLoader {
if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true; if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true;
else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n"); else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n");
} }
#if 0
// Erratum: on nvidia implementation, gain is very nice : 42.5 fps => 46.5 fps
//
// Strangely it doesn't provide the speed boost as expected.
// Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on
// colin mcrae 3 by 2100 uniform, but code is slower!
//
// Current hypothesis: the validation of useprogram is done in the "driver thread" whereas the extra function calls
// are done on the overloaded main threads.
// Apitrace profiling shows faster GPU draw times
if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true;
#endif
// GL4.2 // GL4.2
if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true; if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true;
if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true; if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true;
@ -495,7 +480,6 @@ namespace GLLoader {
status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend"); status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend");
// GL4.1 // GL4.1
status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects"); status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects");
status &= status_and_override(found_GL_ARB_shader_subroutine, "GL_ARB_shader_subroutine");
// GL4.2 // GL4.2
status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store"); status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store");
status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true); status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true);

View File

@ -270,7 +270,6 @@ extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange;
extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate; extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate;
extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate; extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate;
// GL4.0 // GL4.0
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
// GL4.1 // GL4.1
extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline; extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline;
extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines; extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines;
@ -361,7 +360,6 @@ namespace GLLoader {
extern bool found_GL_ARB_shader_image_load_store; extern bool found_GL_ARB_shader_image_load_store;
extern bool found_GL_ARB_clear_texture; extern bool found_GL_ARB_clear_texture;
extern bool found_GL_ARB_buffer_storage; extern bool found_GL_ARB_buffer_storage;
extern bool found_GL_ARB_shader_subroutine;
extern bool found_GL_ARB_bindless_texture; extern bool found_GL_ARB_bindless_texture;
extern bool found_GL_ARB_explicit_uniform_location; extern bool found_GL_ARB_explicit_uniform_location;
extern bool found_GL_ARB_clip_control; extern bool found_GL_ARB_clip_control;

View File

@ -58,8 +58,6 @@ namespace GLState {
GLuint vs; GLuint vs;
GLuint program; GLuint program;
bool dirty_prog; bool dirty_prog;
bool dirty_subroutine_vs;
bool dirty_subroutine_ps;
#if 0 #if 0
struct { struct {
GSVertexBufferStateOGL* vb; GSVertexBufferStateOGL* vb;
@ -105,8 +103,6 @@ namespace GLState {
vs = 0; vs = 0;
program = 0; program = 0;
dirty_prog = false; dirty_prog = false;
dirty_subroutine_vs = false;
dirty_subroutine_ps = false;
dirty_ressources = false; dirty_ressources = false;
} }
} }

View File

@ -58,8 +58,6 @@ namespace GLState {
extern GLuint vs; extern GLuint vs;
extern GLuint program; // monolith program (when sso isn't supported) extern GLuint program; // monolith program (when sso isn't supported)
extern bool dirty_prog; extern bool dirty_prog;
extern bool dirty_subroutine_vs;
extern bool dirty_subroutine_ps;
extern bool dirty_ressources; extern bool dirty_ressources;
extern void Clear(); extern void Clear();

View File

@ -229,7 +229,6 @@ class GSDeviceOGL : public GSDevice
{ {
uint32 wildhack:1; uint32 wildhack:1;
uint32 bppz:2; uint32 bppz:2;
// Next param will be handle by subroutine
uint32 tme:1; uint32 tme:1;
uint32 fst:1; uint32 fst:1;
@ -338,7 +337,6 @@ class GSDeviceOGL : public GSDevice
uint32 tcoffsethack:1; uint32 tcoffsethack:1;
//uint32 point_sampler:1; Not tested, so keep the bit for blend //uint32 point_sampler:1; Not tested, so keep the bit for blend
uint32 iip:1; uint32 iip:1;
// Next param will be handle by subroutine (broken currently)
uint32 colclip:2; uint32 colclip:2;
uint32 atst:3; uint32 atst:3;

View File

@ -24,14 +24,8 @@
#include "GLState.h" #include "GLState.h"
GSShaderOGL::GSShaderOGL(bool debug) : GSShaderOGL::GSShaderOGL(bool debug) :
m_debug_shader(debug), m_debug_shader(debug)
m_vs_sub_count(0),
m_ps_sub_count(0)
{ {
memset(&m_vs_sub, 0, countof(m_vs_sub)*sizeof(m_vs_sub[0]));
memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(m_ps_sub[0]));
m_single_prog.clear(); m_single_prog.clear();
if (GLLoader::found_GL_ARB_separate_shader_objects) { if (GLLoader::found_GL_ARB_separate_shader_objects) {
gl_GenProgramPipelines(1, &m_pipeline); gl_GenProgramPipelines(1, &m_pipeline);
@ -48,41 +42,17 @@ GSShaderOGL::~GSShaderOGL()
m_single_prog.clear(); m_single_prog.clear();
} }
void GSShaderOGL::VS(GLuint s, GLuint sub_count) void GSShaderOGL::VS(GLuint s)
{ {
if (GLState::vs != s) if (GLState::vs != s)
{ {
m_vs_sub_count = sub_count;
GLState::vs = s; GLState::vs = s;
GLState::dirty_prog = true; GLState::dirty_prog = true;
GLState::dirty_subroutine_vs = true;
if (GLLoader::found_GL_ARB_separate_shader_objects) if (GLLoader::found_GL_ARB_separate_shader_objects)
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s); gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
} }
} }
void GSShaderOGL::VS_subroutine(GLuint *sub)
{
if (!(m_vs_sub[0] == sub[0])) {
m_vs_sub[0] = sub[0];
GLState::dirty_subroutine_vs = true;
}
}
void GSShaderOGL::PS_subroutine(GLuint *sub)
{
// FIXME could be more efficient with GSvector
if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1] && m_ps_sub[2] == sub[2] && m_ps_sub[3] == sub[3] && m_ps_sub[4] == sub[4])) {
m_ps_sub[0] = sub[0];
m_ps_sub[1] = sub[1];
m_ps_sub[2] = sub[2];
m_ps_sub[3] = sub[3];
m_ps_sub[4] = sub[4];
GLState::dirty_subroutine_ps = true;
}
}
void GSShaderOGL::PS_ressources(GLuint64 handle[2]) void GSShaderOGL::PS_ressources(GLuint64 handle[2])
{ {
if (handle[0] != GLState::tex_handle[0] || handle[1] != GLState::tex_handle[1]) { if (handle[0] != GLState::tex_handle[0] || handle[1] != GLState::tex_handle[1]) {
@ -92,7 +62,7 @@ void GSShaderOGL::PS_ressources(GLuint64 handle[2])
} }
} }
void GSShaderOGL::PS(GLuint s, GLuint sub_count) void GSShaderOGL::PS(GLuint s)
{ {
#ifdef _DEBUG #ifdef _DEBUG
if (true) if (true)
@ -100,12 +70,9 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count)
if (GLState::ps != s) if (GLState::ps != s)
#endif #endif
{ {
m_ps_sub_count = sub_count;
// In debug always sets the program. It allow to replace the program in apitrace easily. // In debug always sets the program. It allow to replace the program in apitrace easily.
GLState::ps = s; GLState::ps = s;
GLState::dirty_prog = true; GLState::dirty_prog = true;
GLState::dirty_subroutine_ps = true;
GLState::dirty_ressources = true; GLState::dirty_ressources = true;
if (GLLoader::found_GL_ARB_separate_shader_objects) { if (GLLoader::found_GL_ARB_separate_shader_objects) {
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s); gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
@ -142,21 +109,6 @@ void GSShaderOGL::SetupRessources()
} }
} }
void GSShaderOGL::SetupSubroutineUniform()
{
if (!GLLoader::found_GL_ARB_shader_subroutine) return;
if (GLState::dirty_subroutine_vs && m_vs_sub_count) {
gl_UniformSubroutinesuiv(GL_VERTEX_SHADER, m_vs_sub_count, m_vs_sub);
GLState::dirty_subroutine_vs = false;
}
if (GLState::dirty_subroutine_ps && m_ps_sub_count) {
gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_ps_sub_count, m_ps_sub);
GLState::dirty_subroutine_ps = false;
}
}
bool GSShaderOGL::ValidateShader(GLuint s) bool GSShaderOGL::ValidateShader(GLuint s)
{ {
if (!m_debug_shader) return true; if (!m_debug_shader) return true;
@ -243,8 +195,6 @@ void GSShaderOGL::UseProgram()
if (GLState::dirty_prog) { if (GLState::dirty_prog) {
if (!GLLoader::found_GL_ARB_separate_shader_objects) { if (!GLLoader::found_GL_ARB_separate_shader_objects) {
GLState::dirty_subroutine_vs = true;
GLState::dirty_subroutine_ps = true;
GLState::dirty_ressources = true; GLState::dirty_ressources = true;
hash_map<uint64, GLuint >::iterator it; hash_map<uint64, GLuint >::iterator it;
@ -277,8 +227,6 @@ void GSShaderOGL::UseProgram()
SetupRessources(); SetupRessources();
SetupSubroutineUniform();
GLState::dirty_prog = false; GLState::dirty_prog = false;
GL_POP(); GL_POP();
@ -294,11 +242,6 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
// Need GL version 410 // Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n"; header += "#extension GL_ARB_separate_shader_objects: require\n";
} }
if (GLLoader::found_GL_ARB_shader_subroutine && GLLoader::found_GL_ARB_explicit_uniform_location) {
// Need GL version 400
header += "#define SUBROUTINE_GL40 1\n";
header += "#extension GL_ARB_shader_subroutine: require\n";
}
if (GLLoader::found_GL_ARB_explicit_uniform_location) { if (GLLoader::found_GL_ARB_explicit_uniform_location) {
// Need GL version 430 // Need GL version 430
header += "#extension GL_ARB_explicit_uniform_location: require\n"; header += "#extension GL_ARB_explicit_uniform_location: require\n";

View File

@ -25,13 +25,7 @@ class GSShaderOGL {
GLuint m_pipeline; GLuint m_pipeline;
hash_map<uint64, GLuint > m_single_prog; hash_map<uint64, GLuint > m_single_prog;
const bool m_debug_shader; const bool m_debug_shader;
GLuint m_vs_sub_count;
GLuint m_ps_sub_count;
GLuint m_vs_sub[1];
GLuint m_ps_sub[5];
void SetupSubroutineUniform();
void SetupRessources(); void SetupRessources();
bool ValidateShader(GLuint p); bool ValidateShader(GLuint p);
@ -46,11 +40,9 @@ class GSShaderOGL {
~GSShaderOGL(); ~GSShaderOGL();
void GS(GLuint s); void GS(GLuint s);
void PS(GLuint s, GLuint sub_count = 0); void PS(GLuint s);
void PS_subroutine(GLuint *sub);
void PS_ressources(GLuint64 handle[2]); void PS_ressources(GLuint64 handle[2]);
void VS(GLuint s, GLuint sub_count = 0); void VS(GLuint s);
void VS_subroutine(GLuint *sub);
void UseProgram(); void UseProgram();

View File

@ -143,16 +143,7 @@ void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer*
void GSDeviceOGL::SetupVS(VSSelector sel) void GSDeviceOGL::SetupVS(VSSelector sel)
{ {
if (GLLoader::found_GL_ARB_shader_subroutine) { m_shader->VS(m_vs[sel]);
GLuint sub[1];
sub[0] = sel.tme ? 1 + (uint32)sel.fst : 0;
m_shader->VS_subroutine(sub);
// Handle by subroutine useless now
sel.tme = 0;
sel.fst = 0;
}
m_shader->VS(m_vs[sel], 1);
} }
void GSDeviceOGL::SetupGS(GSSelector sel) void GSDeviceOGL::SetupGS(GSSelector sel)
@ -162,38 +153,6 @@ void GSDeviceOGL::SetupGS(GSSelector sel)
void GSDeviceOGL::SetupPS(PSSelector sel) void GSDeviceOGL::SetupPS(PSSelector sel)
{ {
if (GLLoader::found_GL_ARB_shader_subroutine) {
GLuint tfx = sel.tfx > 3 ? 19 : 11 + (uint32)sel.tfx + (uint32)sel.tcc*4;
GLuint colclip = 8 + (uint32)sel.colclip;
GLuint clamp =
(sel.wms == 2 && sel.wmt == 2) ? 20 :
(sel.wms == 2) ? 21 :
(sel.wmt == 2) ? 22 : 23;
GLuint wrap =
(sel.wms == 2 && sel.wmt == 2) ? 24 :
(sel.wms == 3 && sel.wmt == 3) ? 25 :
(sel.wms == 2 && sel.wmt == 3) ? 26 :
(sel.wms == 3 && sel.wmt == 2) ? 27 :
(sel.wms == 2) ? 28 :
(sel.wmt == 3) ? 29 :
(sel.wms == 3) ? 30 :
(sel.wmt == 2) ? 31 : 32;
GLuint sub[5] = {sel.atst, colclip, tfx, clamp, wrap};
m_shader->PS_subroutine(sub);
// Handle by subroutine useless now
sel.atst = 0;
sel.colclip = 0;
sel.tfx = 0;
sel.tcc = 0;
// sel.wms = 0;
// sel.wmt = 0;
}
// ************************************************************* // *************************************************************
// Static // Static
// ************************************************************* // *************************************************************
@ -210,7 +169,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel)
// ************************************************************* // *************************************************************
// Dynamic // Dynamic
// ************************************************************* // *************************************************************
m_shader->PS(ps, 3); m_shader->PS(ps);
} }
void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel) void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel)

View File

@ -80,7 +80,6 @@ void GSWndGL::PopulateGlFunction()
*(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync"); *(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync");
*(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange"); *(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange");
// GL4.0 // GL4.0
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv", true);
*(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB", true); *(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB", true);
*(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB", true); *(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB", true);
// GL4.1 // GL4.1

View File

@ -80,19 +80,6 @@ layout(std140, binding = 21) uniform cb21
vec2 TC_OffsetHack; vec2 TC_OffsetHack;
}; };
#ifdef SUBROUTINE_GL40
// Function pointer type + the functionn pointer variable
subroutine void AlphaTestType(vec4 c);
layout(location = 0) subroutine uniform AlphaTestType atst;
subroutine vec4 TfxType(vec4 t, vec4 c);
layout(location = 2) subroutine uniform TfxType tfx;
subroutine void ColClipType(inout vec4 c);
layout(location = 1) subroutine uniform ColClipType colclip;
#endif
vec4 sample_c(vec2 uv) vec4 sample_c(vec2 uv)
{ {
// FIXME: check the issue on openGL // FIXME: check the issue on openGL
@ -291,7 +278,6 @@ vec4 sample_color(vec2 st, float q)
} }
// FIXME Precompute the factor 255/128 in VS // FIXME Precompute the factor 255/128 in VS
#ifndef SUBROUTINE_GL40
vec4 tfx(vec4 t, vec4 c) vec4 tfx(vec4 t, vec4 c)
{ {
vec4 c_out = c; vec4 c_out = c;
@ -319,9 +305,7 @@ vec4 tfx(vec4 t, vec4 c)
return c_out; return c_out;
} }
#endif
#ifndef SUBROUTINE_GL40
void atst(vec4 c) void atst(vec4 c)
{ {
float a = trunc(c.a * 255.0 + 0.01); float a = trunc(c.a * 255.0 + 0.01);
@ -350,9 +334,7 @@ void atst(vec4 c)
discard; discard;
#endif #endif
} }
#endif
#ifndef SUBROUTINE_GL40
void colclip(inout vec4 c) void colclip(inout vec4 c)
{ {
#if (PS_COLCLIP == 2) #if (PS_COLCLIP == 2)
@ -363,7 +345,6 @@ void colclip(inout vec4 c)
c.rgb *= vec3(factor); c.rgb *= vec3(factor);
#endif #endif
} }
#endif
void fog(inout vec4 c, float f) void fog(inout vec4 c, float f)
{ {

View File

@ -1,285 +0,0 @@
//#version 420 // Keep it for text editor detection
// Subroutine of standard fs function (I don't know if it will be ever used one day)
// FIXME crash nvidia
#if 0
// Function pointer type
subroutine vec4 WrapType(vec4 uv);
// a function pointer variable
layout(location = 4) subroutine uniform WrapType wrapuv;
layout(index = 24) subroutine(WrapType)
vec4 wrapuv_wms_wmt_2(vec4 uv)
{
vec4 uv_out = uv;
uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);
return uv_out;
}
layout(index = 25) subroutine(WrapType)
vec4 wrapuv_wms_wmt3(vec4 uv)
{
vec4 uv_out = uv;
uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;
return uv_out;
}
layout(index = 26) subroutine(WrapType)
vec4 wrapuv_wms2_wmt3(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;
return uv_out;
}
layout(index = 27) subroutine(WrapType)
vec4 wrapuv_wms3_wmt2(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
return uv_out;
}
layout(index = 28) subroutine(WrapType)
vec4 wrapuv_wms2_wmtx(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
return uv_out;
}
layout(index = 29) subroutine(WrapType)
vec4 wrapuv_wmsx_wmt3(vec4 uv)
{
vec4 uv_out = uv;
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;
return uv_out;
}
layout(index = 30) subroutine(WrapType)
vec4 wrapuv_wms3_wmtx(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
return uv_out;
}
layout(index = 31) subroutine(WrapType)
vec4 wrapuv_wmsx_wmt2(vec4 uv)
{
vec4 uv_out = uv;
uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
return uv_out;
}
layout(index = 32) subroutine(WrapType)
vec4 wrapuv_dummy(vec4 uv)
{
return uv;
}
#endif
// FIXME crash nvidia
#if 0
// Function pointer type
subroutine vec2 ClampType(vec2 uv);
// a function pointer variable
layout(location = 3) subroutine uniform ClampType clampuv;
layout(index = 20) subroutine(ClampType)
vec2 clampuv_wms2_wmt2(vec2 uv)
{
return clamp(uv, MinF, MinMax.zw);
}
layout(index = 21) subroutine(ClampType)
vec2 clampuv_wms2(vec2 uv)
{
vec2 uv_out = uv;
uv_out.x = clamp(uv.x, MinF.x, MinMax.z);
return uv_out;
}
layout(index = 22) subroutine(ClampType)
vec2 clampuv_wmt2(vec2 uv)
{
vec2 uv_out = uv;
uv_out.y = clamp(uv.y, MinF.y, MinMax.w);
return uv_out;
}
layout(index = 23) subroutine(ClampType)
vec2 clampuv_dummy(vec2 uv)
{
return uv;
}
#endif
#ifdef SUBROUTINE_GL40
layout(index = 11) subroutine(TfxType)
vec4 tfx_0_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;
return c_out;
}
layout(index = 12) subroutine(TfxType)
vec4 tfx_1_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = t.rgb;
return c_out;
}
layout(index = 13) subroutine(TfxType)
vec4 tfx_2_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
return c_out;
}
layout(index = 14) subroutine(TfxType)
vec4 tfx_3_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
return c_out;
}
layout(index = 15) subroutine(TfxType)
vec4 tfx_0_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out = c * t * 255.0f / 128.0f;
return c_out;
}
layout(index = 16) subroutine(TfxType)
vec4 tfx_1_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out = t;
return c_out;
}
layout(index = 17) subroutine(TfxType)
vec4 tfx_2_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
c_out.a += t.a;
return c_out;
}
layout(index = 18) subroutine(TfxType)
vec4 tfx_3_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
c_out.a = t.a;
return c_out;
}
layout(index = 19) subroutine(TfxType)
vec4 tfx_dummy(vec4 t, vec4 c)
{
return c;
}
#endif
#ifdef SUBROUTINE_GL40
layout(index = 0) subroutine(AlphaTestType)
void atest_never(vec4 c)
{
discard;
}
layout(index = 1) subroutine(AlphaTestType)
void atest_always(vec4 c)
{
// Nothing to do
}
layout(index = 2) subroutine(AlphaTestType)
void atest_l(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if (PS_SPRITEHACK == 0)
if ((AREF - a - 0.5f) < 0.0f)
discard;
}
layout(index = 3) subroutine(AlphaTestType)
void atest_le(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((AREF - a + 0.5f) < 0.0f)
discard;
}
layout(index = 4) subroutine(AlphaTestType)
void atest_e(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((0.5f - abs(a - AREF)) < 0.0f)
discard;
}
layout(index = 5) subroutine(AlphaTestType)
void atest_ge(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((a-AREF + 0.5f) < 0.0f)
discard;
}
layout(index = 6) subroutine(AlphaTestType)
void atest_g(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((a-AREF - 0.5f) < 0.0f)
discard;
}
layout(index = 7) subroutine(AlphaTestType)
void atest_ne(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((abs(a - AREF) - 0.5f) < 0.0f)
discard;
}
#endif
#ifdef SUBROUTINE_GL40
layout(index = 8) subroutine(ColClipType)
void colclip_0(inout vec4 c)
{
// nothing to do
}
layout(index = 9) subroutine(ColClipType)
void colclip_1(inout vec4 c)
{
// FIXME !!!!
//c.rgb *= c.rgb < 128./255;
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
c.rgb *= vec3(factor);
}
layout(index = 10) subroutine(ColClipType)
void colclip_2(inout vec4 c)
{
c.rgb = 256.0f/255.0f - c.rgb;
// FIXME !!!!
//c.rgb *= c.rgb < 128./255;
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
c.rgb *= vec3(factor);
}
#endif

View File

@ -42,36 +42,6 @@ const float exp_min32 = exp2(-32.0f);
const float exp_min31 = exp2(-31.0f); const float exp_min31 = exp2(-31.0f);
#endif #endif
#ifdef SUBROUTINE_GL40
// Function pointer type
subroutine void TextureCoordType(void);
// a function pointer variable
layout(location = 0) subroutine uniform TextureCoordType texture_coord;
layout(index = 0) subroutine(TextureCoordType)
void tme_0()
{
VSout_t.xy = vec2(0.0f, 0.0f);
VSout_t.w = 1.0f;
}
layout(index = 1) subroutine(TextureCoordType)
void tme_1_fst_0()
{
VSout_t.xy = i_st;
VSout_t.w = i_q;
}
layout(index = 2) subroutine(TextureCoordType)
void tme_1_fst_1()
{
VSout_t.xy = vec2(i_uv) * TextureScale;
VSout_t.w = 1.0f;
}
#else
void texture_coord() void texture_coord()
{ {
if(VS_TME != 0) if(VS_TME != 0)
@ -98,8 +68,6 @@ void texture_coord()
} }
} }
#endif
void vs_main() void vs_main()
{ {
highp uint z; highp uint z;

View File

@ -650,36 +650,6 @@ static const char* tfx_vgs_glsl =
"const float exp_min31 = exp2(-31.0f);\n" "const float exp_min31 = exp2(-31.0f);\n"
"#endif\n" "#endif\n"
"\n" "\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type\n"
"subroutine void TextureCoordType(void);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n"
"\n"
"layout(index = 0) subroutine(TextureCoordType)\n"
"void tme_0()\n"
"{\n"
" VSout_t.xy = vec2(0.0f, 0.0f);\n"
" VSout_t.w = 1.0f;\n"
"}\n"
"\n"
"layout(index = 1) subroutine(TextureCoordType)\n"
"void tme_1_fst_0()\n"
"{\n"
" VSout_t.xy = i_st;\n"
" VSout_t.w = i_q;\n"
"}\n"
"\n"
"layout(index = 2) subroutine(TextureCoordType)\n"
"void tme_1_fst_1()\n"
"{\n"
" VSout_t.xy = vec2(i_uv) * TextureScale;\n"
" VSout_t.w = 1.0f;\n"
"}\n"
"\n"
"#else\n"
"\n"
"void texture_coord()\n" "void texture_coord()\n"
"{\n" "{\n"
" if(VS_TME != 0)\n" " if(VS_TME != 0)\n"
@ -706,8 +676,6 @@ static const char* tfx_vgs_glsl =
" }\n" " }\n"
"}\n" "}\n"
"\n" "\n"
"#endif\n"
"\n"
"void vs_main()\n" "void vs_main()\n"
"{\n" "{\n"
" highp uint z;\n" " highp uint z;\n"
@ -973,19 +941,6 @@ static const char* tfx_fs_all_glsl =
" vec2 TC_OffsetHack;\n" " vec2 TC_OffsetHack;\n"
"};\n" "};\n"
"\n" "\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type + the functionn pointer variable\n"
"subroutine void AlphaTestType(vec4 c);\n"
"layout(location = 0) subroutine uniform AlphaTestType atst;\n"
"\n"
"subroutine vec4 TfxType(vec4 t, vec4 c);\n"
"layout(location = 2) subroutine uniform TfxType tfx;\n"
"\n"
"subroutine void ColClipType(inout vec4 c);\n"
"layout(location = 1) subroutine uniform ColClipType colclip;\n"
"#endif\n"
"\n"
"\n"
"vec4 sample_c(vec2 uv)\n" "vec4 sample_c(vec2 uv)\n"
"{\n" "{\n"
" // FIXME: check the issue on openGL\n" " // FIXME: check the issue on openGL\n"
@ -1184,7 +1139,6 @@ static const char* tfx_fs_all_glsl =
"}\n" "}\n"
"\n" "\n"
"// FIXME Precompute the factor 255/128 in VS\n" "// FIXME Precompute the factor 255/128 in VS\n"
"#ifndef SUBROUTINE_GL40\n"
"vec4 tfx(vec4 t, vec4 c)\n" "vec4 tfx(vec4 t, vec4 c)\n"
"{\n" "{\n"
" vec4 c_out = c;\n" " vec4 c_out = c;\n"
@ -1212,9 +1166,7 @@ static const char* tfx_fs_all_glsl =
"\n" "\n"
" return c_out;\n" " return c_out;\n"
"}\n" "}\n"
"#endif\n"
"\n" "\n"
"#ifndef SUBROUTINE_GL40\n"
"void atst(vec4 c)\n" "void atst(vec4 c)\n"
"{\n" "{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n" " float a = trunc(c.a * 255.0 + 0.01);\n"
@ -1243,9 +1195,7 @@ static const char* tfx_fs_all_glsl =
" discard;\n" " discard;\n"
"#endif\n" "#endif\n"
"}\n" "}\n"
"#endif\n"
"\n" "\n"
"#ifndef SUBROUTINE_GL40\n"
"void colclip(inout vec4 c)\n" "void colclip(inout vec4 c)\n"
"{\n" "{\n"
"#if (PS_COLCLIP == 2)\n" "#if (PS_COLCLIP == 2)\n"
@ -1256,7 +1206,6 @@ static const char* tfx_fs_all_glsl =
" c.rgb *= vec3(factor);\n" " c.rgb *= vec3(factor);\n"
"#endif\n" "#endif\n"
"}\n" "}\n"
"#endif\n"
"\n" "\n"
"void fog(inout vec4 c, float f)\n" "void fog(inout vec4 c, float f)\n"
"{\n" "{\n"
@ -1516,291 +1465,6 @@ static const char* tfx_fs_all_glsl =
"}\n" "}\n"
"\n" "\n"
"#endif\n" "#endif\n"
"//#version 420 // Keep it for text editor detection\n"
"\n"
"// Subroutine of standard fs function (I don't know if it will be ever used one day)\n"
"\n"
"// FIXME crash nvidia\n"
"#if 0\n"
"// Function pointer type\n"
"subroutine vec4 WrapType(vec4 uv);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 4) subroutine uniform WrapType wrapuv;\n"
"\n"
"layout(index = 24) subroutine(WrapType)\n"
"vec4 wrapuv_wms_wmt_2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 25) subroutine(WrapType)\n"
"vec4 wrapuv_wms_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 26) subroutine(WrapType)\n"
"vec4 wrapuv_wms2_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 27) subroutine(WrapType)\n"
"vec4 wrapuv_wms3_wmt2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 28) subroutine(WrapType)\n"
"vec4 wrapuv_wms2_wmtx(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 29) subroutine(WrapType)\n"
"vec4 wrapuv_wmsx_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 30) subroutine(WrapType)\n"
"vec4 wrapuv_wms3_wmtx(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 31) subroutine(WrapType)\n"
"vec4 wrapuv_wmsx_wmt2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 32) subroutine(WrapType)\n"
"vec4 wrapuv_dummy(vec4 uv)\n"
"{\n"
" return uv;\n"
"}\n"
"#endif\n"
"\n"
"// FIXME crash nvidia\n"
"#if 0\n"
"// Function pointer type\n"
"subroutine vec2 ClampType(vec2 uv);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 3) subroutine uniform ClampType clampuv;\n"
"\n"
"layout(index = 20) subroutine(ClampType)\n"
"vec2 clampuv_wms2_wmt2(vec2 uv)\n"
"{\n"
" return clamp(uv, MinF, MinMax.zw);\n"
"}\n"
"\n"
"layout(index = 21) subroutine(ClampType)\n"
"vec2 clampuv_wms2(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
" uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 22) subroutine(ClampType)\n"
"vec2 clampuv_wmt2(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
" uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 23) subroutine(ClampType)\n"
"vec2 clampuv_dummy(vec2 uv)\n"
"{\n"
" return uv;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"layout(index = 11) subroutine(TfxType)\n"
"vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 12) subroutine(TfxType)\n"
"vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = t.rgb;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 13) subroutine(TfxType)\n"
"vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 14) subroutine(TfxType)\n"
"vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 15) subroutine(TfxType)\n"
"vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out = c * t * 255.0f / 128.0f;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 16) subroutine(TfxType)\n"
"vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out = t;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 17) subroutine(TfxType)\n"
"vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" c_out.a += t.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 18) subroutine(TfxType)\n"
"vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" c_out.a = t.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 19) subroutine(TfxType)\n"
"vec4 tfx_dummy(vec4 t, vec4 c)\n"
"{\n"
" return c;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"layout(index = 0) subroutine(AlphaTestType)\n"
"void atest_never(vec4 c)\n"
"{\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 1) subroutine(AlphaTestType)\n"
"void atest_always(vec4 c)\n"
"{\n"
" // Nothing to do\n"
"}\n"
"\n"
"layout(index = 2) subroutine(AlphaTestType)\n"
"void atest_l(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if (PS_SPRITEHACK == 0)\n"
" if ((AREF - a - 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 3) subroutine(AlphaTestType)\n"
"void atest_le(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((AREF - a + 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 4) subroutine(AlphaTestType)\n"
"void atest_e(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((0.5f - abs(a - AREF)) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 5) subroutine(AlphaTestType)\n"
"void atest_ge(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((a-AREF + 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 6) subroutine(AlphaTestType)\n"
"void atest_g(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((a-AREF - 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 7) subroutine(AlphaTestType)\n"
"void atest_ne(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"layout(index = 8) subroutine(ColClipType)\n"
"void colclip_0(inout vec4 c)\n"
"{\n"
" // nothing to do\n"
"}\n"
"\n"
"layout(index = 9) subroutine(ColClipType)\n"
"void colclip_1(inout vec4 c)\n"
"{\n"
" // FIXME !!!!\n"
" //c.rgb *= c.rgb < 128./255;\n"
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
" c.rgb *= vec3(factor);\n"
"}\n"
"\n"
"layout(index = 10) subroutine(ColClipType)\n"
"void colclip_2(inout vec4 c)\n"
"{\n"
" c.rgb = 256.0f/255.0f - c.rgb;\n"
" // FIXME !!!!\n"
" //c.rgb *= c.rgb < 128./255;\n"
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
" c.rgb *= vec3(factor);\n"
"}\n"
"#endif\n"
; ;
static const char* fxaa_fx = static const char* fxaa_fx =