gsdx-ogl: wipe out subroutine code

Code was completely bitrotten
Code was a partial test (and yet 500 lines already)
Shaders are becoming more and more complex, and multithreading support greatly
reduces the cost of shader switches
This commit is contained in:
Gregory Hainaut 2015-07-17 18:16:35 +02:00
parent e3751f6cd9
commit b4c04ed00a
14 changed files with 9 additions and 814 deletions

View File

@ -42,7 +42,7 @@ my $gsdx_out = File::Spec->catdir($gsdx_path, "glsl_source.h");
# Just a hack to reuse glsl2h function easily
$gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res", "glsl");
my @tfx_res = qw/tfx_fs.glsl tfx_fs_subroutine.glsl/;
my @tfx_res = qw/tfx_fs.glsl/;
my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl");
my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/;

View File

@ -87,7 +87,6 @@ PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange = NU
PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL;
PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL;
// GL4.0
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
// GL4.1
PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL;
PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL;
@ -339,13 +338,12 @@ namespace GLLoader {
bool found_GL_ARB_draw_buffers_blend = false; // DX10 GPU limited driver on windows!
// Note: except Apple, all drivers support explicit uniform location
bool found_GL_ARB_explicit_uniform_location = false; // need by subroutine and bindless texture
bool found_GL_ARB_explicit_uniform_location = false; // need by bindless texture
// GL4 hardware
bool found_GL_ARB_buffer_storage = false;
bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it
bool found_GL_ARB_gpu_shader5 = false;
bool found_GL_ARB_shader_image_load_store = false; // GLES3.1
bool found_GL_ARB_shader_subroutine = false;
bool found_GL_ARB_bindless_texture = false; // GL5 GPU?
bool found_GL_ARB_texture_barrier = false; // Well maybe supported by older hardware I don't know
@ -450,19 +448,6 @@ namespace GLLoader {
if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true;
else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n");
}
#if 0
// Erratum: on nvidia implementation, gain is very nice : 42.5 fps => 46.5 fps
//
// Strangely it doesn't provide the speed boost as expected.
// Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on
// colin mcrae 3 by 2100 uniform, but code is slower!
//
// Current hypothesis: the validation of useprogram is done in the "driver thread" whereas the extra function calls
// are done on the overloaded main threads.
// Apitrace profiling shows faster GPU draw times
if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true;
#endif
// GL4.2
if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true;
if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true;
@ -495,7 +480,6 @@ namespace GLLoader {
status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend");
// GL4.1
status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects");
status &= status_and_override(found_GL_ARB_shader_subroutine, "GL_ARB_shader_subroutine");
// GL4.2
status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store");
status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true);

View File

@ -270,7 +270,6 @@ extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange;
extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate;
extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate;
// GL4.0
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
// GL4.1
extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline;
extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines;
@ -361,7 +360,6 @@ namespace GLLoader {
extern bool found_GL_ARB_shader_image_load_store;
extern bool found_GL_ARB_clear_texture;
extern bool found_GL_ARB_buffer_storage;
extern bool found_GL_ARB_shader_subroutine;
extern bool found_GL_ARB_bindless_texture;
extern bool found_GL_ARB_explicit_uniform_location;
extern bool found_GL_ARB_clip_control;

View File

@ -58,8 +58,6 @@ namespace GLState {
GLuint vs;
GLuint program;
bool dirty_prog;
bool dirty_subroutine_vs;
bool dirty_subroutine_ps;
#if 0
struct {
GSVertexBufferStateOGL* vb;
@ -105,8 +103,6 @@ namespace GLState {
vs = 0;
program = 0;
dirty_prog = false;
dirty_subroutine_vs = false;
dirty_subroutine_ps = false;
dirty_ressources = false;
}
}

View File

@ -58,8 +58,6 @@ namespace GLState {
extern GLuint vs;
extern GLuint program; // monolith program (when sso isn't supported)
extern bool dirty_prog;
extern bool dirty_subroutine_vs;
extern bool dirty_subroutine_ps;
extern bool dirty_ressources;
extern void Clear();

View File

@ -229,7 +229,6 @@ class GSDeviceOGL : public GSDevice
{
uint32 wildhack:1;
uint32 bppz:2;
// Next param will be handle by subroutine
uint32 tme:1;
uint32 fst:1;
@ -338,7 +337,6 @@ class GSDeviceOGL : public GSDevice
uint32 tcoffsethack:1;
//uint32 point_sampler:1; Not tested, so keep the bit for blend
uint32 iip:1;
// Next param will be handle by subroutine (broken currently)
uint32 colclip:2;
uint32 atst:3;

View File

@ -24,14 +24,8 @@
#include "GLState.h"
GSShaderOGL::GSShaderOGL(bool debug) :
m_debug_shader(debug),
m_vs_sub_count(0),
m_ps_sub_count(0)
m_debug_shader(debug)
{
memset(&m_vs_sub, 0, countof(m_vs_sub)*sizeof(m_vs_sub[0]));
memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(m_ps_sub[0]));
m_single_prog.clear();
if (GLLoader::found_GL_ARB_separate_shader_objects) {
gl_GenProgramPipelines(1, &m_pipeline);
@ -48,41 +42,17 @@ GSShaderOGL::~GSShaderOGL()
m_single_prog.clear();
}
void GSShaderOGL::VS(GLuint s, GLuint sub_count)
void GSShaderOGL::VS(GLuint s)
{
if (GLState::vs != s)
{
m_vs_sub_count = sub_count;
GLState::vs = s;
GLState::dirty_prog = true;
GLState::dirty_subroutine_vs = true;
if (GLLoader::found_GL_ARB_separate_shader_objects)
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
}
}
void GSShaderOGL::VS_subroutine(GLuint *sub)
{
if (!(m_vs_sub[0] == sub[0])) {
m_vs_sub[0] = sub[0];
GLState::dirty_subroutine_vs = true;
}
}
void GSShaderOGL::PS_subroutine(GLuint *sub)
{
// FIXME could be more efficient with GSvector
if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1] && m_ps_sub[2] == sub[2] && m_ps_sub[3] == sub[3] && m_ps_sub[4] == sub[4])) {
m_ps_sub[0] = sub[0];
m_ps_sub[1] = sub[1];
m_ps_sub[2] = sub[2];
m_ps_sub[3] = sub[3];
m_ps_sub[4] = sub[4];
GLState::dirty_subroutine_ps = true;
}
}
void GSShaderOGL::PS_ressources(GLuint64 handle[2])
{
if (handle[0] != GLState::tex_handle[0] || handle[1] != GLState::tex_handle[1]) {
@ -92,7 +62,7 @@ void GSShaderOGL::PS_ressources(GLuint64 handle[2])
}
}
void GSShaderOGL::PS(GLuint s, GLuint sub_count)
void GSShaderOGL::PS(GLuint s)
{
#ifdef _DEBUG
if (true)
@ -100,12 +70,9 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count)
if (GLState::ps != s)
#endif
{
m_ps_sub_count = sub_count;
// In debug always sets the program. It allow to replace the program in apitrace easily.
GLState::ps = s;
GLState::dirty_prog = true;
GLState::dirty_subroutine_ps = true;
GLState::dirty_ressources = true;
if (GLLoader::found_GL_ARB_separate_shader_objects) {
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
@ -142,21 +109,6 @@ void GSShaderOGL::SetupRessources()
}
}
void GSShaderOGL::SetupSubroutineUniform()
{
if (!GLLoader::found_GL_ARB_shader_subroutine) return;
if (GLState::dirty_subroutine_vs && m_vs_sub_count) {
gl_UniformSubroutinesuiv(GL_VERTEX_SHADER, m_vs_sub_count, m_vs_sub);
GLState::dirty_subroutine_vs = false;
}
if (GLState::dirty_subroutine_ps && m_ps_sub_count) {
gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_ps_sub_count, m_ps_sub);
GLState::dirty_subroutine_ps = false;
}
}
bool GSShaderOGL::ValidateShader(GLuint s)
{
if (!m_debug_shader) return true;
@ -243,8 +195,6 @@ void GSShaderOGL::UseProgram()
if (GLState::dirty_prog) {
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
GLState::dirty_subroutine_vs = true;
GLState::dirty_subroutine_ps = true;
GLState::dirty_ressources = true;
hash_map<uint64, GLuint >::iterator it;
@ -277,8 +227,6 @@ void GSShaderOGL::UseProgram()
SetupRessources();
SetupSubroutineUniform();
GLState::dirty_prog = false;
GL_POP();
@ -294,11 +242,6 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
// Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n";
}
if (GLLoader::found_GL_ARB_shader_subroutine && GLLoader::found_GL_ARB_explicit_uniform_location) {
// Need GL version 400
header += "#define SUBROUTINE_GL40 1\n";
header += "#extension GL_ARB_shader_subroutine: require\n";
}
if (GLLoader::found_GL_ARB_explicit_uniform_location) {
// Need GL version 430
header += "#extension GL_ARB_explicit_uniform_location: require\n";

View File

@ -25,13 +25,7 @@ class GSShaderOGL {
GLuint m_pipeline;
hash_map<uint64, GLuint > m_single_prog;
const bool m_debug_shader;
GLuint m_vs_sub_count;
GLuint m_ps_sub_count;
GLuint m_vs_sub[1];
GLuint m_ps_sub[5];
void SetupSubroutineUniform();
void SetupRessources();
bool ValidateShader(GLuint p);
@ -46,11 +40,9 @@ class GSShaderOGL {
~GSShaderOGL();
void GS(GLuint s);
void PS(GLuint s, GLuint sub_count = 0);
void PS_subroutine(GLuint *sub);
void PS(GLuint s);
void PS_ressources(GLuint64 handle[2]);
void VS(GLuint s, GLuint sub_count = 0);
void VS_subroutine(GLuint *sub);
void VS(GLuint s);
void UseProgram();

View File

@ -143,16 +143,7 @@ void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer*
void GSDeviceOGL::SetupVS(VSSelector sel)
{
if (GLLoader::found_GL_ARB_shader_subroutine) {
GLuint sub[1];
sub[0] = sel.tme ? 1 + (uint32)sel.fst : 0;
m_shader->VS_subroutine(sub);
// Handle by subroutine useless now
sel.tme = 0;
sel.fst = 0;
}
m_shader->VS(m_vs[sel], 1);
m_shader->VS(m_vs[sel]);
}
void GSDeviceOGL::SetupGS(GSSelector sel)
@ -162,38 +153,6 @@ void GSDeviceOGL::SetupGS(GSSelector sel)
void GSDeviceOGL::SetupPS(PSSelector sel)
{
if (GLLoader::found_GL_ARB_shader_subroutine) {
GLuint tfx = sel.tfx > 3 ? 19 : 11 + (uint32)sel.tfx + (uint32)sel.tcc*4;
GLuint colclip = 8 + (uint32)sel.colclip;
GLuint clamp =
(sel.wms == 2 && sel.wmt == 2) ? 20 :
(sel.wms == 2) ? 21 :
(sel.wmt == 2) ? 22 : 23;
GLuint wrap =
(sel.wms == 2 && sel.wmt == 2) ? 24 :
(sel.wms == 3 && sel.wmt == 3) ? 25 :
(sel.wms == 2 && sel.wmt == 3) ? 26 :
(sel.wms == 3 && sel.wmt == 2) ? 27 :
(sel.wms == 2) ? 28 :
(sel.wmt == 3) ? 29 :
(sel.wms == 3) ? 30 :
(sel.wmt == 2) ? 31 : 32;
GLuint sub[5] = {sel.atst, colclip, tfx, clamp, wrap};
m_shader->PS_subroutine(sub);
// Handle by subroutine useless now
sel.atst = 0;
sel.colclip = 0;
sel.tfx = 0;
sel.tcc = 0;
// sel.wms = 0;
// sel.wmt = 0;
}
// *************************************************************
// Static
// *************************************************************
@ -210,7 +169,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel)
// *************************************************************
// Dynamic
// *************************************************************
m_shader->PS(ps, 3);
m_shader->PS(ps);
}
void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel)

View File

@ -80,7 +80,6 @@ void GSWndGL::PopulateGlFunction()
*(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync");
*(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange");
// GL4.0
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv", true);
*(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB", true);
*(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB", true);
// GL4.1

View File

@ -80,19 +80,6 @@ layout(std140, binding = 21) uniform cb21
vec2 TC_OffsetHack;
};
#ifdef SUBROUTINE_GL40
// Function pointer type + the functionn pointer variable
subroutine void AlphaTestType(vec4 c);
layout(location = 0) subroutine uniform AlphaTestType atst;
subroutine vec4 TfxType(vec4 t, vec4 c);
layout(location = 2) subroutine uniform TfxType tfx;
subroutine void ColClipType(inout vec4 c);
layout(location = 1) subroutine uniform ColClipType colclip;
#endif
vec4 sample_c(vec2 uv)
{
// FIXME: check the issue on openGL
@ -291,7 +278,6 @@ vec4 sample_color(vec2 st, float q)
}
// FIXME Precompute the factor 255/128 in VS
#ifndef SUBROUTINE_GL40
vec4 tfx(vec4 t, vec4 c)
{
vec4 c_out = c;
@ -319,9 +305,7 @@ vec4 tfx(vec4 t, vec4 c)
return c_out;
}
#endif
#ifndef SUBROUTINE_GL40
void atst(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
@ -350,9 +334,7 @@ void atst(vec4 c)
discard;
#endif
}
#endif
#ifndef SUBROUTINE_GL40
void colclip(inout vec4 c)
{
#if (PS_COLCLIP == 2)
@ -363,7 +345,6 @@ void colclip(inout vec4 c)
c.rgb *= vec3(factor);
#endif
}
#endif
void fog(inout vec4 c, float f)
{

View File

@ -1,285 +0,0 @@
//#version 420 // Keep it for text editor detection
// Subroutine of standard fs function (I don't know if it will be ever used one day)
// FIXME crash nvidia
#if 0
// Function pointer type
subroutine vec4 WrapType(vec4 uv);
// a function pointer variable
layout(location = 4) subroutine uniform WrapType wrapuv;
layout(index = 24) subroutine(WrapType)
vec4 wrapuv_wms_wmt_2(vec4 uv)
{
vec4 uv_out = uv;
uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);
return uv_out;
}
layout(index = 25) subroutine(WrapType)
vec4 wrapuv_wms_wmt3(vec4 uv)
{
vec4 uv_out = uv;
uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;
return uv_out;
}
layout(index = 26) subroutine(WrapType)
vec4 wrapuv_wms2_wmt3(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;
return uv_out;
}
layout(index = 27) subroutine(WrapType)
vec4 wrapuv_wms3_wmt2(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
return uv_out;
}
layout(index = 28) subroutine(WrapType)
vec4 wrapuv_wms2_wmtx(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
return uv_out;
}
layout(index = 29) subroutine(WrapType)
vec4 wrapuv_wmsx_wmt3(vec4 uv)
{
vec4 uv_out = uv;
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;
return uv_out;
}
layout(index = 30) subroutine(WrapType)
vec4 wrapuv_wms3_wmtx(vec4 uv)
{
vec4 uv_out = uv;
uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
return uv_out;
}
layout(index = 31) subroutine(WrapType)
vec4 wrapuv_wmsx_wmt2(vec4 uv)
{
vec4 uv_out = uv;
uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
return uv_out;
}
layout(index = 32) subroutine(WrapType)
vec4 wrapuv_dummy(vec4 uv)
{
return uv;
}
#endif
// FIXME crash nvidia
#if 0
// Function pointer type
subroutine vec2 ClampType(vec2 uv);
// a function pointer variable
layout(location = 3) subroutine uniform ClampType clampuv;
layout(index = 20) subroutine(ClampType)
vec2 clampuv_wms2_wmt2(vec2 uv)
{
return clamp(uv, MinF, MinMax.zw);
}
layout(index = 21) subroutine(ClampType)
vec2 clampuv_wms2(vec2 uv)
{
vec2 uv_out = uv;
uv_out.x = clamp(uv.x, MinF.x, MinMax.z);
return uv_out;
}
layout(index = 22) subroutine(ClampType)
vec2 clampuv_wmt2(vec2 uv)
{
vec2 uv_out = uv;
uv_out.y = clamp(uv.y, MinF.y, MinMax.w);
return uv_out;
}
layout(index = 23) subroutine(ClampType)
vec2 clampuv_dummy(vec2 uv)
{
return uv;
}
#endif
#ifdef SUBROUTINE_GL40
layout(index = 11) subroutine(TfxType)
vec4 tfx_0_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;
return c_out;
}
layout(index = 12) subroutine(TfxType)
vec4 tfx_1_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = t.rgb;
return c_out;
}
layout(index = 13) subroutine(TfxType)
vec4 tfx_2_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
return c_out;
}
layout(index = 14) subroutine(TfxType)
vec4 tfx_3_tcc_0(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
return c_out;
}
layout(index = 15) subroutine(TfxType)
vec4 tfx_0_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out = c * t * 255.0f / 128.0f;
return c_out;
}
layout(index = 16) subroutine(TfxType)
vec4 tfx_1_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out = t;
return c_out;
}
layout(index = 17) subroutine(TfxType)
vec4 tfx_2_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
c_out.a += t.a;
return c_out;
}
layout(index = 18) subroutine(TfxType)
vec4 tfx_3_tcc_1(vec4 t, vec4 c)
{
vec4 c_out = c;
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
c_out.a = t.a;
return c_out;
}
layout(index = 19) subroutine(TfxType)
vec4 tfx_dummy(vec4 t, vec4 c)
{
return c;
}
#endif
#ifdef SUBROUTINE_GL40
layout(index = 0) subroutine(AlphaTestType)
void atest_never(vec4 c)
{
discard;
}
layout(index = 1) subroutine(AlphaTestType)
void atest_always(vec4 c)
{
// Nothing to do
}
layout(index = 2) subroutine(AlphaTestType)
void atest_l(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if (PS_SPRITEHACK == 0)
if ((AREF - a - 0.5f) < 0.0f)
discard;
}
layout(index = 3) subroutine(AlphaTestType)
void atest_le(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((AREF - a + 0.5f) < 0.0f)
discard;
}
layout(index = 4) subroutine(AlphaTestType)
void atest_e(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((0.5f - abs(a - AREF)) < 0.0f)
discard;
}
layout(index = 5) subroutine(AlphaTestType)
void atest_ge(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((a-AREF + 0.5f) < 0.0f)
discard;
}
layout(index = 6) subroutine(AlphaTestType)
void atest_g(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((a-AREF - 0.5f) < 0.0f)
discard;
}
layout(index = 7) subroutine(AlphaTestType)
void atest_ne(vec4 c)
{
float a = trunc(c.a * 255.0 + 0.01);
if ((abs(a - AREF) - 0.5f) < 0.0f)
discard;
}
#endif
#ifdef SUBROUTINE_GL40
layout(index = 8) subroutine(ColClipType)
void colclip_0(inout vec4 c)
{
// nothing to do
}
layout(index = 9) subroutine(ColClipType)
void colclip_1(inout vec4 c)
{
// FIXME !!!!
//c.rgb *= c.rgb < 128./255;
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
c.rgb *= vec3(factor);
}
layout(index = 10) subroutine(ColClipType)
void colclip_2(inout vec4 c)
{
c.rgb = 256.0f/255.0f - c.rgb;
// FIXME !!!!
//c.rgb *= c.rgb < 128./255;
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
c.rgb *= vec3(factor);
}
#endif

View File

@ -42,36 +42,6 @@ const float exp_min32 = exp2(-32.0f);
const float exp_min31 = exp2(-31.0f);
#endif
#ifdef SUBROUTINE_GL40
// Function pointer type
subroutine void TextureCoordType(void);
// a function pointer variable
layout(location = 0) subroutine uniform TextureCoordType texture_coord;
layout(index = 0) subroutine(TextureCoordType)
void tme_0()
{
VSout_t.xy = vec2(0.0f, 0.0f);
VSout_t.w = 1.0f;
}
layout(index = 1) subroutine(TextureCoordType)
void tme_1_fst_0()
{
VSout_t.xy = i_st;
VSout_t.w = i_q;
}
layout(index = 2) subroutine(TextureCoordType)
void tme_1_fst_1()
{
VSout_t.xy = vec2(i_uv) * TextureScale;
VSout_t.w = 1.0f;
}
#else
void texture_coord()
{
if(VS_TME != 0)
@ -98,8 +68,6 @@ void texture_coord()
}
}
#endif
void vs_main()
{
highp uint z;

View File

@ -650,36 +650,6 @@ static const char* tfx_vgs_glsl =
"const float exp_min31 = exp2(-31.0f);\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type\n"
"subroutine void TextureCoordType(void);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n"
"\n"
"layout(index = 0) subroutine(TextureCoordType)\n"
"void tme_0()\n"
"{\n"
" VSout_t.xy = vec2(0.0f, 0.0f);\n"
" VSout_t.w = 1.0f;\n"
"}\n"
"\n"
"layout(index = 1) subroutine(TextureCoordType)\n"
"void tme_1_fst_0()\n"
"{\n"
" VSout_t.xy = i_st;\n"
" VSout_t.w = i_q;\n"
"}\n"
"\n"
"layout(index = 2) subroutine(TextureCoordType)\n"
"void tme_1_fst_1()\n"
"{\n"
" VSout_t.xy = vec2(i_uv) * TextureScale;\n"
" VSout_t.w = 1.0f;\n"
"}\n"
"\n"
"#else\n"
"\n"
"void texture_coord()\n"
"{\n"
" if(VS_TME != 0)\n"
@ -706,8 +676,6 @@ static const char* tfx_vgs_glsl =
" }\n"
"}\n"
"\n"
"#endif\n"
"\n"
"void vs_main()\n"
"{\n"
" highp uint z;\n"
@ -973,19 +941,6 @@ static const char* tfx_fs_all_glsl =
" vec2 TC_OffsetHack;\n"
"};\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"// Function pointer type + the functionn pointer variable\n"
"subroutine void AlphaTestType(vec4 c);\n"
"layout(location = 0) subroutine uniform AlphaTestType atst;\n"
"\n"
"subroutine vec4 TfxType(vec4 t, vec4 c);\n"
"layout(location = 2) subroutine uniform TfxType tfx;\n"
"\n"
"subroutine void ColClipType(inout vec4 c);\n"
"layout(location = 1) subroutine uniform ColClipType colclip;\n"
"#endif\n"
"\n"
"\n"
"vec4 sample_c(vec2 uv)\n"
"{\n"
" // FIXME: check the issue on openGL\n"
@ -1184,7 +1139,6 @@ static const char* tfx_fs_all_glsl =
"}\n"
"\n"
"// FIXME Precompute the factor 255/128 in VS\n"
"#ifndef SUBROUTINE_GL40\n"
"vec4 tfx(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
@ -1212,9 +1166,7 @@ static const char* tfx_fs_all_glsl =
"\n"
" return c_out;\n"
"}\n"
"#endif\n"
"\n"
"#ifndef SUBROUTINE_GL40\n"
"void atst(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
@ -1243,9 +1195,7 @@ static const char* tfx_fs_all_glsl =
" discard;\n"
"#endif\n"
"}\n"
"#endif\n"
"\n"
"#ifndef SUBROUTINE_GL40\n"
"void colclip(inout vec4 c)\n"
"{\n"
"#if (PS_COLCLIP == 2)\n"
@ -1256,7 +1206,6 @@ static const char* tfx_fs_all_glsl =
" c.rgb *= vec3(factor);\n"
"#endif\n"
"}\n"
"#endif\n"
"\n"
"void fog(inout vec4 c, float f)\n"
"{\n"
@ -1516,291 +1465,6 @@ static const char* tfx_fs_all_glsl =
"}\n"
"\n"
"#endif\n"
"//#version 420 // Keep it for text editor detection\n"
"\n"
"// Subroutine of standard fs function (I don't know if it will be ever used one day)\n"
"\n"
"// FIXME crash nvidia\n"
"#if 0\n"
"// Function pointer type\n"
"subroutine vec4 WrapType(vec4 uv);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 4) subroutine uniform WrapType wrapuv;\n"
"\n"
"layout(index = 24) subroutine(WrapType)\n"
"vec4 wrapuv_wms_wmt_2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 25) subroutine(WrapType)\n"
"vec4 wrapuv_wms_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 26) subroutine(WrapType)\n"
"vec4 wrapuv_wms2_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 27) subroutine(WrapType)\n"
"vec4 wrapuv_wms3_wmt2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 28) subroutine(WrapType)\n"
"vec4 wrapuv_wms2_wmtx(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 29) subroutine(WrapType)\n"
"vec4 wrapuv_wmsx_wmt3(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 30) subroutine(WrapType)\n"
"vec4 wrapuv_wms3_wmtx(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 31) subroutine(WrapType)\n"
"vec4 wrapuv_wmsx_wmt2(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 32) subroutine(WrapType)\n"
"vec4 wrapuv_dummy(vec4 uv)\n"
"{\n"
" return uv;\n"
"}\n"
"#endif\n"
"\n"
"// FIXME crash nvidia\n"
"#if 0\n"
"// Function pointer type\n"
"subroutine vec2 ClampType(vec2 uv);\n"
"\n"
"// a function pointer variable\n"
"layout(location = 3) subroutine uniform ClampType clampuv;\n"
"\n"
"layout(index = 20) subroutine(ClampType)\n"
"vec2 clampuv_wms2_wmt2(vec2 uv)\n"
"{\n"
" return clamp(uv, MinF, MinMax.zw);\n"
"}\n"
"\n"
"layout(index = 21) subroutine(ClampType)\n"
"vec2 clampuv_wms2(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
" uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 22) subroutine(ClampType)\n"
"vec2 clampuv_wmt2(vec2 uv)\n"
"{\n"
" vec2 uv_out = uv;\n"
" uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n"
" return uv_out;\n"
"}\n"
"\n"
"layout(index = 23) subroutine(ClampType)\n"
"vec2 clampuv_dummy(vec2 uv)\n"
"{\n"
" return uv;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"layout(index = 11) subroutine(TfxType)\n"
"vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 12) subroutine(TfxType)\n"
"vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = t.rgb;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 13) subroutine(TfxType)\n"
"vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 14) subroutine(TfxType)\n"
"vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 15) subroutine(TfxType)\n"
"vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out = c * t * 255.0f / 128.0f;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 16) subroutine(TfxType)\n"
"vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out = t;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 17) subroutine(TfxType)\n"
"vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" c_out.a += t.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 18) subroutine(TfxType)\n"
"vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n"
"{\n"
" vec4 c_out = c;\n"
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
" c_out.a = t.a;\n"
" return c_out;\n"
"}\n"
"\n"
"layout(index = 19) subroutine(TfxType)\n"
"vec4 tfx_dummy(vec4 t, vec4 c)\n"
"{\n"
" return c;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"layout(index = 0) subroutine(AlphaTestType)\n"
"void atest_never(vec4 c)\n"
"{\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 1) subroutine(AlphaTestType)\n"
"void atest_always(vec4 c)\n"
"{\n"
" // Nothing to do\n"
"}\n"
"\n"
"layout(index = 2) subroutine(AlphaTestType)\n"
"void atest_l(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if (PS_SPRITEHACK == 0)\n"
" if ((AREF - a - 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 3) subroutine(AlphaTestType)\n"
"void atest_le(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((AREF - a + 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 4) subroutine(AlphaTestType)\n"
"void atest_e(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((0.5f - abs(a - AREF)) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 5) subroutine(AlphaTestType)\n"
"void atest_ge(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((a-AREF + 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 6) subroutine(AlphaTestType)\n"
"void atest_g(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((a-AREF - 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"\n"
"layout(index = 7) subroutine(AlphaTestType)\n"
"void atest_ne(vec4 c)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
" discard;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef SUBROUTINE_GL40\n"
"layout(index = 8) subroutine(ColClipType)\n"
"void colclip_0(inout vec4 c)\n"
"{\n"
" // nothing to do\n"
"}\n"
"\n"
"layout(index = 9) subroutine(ColClipType)\n"
"void colclip_1(inout vec4 c)\n"
"{\n"
" // FIXME !!!!\n"
" //c.rgb *= c.rgb < 128./255;\n"
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
" c.rgb *= vec3(factor);\n"
"}\n"
"\n"
"layout(index = 10) subroutine(ColClipType)\n"
"void colclip_2(inout vec4 c)\n"
"{\n"
" c.rgb = 256.0f/255.0f - c.rgb;\n"
" // FIXME !!!!\n"
" //c.rgb *= c.rgb < 128./255;\n"
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
" c.rgb *= vec3(factor);\n"
"}\n"
"#endif\n"
;
static const char* fxaa_fx =