mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: wipeout subroutine code
The code was completely bitrotten. It was only a partial test (and yet already 500 lines). The shader is becoming more and more complex, and multithreading support greatly reduces the cost of a shader switch.
This commit is contained in:
parent
e3751f6cd9
commit
b4c04ed00a
|
@ -42,7 +42,7 @@ my $gsdx_out = File::Spec->catdir($gsdx_path, "glsl_source.h");
|
||||||
|
|
||||||
# Just a hack to reuse glsl2h function easily
|
# Just a hack to reuse glsl2h function easily
|
||||||
$gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res", "glsl");
|
$gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res", "glsl");
|
||||||
my @tfx_res = qw/tfx_fs.glsl tfx_fs_subroutine.glsl/;
|
my @tfx_res = qw/tfx_fs.glsl/;
|
||||||
my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl");
|
my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl");
|
||||||
|
|
||||||
my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/;
|
my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/;
|
||||||
|
|
|
@ -87,7 +87,6 @@ PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange = NU
|
||||||
PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL;
|
PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate = NULL;
|
||||||
PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL;
|
PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate = NULL;
|
||||||
// GL4.0
|
// GL4.0
|
||||||
PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv = NULL;
|
|
||||||
// GL4.1
|
// GL4.1
|
||||||
PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL;
|
PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline = NULL;
|
||||||
PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL;
|
PFNGLGENPROGRAMPIPELINESPROC gl_GenProgramPipelines = NULL;
|
||||||
|
@ -339,13 +338,12 @@ namespace GLLoader {
|
||||||
bool found_GL_ARB_draw_buffers_blend = false; // DX10 GPU limited driver on windows!
|
bool found_GL_ARB_draw_buffers_blend = false; // DX10 GPU limited driver on windows!
|
||||||
|
|
||||||
// Note: except Apple, all drivers support explicit uniform location
|
// Note: except Apple, all drivers support explicit uniform location
|
||||||
bool found_GL_ARB_explicit_uniform_location = false; // need by subroutine and bindless texture
|
bool found_GL_ARB_explicit_uniform_location = false; // need by bindless texture
|
||||||
// GL4 hardware
|
// GL4 hardware
|
||||||
bool found_GL_ARB_buffer_storage = false;
|
bool found_GL_ARB_buffer_storage = false;
|
||||||
bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it
|
bool found_GL_ARB_copy_image = false; // Not sure actually maybe GL3 GPU can do it
|
||||||
bool found_GL_ARB_gpu_shader5 = false;
|
bool found_GL_ARB_gpu_shader5 = false;
|
||||||
bool found_GL_ARB_shader_image_load_store = false; // GLES3.1
|
bool found_GL_ARB_shader_image_load_store = false; // GLES3.1
|
||||||
bool found_GL_ARB_shader_subroutine = false;
|
|
||||||
bool found_GL_ARB_bindless_texture = false; // GL5 GPU?
|
bool found_GL_ARB_bindless_texture = false; // GL5 GPU?
|
||||||
bool found_GL_ARB_texture_barrier = false; // Well maybe supported by older hardware I don't know
|
bool found_GL_ARB_texture_barrier = false; // Well maybe supported by older hardware I don't know
|
||||||
|
|
||||||
|
@ -450,19 +448,6 @@ namespace GLLoader {
|
||||||
if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true;
|
if (!fglrx_buggy_driver && !mesa_amd_buggy_driver && !intel_buggy_driver) found_GL_ARB_separate_shader_objects = true;
|
||||||
else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n");
|
else fprintf(stderr, "Buggy driver detected, GL_ARB_separate_shader_objects will be disabled\n");
|
||||||
}
|
}
|
||||||
#if 0
|
|
||||||
// Erratum: on nvidia implementation, gain is very nice : 42.5 fps => 46.5 fps
|
|
||||||
//
|
|
||||||
// Strangely it doesn't provide the speed boost as expected.
|
|
||||||
// Note: only atst/colclip was replaced with subroutine for the moment. It replace 2000 program switch on
|
|
||||||
// colin mcrae 3 by 2100 uniform, but code is slower!
|
|
||||||
//
|
|
||||||
// Current hypothesis: the validation of useprogram is done in the "driver thread" whereas the extra function calls
|
|
||||||
// are done on the overloaded main threads.
|
|
||||||
// Apitrace profiling shows faster GPU draw times
|
|
||||||
|
|
||||||
if (ext.compare("GL_ARB_shader_subroutine") == 0) found_GL_ARB_shader_subroutine = true;
|
|
||||||
#endif
|
|
||||||
// GL4.2
|
// GL4.2
|
||||||
if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true;
|
if (ext.compare("GL_ARB_shading_language_420pack") == 0) found_GL_ARB_shading_language_420pack = true;
|
||||||
if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true;
|
if (ext.compare("GL_ARB_texture_storage") == 0) found_GL_ARB_texture_storage = true;
|
||||||
|
@ -495,7 +480,6 @@ namespace GLLoader {
|
||||||
status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend");
|
status &= status_and_override(found_GL_ARB_draw_buffers_blend, "GL_ARB_draw_buffers_blend");
|
||||||
// GL4.1
|
// GL4.1
|
||||||
status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects");
|
status &= status_and_override(found_GL_ARB_separate_shader_objects, "GL_ARB_separate_shader_objects");
|
||||||
status &= status_and_override(found_GL_ARB_shader_subroutine, "GL_ARB_shader_subroutine");
|
|
||||||
// GL4.2
|
// GL4.2
|
||||||
status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store");
|
status &= status_and_override(found_GL_ARB_shader_image_load_store, "GL_ARB_shader_image_load_store");
|
||||||
status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true);
|
status &= status_and_override(found_GL_ARB_shading_language_420pack, "GL_ARB_shading_language_420pack", true);
|
||||||
|
|
|
@ -270,7 +270,6 @@ extern PFNGLFLUSHMAPPEDBUFFERRANGEPROC gl_FlushMappedBufferRange;
|
||||||
extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate;
|
extern PFNGLBLENDEQUATIONSEPARATEPROC gl_BlendEquationSeparate;
|
||||||
extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate;
|
extern PFNGLBLENDFUNCSEPARATEPROC gl_BlendFuncSeparate;
|
||||||
// GL4.0
|
// GL4.0
|
||||||
extern PFNGLUNIFORMSUBROUTINESUIVPROC gl_UniformSubroutinesuiv;
|
|
||||||
// GL4.1
|
// GL4.1
|
||||||
extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline;
|
extern PFNGLBINDPROGRAMPIPELINEPROC gl_BindProgramPipeline;
|
||||||
extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines;
|
extern PFNGLDELETEPROGRAMPIPELINESPROC gl_DeleteProgramPipelines;
|
||||||
|
@ -361,7 +360,6 @@ namespace GLLoader {
|
||||||
extern bool found_GL_ARB_shader_image_load_store;
|
extern bool found_GL_ARB_shader_image_load_store;
|
||||||
extern bool found_GL_ARB_clear_texture;
|
extern bool found_GL_ARB_clear_texture;
|
||||||
extern bool found_GL_ARB_buffer_storage;
|
extern bool found_GL_ARB_buffer_storage;
|
||||||
extern bool found_GL_ARB_shader_subroutine;
|
|
||||||
extern bool found_GL_ARB_bindless_texture;
|
extern bool found_GL_ARB_bindless_texture;
|
||||||
extern bool found_GL_ARB_explicit_uniform_location;
|
extern bool found_GL_ARB_explicit_uniform_location;
|
||||||
extern bool found_GL_ARB_clip_control;
|
extern bool found_GL_ARB_clip_control;
|
||||||
|
|
|
@ -58,8 +58,6 @@ namespace GLState {
|
||||||
GLuint vs;
|
GLuint vs;
|
||||||
GLuint program;
|
GLuint program;
|
||||||
bool dirty_prog;
|
bool dirty_prog;
|
||||||
bool dirty_subroutine_vs;
|
|
||||||
bool dirty_subroutine_ps;
|
|
||||||
#if 0
|
#if 0
|
||||||
struct {
|
struct {
|
||||||
GSVertexBufferStateOGL* vb;
|
GSVertexBufferStateOGL* vb;
|
||||||
|
@ -105,8 +103,6 @@ namespace GLState {
|
||||||
vs = 0;
|
vs = 0;
|
||||||
program = 0;
|
program = 0;
|
||||||
dirty_prog = false;
|
dirty_prog = false;
|
||||||
dirty_subroutine_vs = false;
|
|
||||||
dirty_subroutine_ps = false;
|
|
||||||
dirty_ressources = false;
|
dirty_ressources = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,8 +58,6 @@ namespace GLState {
|
||||||
extern GLuint vs;
|
extern GLuint vs;
|
||||||
extern GLuint program; // monolith program (when sso isn't supported)
|
extern GLuint program; // monolith program (when sso isn't supported)
|
||||||
extern bool dirty_prog;
|
extern bool dirty_prog;
|
||||||
extern bool dirty_subroutine_vs;
|
|
||||||
extern bool dirty_subroutine_ps;
|
|
||||||
extern bool dirty_ressources;
|
extern bool dirty_ressources;
|
||||||
|
|
||||||
extern void Clear();
|
extern void Clear();
|
||||||
|
|
|
@ -229,7 +229,6 @@ class GSDeviceOGL : public GSDevice
|
||||||
{
|
{
|
||||||
uint32 wildhack:1;
|
uint32 wildhack:1;
|
||||||
uint32 bppz:2;
|
uint32 bppz:2;
|
||||||
// Next param will be handle by subroutine
|
|
||||||
uint32 tme:1;
|
uint32 tme:1;
|
||||||
uint32 fst:1;
|
uint32 fst:1;
|
||||||
|
|
||||||
|
@ -338,7 +337,6 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 tcoffsethack:1;
|
uint32 tcoffsethack:1;
|
||||||
//uint32 point_sampler:1; Not tested, so keep the bit for blend
|
//uint32 point_sampler:1; Not tested, so keep the bit for blend
|
||||||
uint32 iip:1;
|
uint32 iip:1;
|
||||||
// Next param will be handle by subroutine (broken currently)
|
|
||||||
uint32 colclip:2;
|
uint32 colclip:2;
|
||||||
uint32 atst:3;
|
uint32 atst:3;
|
||||||
|
|
||||||
|
|
|
@ -24,14 +24,8 @@
|
||||||
#include "GLState.h"
|
#include "GLState.h"
|
||||||
|
|
||||||
GSShaderOGL::GSShaderOGL(bool debug) :
|
GSShaderOGL::GSShaderOGL(bool debug) :
|
||||||
m_debug_shader(debug),
|
m_debug_shader(debug)
|
||||||
m_vs_sub_count(0),
|
|
||||||
m_ps_sub_count(0)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
memset(&m_vs_sub, 0, countof(m_vs_sub)*sizeof(m_vs_sub[0]));
|
|
||||||
memset(&m_ps_sub, 0, countof(m_ps_sub)*sizeof(m_ps_sub[0]));
|
|
||||||
|
|
||||||
m_single_prog.clear();
|
m_single_prog.clear();
|
||||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
gl_GenProgramPipelines(1, &m_pipeline);
|
gl_GenProgramPipelines(1, &m_pipeline);
|
||||||
|
@ -48,41 +42,17 @@ GSShaderOGL::~GSShaderOGL()
|
||||||
m_single_prog.clear();
|
m_single_prog.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSShaderOGL::VS(GLuint s, GLuint sub_count)
|
void GSShaderOGL::VS(GLuint s)
|
||||||
{
|
{
|
||||||
if (GLState::vs != s)
|
if (GLState::vs != s)
|
||||||
{
|
{
|
||||||
m_vs_sub_count = sub_count;
|
|
||||||
|
|
||||||
GLState::vs = s;
|
GLState::vs = s;
|
||||||
GLState::dirty_prog = true;
|
GLState::dirty_prog = true;
|
||||||
GLState::dirty_subroutine_vs = true;
|
|
||||||
if (GLLoader::found_GL_ARB_separate_shader_objects)
|
if (GLLoader::found_GL_ARB_separate_shader_objects)
|
||||||
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
|
gl_UseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSShaderOGL::VS_subroutine(GLuint *sub)
|
|
||||||
{
|
|
||||||
if (!(m_vs_sub[0] == sub[0])) {
|
|
||||||
m_vs_sub[0] = sub[0];
|
|
||||||
GLState::dirty_subroutine_vs = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSShaderOGL::PS_subroutine(GLuint *sub)
|
|
||||||
{
|
|
||||||
// FIXME could be more efficient with GSvector
|
|
||||||
if (!(m_ps_sub[0] == sub[0] && m_ps_sub[1] == sub[1] && m_ps_sub[2] == sub[2] && m_ps_sub[3] == sub[3] && m_ps_sub[4] == sub[4])) {
|
|
||||||
m_ps_sub[0] = sub[0];
|
|
||||||
m_ps_sub[1] = sub[1];
|
|
||||||
m_ps_sub[2] = sub[2];
|
|
||||||
m_ps_sub[3] = sub[3];
|
|
||||||
m_ps_sub[4] = sub[4];
|
|
||||||
GLState::dirty_subroutine_ps = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSShaderOGL::PS_ressources(GLuint64 handle[2])
|
void GSShaderOGL::PS_ressources(GLuint64 handle[2])
|
||||||
{
|
{
|
||||||
if (handle[0] != GLState::tex_handle[0] || handle[1] != GLState::tex_handle[1]) {
|
if (handle[0] != GLState::tex_handle[0] || handle[1] != GLState::tex_handle[1]) {
|
||||||
|
@ -92,7 +62,7 @@ void GSShaderOGL::PS_ressources(GLuint64 handle[2])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSShaderOGL::PS(GLuint s, GLuint sub_count)
|
void GSShaderOGL::PS(GLuint s)
|
||||||
{
|
{
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
if (true)
|
if (true)
|
||||||
|
@ -100,12 +70,9 @@ void GSShaderOGL::PS(GLuint s, GLuint sub_count)
|
||||||
if (GLState::ps != s)
|
if (GLState::ps != s)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
m_ps_sub_count = sub_count;
|
|
||||||
|
|
||||||
// In debug always sets the program. It allow to replace the program in apitrace easily.
|
// In debug always sets the program. It allow to replace the program in apitrace easily.
|
||||||
GLState::ps = s;
|
GLState::ps = s;
|
||||||
GLState::dirty_prog = true;
|
GLState::dirty_prog = true;
|
||||||
GLState::dirty_subroutine_ps = true;
|
|
||||||
GLState::dirty_ressources = true;
|
GLState::dirty_ressources = true;
|
||||||
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
if (GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
|
gl_UseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, s);
|
||||||
|
@ -142,21 +109,6 @@ void GSShaderOGL::SetupRessources()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSShaderOGL::SetupSubroutineUniform()
|
|
||||||
{
|
|
||||||
if (!GLLoader::found_GL_ARB_shader_subroutine) return;
|
|
||||||
|
|
||||||
if (GLState::dirty_subroutine_vs && m_vs_sub_count) {
|
|
||||||
gl_UniformSubroutinesuiv(GL_VERTEX_SHADER, m_vs_sub_count, m_vs_sub);
|
|
||||||
GLState::dirty_subroutine_vs = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (GLState::dirty_subroutine_ps && m_ps_sub_count) {
|
|
||||||
gl_UniformSubroutinesuiv(GL_FRAGMENT_SHADER, m_ps_sub_count, m_ps_sub);
|
|
||||||
GLState::dirty_subroutine_ps = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GSShaderOGL::ValidateShader(GLuint s)
|
bool GSShaderOGL::ValidateShader(GLuint s)
|
||||||
{
|
{
|
||||||
if (!m_debug_shader) return true;
|
if (!m_debug_shader) return true;
|
||||||
|
@ -243,8 +195,6 @@ void GSShaderOGL::UseProgram()
|
||||||
|
|
||||||
if (GLState::dirty_prog) {
|
if (GLState::dirty_prog) {
|
||||||
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
|
if (!GLLoader::found_GL_ARB_separate_shader_objects) {
|
||||||
GLState::dirty_subroutine_vs = true;
|
|
||||||
GLState::dirty_subroutine_ps = true;
|
|
||||||
GLState::dirty_ressources = true;
|
GLState::dirty_ressources = true;
|
||||||
|
|
||||||
hash_map<uint64, GLuint >::iterator it;
|
hash_map<uint64, GLuint >::iterator it;
|
||||||
|
@ -277,8 +227,6 @@ void GSShaderOGL::UseProgram()
|
||||||
|
|
||||||
SetupRessources();
|
SetupRessources();
|
||||||
|
|
||||||
SetupSubroutineUniform();
|
|
||||||
|
|
||||||
GLState::dirty_prog = false;
|
GLState::dirty_prog = false;
|
||||||
|
|
||||||
GL_POP();
|
GL_POP();
|
||||||
|
@ -294,11 +242,6 @@ std::string GSShaderOGL::GenGlslHeader(const std::string& entry, GLenum type, co
|
||||||
// Need GL version 410
|
// Need GL version 410
|
||||||
header += "#extension GL_ARB_separate_shader_objects: require\n";
|
header += "#extension GL_ARB_separate_shader_objects: require\n";
|
||||||
}
|
}
|
||||||
if (GLLoader::found_GL_ARB_shader_subroutine && GLLoader::found_GL_ARB_explicit_uniform_location) {
|
|
||||||
// Need GL version 400
|
|
||||||
header += "#define SUBROUTINE_GL40 1\n";
|
|
||||||
header += "#extension GL_ARB_shader_subroutine: require\n";
|
|
||||||
}
|
|
||||||
if (GLLoader::found_GL_ARB_explicit_uniform_location) {
|
if (GLLoader::found_GL_ARB_explicit_uniform_location) {
|
||||||
// Need GL version 430
|
// Need GL version 430
|
||||||
header += "#extension GL_ARB_explicit_uniform_location: require\n";
|
header += "#extension GL_ARB_explicit_uniform_location: require\n";
|
||||||
|
|
|
@ -25,13 +25,7 @@ class GSShaderOGL {
|
||||||
GLuint m_pipeline;
|
GLuint m_pipeline;
|
||||||
hash_map<uint64, GLuint > m_single_prog;
|
hash_map<uint64, GLuint > m_single_prog;
|
||||||
const bool m_debug_shader;
|
const bool m_debug_shader;
|
||||||
GLuint m_vs_sub_count;
|
|
||||||
GLuint m_ps_sub_count;
|
|
||||||
|
|
||||||
GLuint m_vs_sub[1];
|
|
||||||
GLuint m_ps_sub[5];
|
|
||||||
|
|
||||||
void SetupSubroutineUniform();
|
|
||||||
void SetupRessources();
|
void SetupRessources();
|
||||||
|
|
||||||
bool ValidateShader(GLuint p);
|
bool ValidateShader(GLuint p);
|
||||||
|
@ -46,11 +40,9 @@ class GSShaderOGL {
|
||||||
~GSShaderOGL();
|
~GSShaderOGL();
|
||||||
|
|
||||||
void GS(GLuint s);
|
void GS(GLuint s);
|
||||||
void PS(GLuint s, GLuint sub_count = 0);
|
void PS(GLuint s);
|
||||||
void PS_subroutine(GLuint *sub);
|
|
||||||
void PS_ressources(GLuint64 handle[2]);
|
void PS_ressources(GLuint64 handle[2]);
|
||||||
void VS(GLuint s, GLuint sub_count = 0);
|
void VS(GLuint s);
|
||||||
void VS_subroutine(GLuint *sub);
|
|
||||||
|
|
||||||
void UseProgram();
|
void UseProgram();
|
||||||
|
|
||||||
|
|
|
@ -143,16 +143,7 @@ void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer*
|
||||||
|
|
||||||
void GSDeviceOGL::SetupVS(VSSelector sel)
|
void GSDeviceOGL::SetupVS(VSSelector sel)
|
||||||
{
|
{
|
||||||
if (GLLoader::found_GL_ARB_shader_subroutine) {
|
m_shader->VS(m_vs[sel]);
|
||||||
GLuint sub[1];
|
|
||||||
sub[0] = sel.tme ? 1 + (uint32)sel.fst : 0;
|
|
||||||
m_shader->VS_subroutine(sub);
|
|
||||||
// Handle by subroutine useless now
|
|
||||||
sel.tme = 0;
|
|
||||||
sel.fst = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_shader->VS(m_vs[sel], 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::SetupGS(GSSelector sel)
|
void GSDeviceOGL::SetupGS(GSSelector sel)
|
||||||
|
@ -162,38 +153,6 @@ void GSDeviceOGL::SetupGS(GSSelector sel)
|
||||||
|
|
||||||
void GSDeviceOGL::SetupPS(PSSelector sel)
|
void GSDeviceOGL::SetupPS(PSSelector sel)
|
||||||
{
|
{
|
||||||
if (GLLoader::found_GL_ARB_shader_subroutine) {
|
|
||||||
GLuint tfx = sel.tfx > 3 ? 19 : 11 + (uint32)sel.tfx + (uint32)sel.tcc*4;
|
|
||||||
|
|
||||||
GLuint colclip = 8 + (uint32)sel.colclip;
|
|
||||||
|
|
||||||
GLuint clamp =
|
|
||||||
(sel.wms == 2 && sel.wmt == 2) ? 20 :
|
|
||||||
(sel.wms == 2) ? 21 :
|
|
||||||
(sel.wmt == 2) ? 22 : 23;
|
|
||||||
|
|
||||||
GLuint wrap =
|
|
||||||
(sel.wms == 2 && sel.wmt == 2) ? 24 :
|
|
||||||
(sel.wms == 3 && sel.wmt == 3) ? 25 :
|
|
||||||
(sel.wms == 2 && sel.wmt == 3) ? 26 :
|
|
||||||
(sel.wms == 3 && sel.wmt == 2) ? 27 :
|
|
||||||
(sel.wms == 2) ? 28 :
|
|
||||||
(sel.wmt == 3) ? 29 :
|
|
||||||
(sel.wms == 3) ? 30 :
|
|
||||||
(sel.wmt == 2) ? 31 : 32;
|
|
||||||
|
|
||||||
GLuint sub[5] = {sel.atst, colclip, tfx, clamp, wrap};
|
|
||||||
|
|
||||||
m_shader->PS_subroutine(sub);
|
|
||||||
// Handle by subroutine useless now
|
|
||||||
sel.atst = 0;
|
|
||||||
sel.colclip = 0;
|
|
||||||
sel.tfx = 0;
|
|
||||||
sel.tcc = 0;
|
|
||||||
// sel.wms = 0;
|
|
||||||
// sel.wmt = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// *************************************************************
|
// *************************************************************
|
||||||
// Static
|
// Static
|
||||||
// *************************************************************
|
// *************************************************************
|
||||||
|
@ -210,7 +169,7 @@ void GSDeviceOGL::SetupPS(PSSelector sel)
|
||||||
// *************************************************************
|
// *************************************************************
|
||||||
// Dynamic
|
// Dynamic
|
||||||
// *************************************************************
|
// *************************************************************
|
||||||
m_shader->PS(ps, 3);
|
m_shader->PS(ps);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel)
|
void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel)
|
||||||
|
|
|
@ -80,7 +80,6 @@ void GSWndGL::PopulateGlFunction()
|
||||||
*(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync");
|
*(void**)&(gl_ClientWaitSync) = GetProcAddress("glClientWaitSync");
|
||||||
*(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange");
|
*(void**)&(gl_FlushMappedBufferRange) = GetProcAddress("glFlushMappedBufferRange");
|
||||||
// GL4.0
|
// GL4.0
|
||||||
*(void**)&(gl_UniformSubroutinesuiv) = GetProcAddress("glUniformSubroutinesuiv", true);
|
|
||||||
*(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB", true);
|
*(void**)&(gl_BlendEquationSeparateiARB) = GetProcAddress("glBlendEquationSeparateiARB", true);
|
||||||
*(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB", true);
|
*(void**)&(gl_BlendFuncSeparateiARB) = GetProcAddress("glBlendFuncSeparateiARB", true);
|
||||||
// GL4.1
|
// GL4.1
|
||||||
|
|
|
@ -80,19 +80,6 @@ layout(std140, binding = 21) uniform cb21
|
||||||
vec2 TC_OffsetHack;
|
vec2 TC_OffsetHack;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SUBROUTINE_GL40
|
|
||||||
// Function pointer type + the functionn pointer variable
|
|
||||||
subroutine void AlphaTestType(vec4 c);
|
|
||||||
layout(location = 0) subroutine uniform AlphaTestType atst;
|
|
||||||
|
|
||||||
subroutine vec4 TfxType(vec4 t, vec4 c);
|
|
||||||
layout(location = 2) subroutine uniform TfxType tfx;
|
|
||||||
|
|
||||||
subroutine void ColClipType(inout vec4 c);
|
|
||||||
layout(location = 1) subroutine uniform ColClipType colclip;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
vec4 sample_c(vec2 uv)
|
vec4 sample_c(vec2 uv)
|
||||||
{
|
{
|
||||||
// FIXME: check the issue on openGL
|
// FIXME: check the issue on openGL
|
||||||
|
@ -291,7 +278,6 @@ vec4 sample_color(vec2 st, float q)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME Precompute the factor 255/128 in VS
|
// FIXME Precompute the factor 255/128 in VS
|
||||||
#ifndef SUBROUTINE_GL40
|
|
||||||
vec4 tfx(vec4 t, vec4 c)
|
vec4 tfx(vec4 t, vec4 c)
|
||||||
{
|
{
|
||||||
vec4 c_out = c;
|
vec4 c_out = c;
|
||||||
|
@ -319,9 +305,7 @@ vec4 tfx(vec4 t, vec4 c)
|
||||||
|
|
||||||
return c_out;
|
return c_out;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef SUBROUTINE_GL40
|
|
||||||
void atst(vec4 c)
|
void atst(vec4 c)
|
||||||
{
|
{
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
float a = trunc(c.a * 255.0 + 0.01);
|
||||||
|
@ -350,9 +334,7 @@ void atst(vec4 c)
|
||||||
discard;
|
discard;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef SUBROUTINE_GL40
|
|
||||||
void colclip(inout vec4 c)
|
void colclip(inout vec4 c)
|
||||||
{
|
{
|
||||||
#if (PS_COLCLIP == 2)
|
#if (PS_COLCLIP == 2)
|
||||||
|
@ -363,7 +345,6 @@ void colclip(inout vec4 c)
|
||||||
c.rgb *= vec3(factor);
|
c.rgb *= vec3(factor);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
void fog(inout vec4 c, float f)
|
void fog(inout vec4 c, float f)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,285 +0,0 @@
|
||||||
//#version 420 // Keep it for text editor detection
|
|
||||||
|
|
||||||
// Subroutine of standard fs function (I don't know if it will be ever used one day)
|
|
||||||
|
|
||||||
// FIXME crash nvidia
|
|
||||||
#if 0
|
|
||||||
// Function pointer type
|
|
||||||
subroutine vec4 WrapType(vec4 uv);
|
|
||||||
|
|
||||||
// a function pointer variable
|
|
||||||
layout(location = 4) subroutine uniform WrapType wrapuv;
|
|
||||||
|
|
||||||
layout(index = 24) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wms_wmt_2(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 25) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wms_wmt3(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 26) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wms2_wmt3(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
|
|
||||||
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 27) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wms3_wmt2(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
|
|
||||||
uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 28) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wms2_wmtx(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 29) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wmsx_wmt3(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 30) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wms3_wmtx(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 31) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_wmsx_wmt2(vec4 uv)
|
|
||||||
{
|
|
||||||
vec4 uv_out = uv;
|
|
||||||
uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 32) subroutine(WrapType)
|
|
||||||
vec4 wrapuv_dummy(vec4 uv)
|
|
||||||
{
|
|
||||||
return uv;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// FIXME crash nvidia
|
|
||||||
#if 0
|
|
||||||
// Function pointer type
|
|
||||||
subroutine vec2 ClampType(vec2 uv);
|
|
||||||
|
|
||||||
// a function pointer variable
|
|
||||||
layout(location = 3) subroutine uniform ClampType clampuv;
|
|
||||||
|
|
||||||
layout(index = 20) subroutine(ClampType)
|
|
||||||
vec2 clampuv_wms2_wmt2(vec2 uv)
|
|
||||||
{
|
|
||||||
return clamp(uv, MinF, MinMax.zw);
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 21) subroutine(ClampType)
|
|
||||||
vec2 clampuv_wms2(vec2 uv)
|
|
||||||
{
|
|
||||||
vec2 uv_out = uv;
|
|
||||||
uv_out.x = clamp(uv.x, MinF.x, MinMax.z);
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 22) subroutine(ClampType)
|
|
||||||
vec2 clampuv_wmt2(vec2 uv)
|
|
||||||
{
|
|
||||||
vec2 uv_out = uv;
|
|
||||||
uv_out.y = clamp(uv.y, MinF.y, MinMax.w);
|
|
||||||
return uv_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 23) subroutine(ClampType)
|
|
||||||
vec2 clampuv_dummy(vec2 uv)
|
|
||||||
{
|
|
||||||
return uv;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SUBROUTINE_GL40
|
|
||||||
layout(index = 11) subroutine(TfxType)
|
|
||||||
vec4 tfx_0_tcc_0(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 12) subroutine(TfxType)
|
|
||||||
vec4 tfx_1_tcc_0(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out.rgb = t.rgb;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 13) subroutine(TfxType)
|
|
||||||
vec4 tfx_2_tcc_0(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 14) subroutine(TfxType)
|
|
||||||
vec4 tfx_3_tcc_0(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 15) subroutine(TfxType)
|
|
||||||
vec4 tfx_0_tcc_1(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out = c * t * 255.0f / 128.0f;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 16) subroutine(TfxType)
|
|
||||||
vec4 tfx_1_tcc_1(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out = t;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 17) subroutine(TfxType)
|
|
||||||
vec4 tfx_2_tcc_1(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
|
|
||||||
c_out.a += t.a;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 18) subroutine(TfxType)
|
|
||||||
vec4 tfx_3_tcc_1(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
vec4 c_out = c;
|
|
||||||
c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;
|
|
||||||
c_out.a = t.a;
|
|
||||||
return c_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 19) subroutine(TfxType)
|
|
||||||
vec4 tfx_dummy(vec4 t, vec4 c)
|
|
||||||
{
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SUBROUTINE_GL40
|
|
||||||
layout(index = 0) subroutine(AlphaTestType)
|
|
||||||
void atest_never(vec4 c)
|
|
||||||
{
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 1) subroutine(AlphaTestType)
|
|
||||||
void atest_always(vec4 c)
|
|
||||||
{
|
|
||||||
// Nothing to do
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 2) subroutine(AlphaTestType)
|
|
||||||
void atest_l(vec4 c)
|
|
||||||
{
|
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
|
||||||
if (PS_SPRITEHACK == 0)
|
|
||||||
if ((AREF - a - 0.5f) < 0.0f)
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 3) subroutine(AlphaTestType)
|
|
||||||
void atest_le(vec4 c)
|
|
||||||
{
|
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
|
||||||
if ((AREF - a + 0.5f) < 0.0f)
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 4) subroutine(AlphaTestType)
|
|
||||||
void atest_e(vec4 c)
|
|
||||||
{
|
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
|
||||||
if ((0.5f - abs(a - AREF)) < 0.0f)
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 5) subroutine(AlphaTestType)
|
|
||||||
void atest_ge(vec4 c)
|
|
||||||
{
|
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
|
||||||
if ((a-AREF + 0.5f) < 0.0f)
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 6) subroutine(AlphaTestType)
|
|
||||||
void atest_g(vec4 c)
|
|
||||||
{
|
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
|
||||||
if ((a-AREF - 0.5f) < 0.0f)
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 7) subroutine(AlphaTestType)
|
|
||||||
void atest_ne(vec4 c)
|
|
||||||
{
|
|
||||||
float a = trunc(c.a * 255.0 + 0.01);
|
|
||||||
if ((abs(a - AREF) - 0.5f) < 0.0f)
|
|
||||||
discard;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SUBROUTINE_GL40
|
|
||||||
layout(index = 8) subroutine(ColClipType)
|
|
||||||
void colclip_0(inout vec4 c)
|
|
||||||
{
|
|
||||||
// nothing to do
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 9) subroutine(ColClipType)
|
|
||||||
void colclip_1(inout vec4 c)
|
|
||||||
{
|
|
||||||
// FIXME !!!!
|
|
||||||
//c.rgb *= c.rgb < 128./255;
|
|
||||||
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
|
|
||||||
c.rgb *= vec3(factor);
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 10) subroutine(ColClipType)
|
|
||||||
void colclip_2(inout vec4 c)
|
|
||||||
{
|
|
||||||
c.rgb = 256.0f/255.0f - c.rgb;
|
|
||||||
// FIXME !!!!
|
|
||||||
//c.rgb *= c.rgb < 128./255;
|
|
||||||
bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
|
|
||||||
c.rgb *= vec3(factor);
|
|
||||||
}
|
|
||||||
#endif
|
|
|
@ -42,36 +42,6 @@ const float exp_min32 = exp2(-32.0f);
|
||||||
const float exp_min31 = exp2(-31.0f);
|
const float exp_min31 = exp2(-31.0f);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SUBROUTINE_GL40
|
|
||||||
// Function pointer type
|
|
||||||
subroutine void TextureCoordType(void);
|
|
||||||
|
|
||||||
// a function pointer variable
|
|
||||||
layout(location = 0) subroutine uniform TextureCoordType texture_coord;
|
|
||||||
|
|
||||||
layout(index = 0) subroutine(TextureCoordType)
|
|
||||||
void tme_0()
|
|
||||||
{
|
|
||||||
VSout_t.xy = vec2(0.0f, 0.0f);
|
|
||||||
VSout_t.w = 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 1) subroutine(TextureCoordType)
|
|
||||||
void tme_1_fst_0()
|
|
||||||
{
|
|
||||||
VSout_t.xy = i_st;
|
|
||||||
VSout_t.w = i_q;
|
|
||||||
}
|
|
||||||
|
|
||||||
layout(index = 2) subroutine(TextureCoordType)
|
|
||||||
void tme_1_fst_1()
|
|
||||||
{
|
|
||||||
VSout_t.xy = vec2(i_uv) * TextureScale;
|
|
||||||
VSout_t.w = 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
void texture_coord()
|
void texture_coord()
|
||||||
{
|
{
|
||||||
if(VS_TME != 0)
|
if(VS_TME != 0)
|
||||||
|
@ -98,8 +68,6 @@ void texture_coord()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void vs_main()
|
void vs_main()
|
||||||
{
|
{
|
||||||
highp uint z;
|
highp uint z;
|
||||||
|
|
|
@ -650,36 +650,6 @@ static const char* tfx_vgs_glsl =
|
||||||
"const float exp_min31 = exp2(-31.0f);\n"
|
"const float exp_min31 = exp2(-31.0f);\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#ifdef SUBROUTINE_GL40\n"
|
|
||||||
"// Function pointer type\n"
|
|
||||||
"subroutine void TextureCoordType(void);\n"
|
|
||||||
"\n"
|
|
||||||
"// a function pointer variable\n"
|
|
||||||
"layout(location = 0) subroutine uniform TextureCoordType texture_coord;\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 0) subroutine(TextureCoordType)\n"
|
|
||||||
"void tme_0()\n"
|
|
||||||
"{\n"
|
|
||||||
" VSout_t.xy = vec2(0.0f, 0.0f);\n"
|
|
||||||
" VSout_t.w = 1.0f;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 1) subroutine(TextureCoordType)\n"
|
|
||||||
"void tme_1_fst_0()\n"
|
|
||||||
"{\n"
|
|
||||||
" VSout_t.xy = i_st;\n"
|
|
||||||
" VSout_t.w = i_q;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 2) subroutine(TextureCoordType)\n"
|
|
||||||
"void tme_1_fst_1()\n"
|
|
||||||
"{\n"
|
|
||||||
" VSout_t.xy = vec2(i_uv) * TextureScale;\n"
|
|
||||||
" VSout_t.w = 1.0f;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"#else\n"
|
|
||||||
"\n"
|
|
||||||
"void texture_coord()\n"
|
"void texture_coord()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" if(VS_TME != 0)\n"
|
" if(VS_TME != 0)\n"
|
||||||
|
@ -706,8 +676,6 @@ static const char* tfx_vgs_glsl =
|
||||||
" }\n"
|
" }\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"void vs_main()\n"
|
"void vs_main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" highp uint z;\n"
|
" highp uint z;\n"
|
||||||
|
@ -973,19 +941,6 @@ static const char* tfx_fs_all_glsl =
|
||||||
" vec2 TC_OffsetHack;\n"
|
" vec2 TC_OffsetHack;\n"
|
||||||
"};\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#ifdef SUBROUTINE_GL40\n"
|
|
||||||
"// Function pointer type + the functionn pointer variable\n"
|
|
||||||
"subroutine void AlphaTestType(vec4 c);\n"
|
|
||||||
"layout(location = 0) subroutine uniform AlphaTestType atst;\n"
|
|
||||||
"\n"
|
|
||||||
"subroutine vec4 TfxType(vec4 t, vec4 c);\n"
|
|
||||||
"layout(location = 2) subroutine uniform TfxType tfx;\n"
|
|
||||||
"\n"
|
|
||||||
"subroutine void ColClipType(inout vec4 c);\n"
|
|
||||||
"layout(location = 1) subroutine uniform ColClipType colclip;\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"\n"
|
|
||||||
"vec4 sample_c(vec2 uv)\n"
|
"vec4 sample_c(vec2 uv)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" // FIXME: check the issue on openGL\n"
|
" // FIXME: check the issue on openGL\n"
|
||||||
|
@ -1184,7 +1139,6 @@ static const char* tfx_fs_all_glsl =
|
||||||
"}\n"
|
"}\n"
|
||||||
"\n"
|
"\n"
|
||||||
"// FIXME Precompute the factor 255/128 in VS\n"
|
"// FIXME Precompute the factor 255/128 in VS\n"
|
||||||
"#ifndef SUBROUTINE_GL40\n"
|
|
||||||
"vec4 tfx(vec4 t, vec4 c)\n"
|
"vec4 tfx(vec4 t, vec4 c)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" vec4 c_out = c;\n"
|
" vec4 c_out = c;\n"
|
||||||
|
@ -1212,9 +1166,7 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
" return c_out;\n"
|
" return c_out;\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
"#ifndef SUBROUTINE_GL40\n"
|
|
||||||
"void atst(vec4 c)\n"
|
"void atst(vec4 c)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||||
|
@ -1243,9 +1195,7 @@ static const char* tfx_fs_all_glsl =
|
||||||
" discard;\n"
|
" discard;\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
"#ifndef SUBROUTINE_GL40\n"
|
|
||||||
"void colclip(inout vec4 c)\n"
|
"void colclip(inout vec4 c)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
"#if (PS_COLCLIP == 2)\n"
|
"#if (PS_COLCLIP == 2)\n"
|
||||||
|
@ -1256,7 +1206,6 @@ static const char* tfx_fs_all_glsl =
|
||||||
" c.rgb *= vec3(factor);\n"
|
" c.rgb *= vec3(factor);\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
"void fog(inout vec4 c, float f)\n"
|
"void fog(inout vec4 c, float f)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
|
@ -1516,291 +1465,6 @@ static const char* tfx_fs_all_glsl =
|
||||||
"}\n"
|
"}\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"//#version 420 // Keep it for text editor detection\n"
|
|
||||||
"\n"
|
|
||||||
"// Subroutine of standard fs function (I don't know if it will be ever used one day)\n"
|
|
||||||
"\n"
|
|
||||||
"// FIXME crash nvidia\n"
|
|
||||||
"#if 0\n"
|
|
||||||
"// Function pointer type\n"
|
|
||||||
"subroutine vec4 WrapType(vec4 uv);\n"
|
|
||||||
"\n"
|
|
||||||
"// a function pointer variable\n"
|
|
||||||
"layout(location = 4) subroutine uniform WrapType wrapuv;\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 24) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wms_wmt_2(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 25) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wms_wmt3(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 26) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wms2_wmt3(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
|
|
||||||
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 27) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wms3_wmt2(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
|
|
||||||
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 28) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wms2_wmtx(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 29) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wmsx_wmt3(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 30) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wms3_wmtx(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 31) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_wmsx_wmt2(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 uv_out = uv;\n"
|
|
||||||
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 32) subroutine(WrapType)\n"
|
|
||||||
"vec4 wrapuv_dummy(vec4 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" return uv;\n"
|
|
||||||
"}\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"// FIXME crash nvidia\n"
|
|
||||||
"#if 0\n"
|
|
||||||
"// Function pointer type\n"
|
|
||||||
"subroutine vec2 ClampType(vec2 uv);\n"
|
|
||||||
"\n"
|
|
||||||
"// a function pointer variable\n"
|
|
||||||
"layout(location = 3) subroutine uniform ClampType clampuv;\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 20) subroutine(ClampType)\n"
|
|
||||||
"vec2 clampuv_wms2_wmt2(vec2 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" return clamp(uv, MinF, MinMax.zw);\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 21) subroutine(ClampType)\n"
|
|
||||||
"vec2 clampuv_wms2(vec2 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec2 uv_out = uv;\n"
|
|
||||||
" uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 22) subroutine(ClampType)\n"
|
|
||||||
"vec2 clampuv_wmt2(vec2 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec2 uv_out = uv;\n"
|
|
||||||
" uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n"
|
|
||||||
" return uv_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 23) subroutine(ClampType)\n"
|
|
||||||
"vec2 clampuv_dummy(vec2 uv)\n"
|
|
||||||
"{\n"
|
|
||||||
" return uv;\n"
|
|
||||||
"}\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"#ifdef SUBROUTINE_GL40\n"
|
|
||||||
"layout(index = 11) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 12) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out.rgb = t.rgb;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 13) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 14) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 15) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out = c * t * 255.0f / 128.0f;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 16) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out = t;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 17) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
|
|
||||||
" c_out.a += t.a;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 18) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" vec4 c_out = c;\n"
|
|
||||||
" c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n"
|
|
||||||
" c_out.a = t.a;\n"
|
|
||||||
" return c_out;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 19) subroutine(TfxType)\n"
|
|
||||||
"vec4 tfx_dummy(vec4 t, vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" return c;\n"
|
|
||||||
"}\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"#ifdef SUBROUTINE_GL40\n"
|
|
||||||
"layout(index = 0) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_never(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 1) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_always(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" // Nothing to do\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 2) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_l(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
|
||||||
" if (PS_SPRITEHACK == 0)\n"
|
|
||||||
" if ((AREF - a - 0.5f) < 0.0f)\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 3) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_le(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
|
||||||
" if ((AREF - a + 0.5f) < 0.0f)\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 4) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_e(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
|
||||||
" if ((0.5f - abs(a - AREF)) < 0.0f)\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 5) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_ge(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
|
||||||
" if ((a-AREF + 0.5f) < 0.0f)\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 6) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_g(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
|
||||||
" if ((a-AREF - 0.5f) < 0.0f)\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 7) subroutine(AlphaTestType)\n"
|
|
||||||
"void atest_ne(vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
|
||||||
" if ((abs(a - AREF) - 0.5f) < 0.0f)\n"
|
|
||||||
" discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"#endif\n"
|
|
||||||
"\n"
|
|
||||||
"#ifdef SUBROUTINE_GL40\n"
|
|
||||||
"layout(index = 8) subroutine(ColClipType)\n"
|
|
||||||
"void colclip_0(inout vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" // nothing to do\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 9) subroutine(ColClipType)\n"
|
|
||||||
"void colclip_1(inout vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" // FIXME !!!!\n"
|
|
||||||
" //c.rgb *= c.rgb < 128./255;\n"
|
|
||||||
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
|
|
||||||
" c.rgb *= vec3(factor);\n"
|
|
||||||
"}\n"
|
|
||||||
"\n"
|
|
||||||
"layout(index = 10) subroutine(ColClipType)\n"
|
|
||||||
"void colclip_2(inout vec4 c)\n"
|
|
||||||
"{\n"
|
|
||||||
" c.rgb = 256.0f/255.0f - c.rgb;\n"
|
|
||||||
" // FIXME !!!!\n"
|
|
||||||
" //c.rgb *= c.rgb < 128./255;\n"
|
|
||||||
" bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n"
|
|
||||||
" c.rgb *= vec3(factor);\n"
|
|
||||||
"}\n"
|
|
||||||
"#endif\n"
|
|
||||||
;
|
;
|
||||||
|
|
||||||
static const char* fxaa_fx =
|
static const char* fxaa_fx =
|
||||||
|
|
Loading…
Reference in New Issue