From 15264c6c63637f775a9fb1834f8390e673808f06 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 17 Apr 2015 20:18:07 +0200 Subject: [PATCH] glsl: split the main shader * separate VS/GS and FS * separate subroutine part of the FS It is already complex enough without the subroutine stuff. Besides, I'm not sure we will keep subroutines in the future. --- linux_various/glsl2h.pl | 33 +- plugins/GSdx/GSDeviceOGL.cpp | 6 +- plugins/GSdx/res/glsl_source.h | 712 ++++++++++++------------ plugins/GSdx/res/tfx_fs.glsl | 539 ++++++++++++++++++ plugins/GSdx/res/tfx_fs_subroutine.glsl | 285 ++++++++++ plugins/GSdx/res/tfx_vgs.glsl | 258 +++++++++ 6 files changed, 1462 insertions(+), 371 deletions(-) create mode 100644 plugins/GSdx/res/tfx_fs.glsl create mode 100644 plugins/GSdx/res/tfx_fs_subroutine.glsl create mode 100644 plugins/GSdx/res/tfx_vgs.glsl diff --git a/linux_various/glsl2h.pl b/linux_various/glsl2h.pl index 51b848e38e..4adb3c1b45 100755 --- a/linux_various/glsl2h.pl +++ b/linux_various/glsl2h.pl @@ -34,16 +34,45 @@ eval { print "Disable MD5\n"; }; -my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx.glsl fxaa.fx/; +######################## +# GSdx +######################## +my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx_vgs.glsl tfx_fs_all.glsl fxaa.fx/; my $gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res"); +# Just a hack to reuse glsl2h function easily +my @tfx_res = qw/tfx_fs.glsl tfx_fs_subroutine.glsl/; +my $tfx_all = File::Spec->catdir($gsdx_path, "tfx_fs_all.glsl"); +concat($gsdx_path, $tfx_all, \@tfx_res); + my $gsdx_out = File::Spec->catdir($gsdx_path, "glsl_source.h"); glsl2h($gsdx_path, $gsdx_out, \@gsdx_res); +unlink $tfx_all; + +######################## +# ZZOGL +######################## my @zz_res = qw/ps2hw_gl4.glsl/; my $zz_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "zzogl-pg", "opengl"); my $zz_out = File::Spec->catdir($zz_path, 
"ps2hw_gl4.h"); glsl2h($zz_path, $zz_out, \@zz_res); +sub concat { + my $in_dir = shift; + my $out_file = shift; + my $glsl_files = shift; + + my $line; + open(my $TMP, ">$out_file"); + foreach my $file (@{$glsl_files}) { + open(my $GLSL, File::Spec->catfile($in_dir, $file)) or die "$! : $file"; + while(defined($line = <$GLSL>)) { + print $TMP $line; + } + } + +} + sub glsl2h { my $in_dir = shift; my $out_file = shift; @@ -87,7 +116,7 @@ EOS $name =~ s/\./_/; $data .= "\nstatic const char* $name =\n"; - open(my $GLSL, File::Spec->catfile($in_dir, $file)) or die; + open(my $GLSL, File::Spec->catfile($in_dir, $file)) or die "$! : $file"; my $line; while(defined($line = <$GLSL>)) { chomp $line; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 88018adfe5..814af10b40 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -662,7 +662,7 @@ GLuint GSDeviceOGL::CompileVS(VSSelector sel) + format("#define VS_WILDHACK %d\n", sel.wildhack) ; - return m_shader->Compile("tfx.glsl", "vs_main", GL_VERTEX_SHADER, tfx_glsl, macro); + return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, macro); } /* Note: must be here because tfx_glsl is static */ @@ -671,7 +671,7 @@ GLuint GSDeviceOGL::CompileGS() #ifdef ENABLE_GLES return 0; #else - return m_shader->Compile("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, tfx_glsl, ""); + return m_shader->Compile("tfx_vgs.glsl", "gs_main", GL_GEOMETRY_SHADER, tfx_vgs_glsl, ""); #endif } @@ -699,7 +699,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_IIP %d\n", sel.iip) ; - return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_glsl, macro); + return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); } GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index e3b262bfe9..8516c51576 100644 --- 
a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -448,27 +448,9 @@ static const char* shadeboost_glsl = "#endif\n" ; -static const char* tfx_glsl = +static const char* tfx_vgs_glsl = "//#version 420 // Keep it for text editor detection\n" "\n" - "// note lerp => mix\n" - "\n" - "#define FMT_32 0\n" - "#define FMT_24 1\n" - "#define FMT_16 2\n" - "#define FMT_PAL 4 /* flag bit */\n" - "\n" - "// APITRACE_DEBUG allows to force pixel output to easily detect\n" - "// the fragment computed by primitive\n" - "#define APITRACE_DEBUG 0\n" - "// TEX_COORD_DEBUG output the uv coordinate as color. It is useful\n" - "// to detect bad sampling due to upscaling\n" - "//#define TEX_COORD_DEBUG\n" - "\n" - "// Not sure we have same issue on opengl. Doesn't work anyway on ATI card\n" - "// And I say this as an ATI user.\n" - "#define ATI_SUCKS 0\n" - "\n" "#ifndef VS_BPPZ\n" "#define VS_BPPZ 0\n" "#define VS_TME 1\n" @@ -476,28 +458,6 @@ static const char* tfx_glsl = "#define VS_LOGZ 0\n" "#endif\n" "\n" - "#ifndef PS_FST\n" - "#define PS_FST 0\n" - "#define PS_WMS 0\n" - "#define PS_WMT 0\n" - "#define PS_FMT FMT_32\n" - "#define PS_AEM 0\n" - "#define PS_TFX 0\n" - "#define PS_TCC 1\n" - "#define PS_ATST 1\n" - "#define PS_FOG 0\n" - "#define PS_CLR1 0\n" - "#define PS_FBA 0\n" - "#define PS_AOUT 0\n" - "#define PS_LTF 1\n" - "#define PS_COLCLIP 0\n" - "#define PS_DATE 0\n" - "#define PS_SPRITEHACK 0\n" - "#define PS_POINT_SAMPLER 0\n" - "#define PS_TCOFFSETHACK 0\n" - "#define PS_IIP 1\n" - "#endif\n" - "\n" "struct vertex\n" "{\n" " vec4 t;\n" @@ -747,6 +707,50 @@ static const char* tfx_glsl = "}\n" "\n" "#endif\n" + ; + +static const char* tfx_fs_all_glsl = + "//#version 420 // Keep it for text editor detection\n" + "\n" + "// note lerp => mix\n" + "\n" + "#define FMT_32 0\n" + "#define FMT_24 1\n" + "#define FMT_16 2\n" + "#define FMT_PAL 4 /* flag bit */\n" + "\n" + "// APITRACE_DEBUG allows to force pixel output to easily detect\n" + "// the fragment 
computed by primitive\n" + "#define APITRACE_DEBUG 0\n" + "// TEX_COORD_DEBUG output the uv coordinate as color. It is useful\n" + "// to detect bad sampling due to upscaling\n" + "//#define TEX_COORD_DEBUG\n" + "\n" + "// Not sure we have same issue on opengl. Doesn't work anyway on ATI card\n" + "// And I say this as an ATI user.\n" + "#define ATI_SUCKS 0\n" + "\n" + "#ifndef PS_FST\n" + "#define PS_FST 0\n" + "#define PS_WMS 0\n" + "#define PS_WMT 0\n" + "#define PS_FMT FMT_32\n" + "#define PS_AEM 0\n" + "#define PS_TFX 0\n" + "#define PS_TCC 1\n" + "#define PS_ATST 1\n" + "#define PS_FOG 0\n" + "#define PS_CLR1 0\n" + "#define PS_FBA 0\n" + "#define PS_AOUT 0\n" + "#define PS_LTF 1\n" + "#define PS_COLCLIP 0\n" + "#define PS_DATE 0\n" + "#define PS_SPRITEHACK 0\n" + "#define PS_POINT_SAMPLER 0\n" + "#define PS_TCOFFSETHACK 0\n" + "#define PS_IIP 1\n" + "#endif\n" "\n" "#ifdef FRAGMENT_SHADER\n" "\n" @@ -807,6 +811,19 @@ static const char* tfx_glsl = " vec4 TC_OffsetHack;\n" "};\n" "\n" + "#ifdef SUBROUTINE_GL40\n" + "// Function pointer type + the functionn pointer variable\n" + "subroutine void AlphaTestType(vec4 c);\n" + "layout(location = 0) subroutine uniform AlphaTestType atst;\n" + "\n" + "subroutine vec4 TfxType(vec4 t, vec4 c);\n" + "layout(location = 2) subroutine uniform TfxType tfx;\n" + "\n" + "subroutine void ColClipType(inout vec4 c);\n" + "layout(location = 1) subroutine uniform ColClipType colclip;\n" + "#endif\n" + "\n" + "\n" "vec4 sample_c(vec2 uv)\n" "{\n" " // FIXME: check the issue on openGL\n" @@ -830,94 +847,6 @@ static const char* tfx_glsl = " return texture(PaletteSampler, vec2(u, 0.0f));\n" "}\n" "\n" - "#if 0\n" - "vec4 sample_rt(vec2 uv)\n" - "{\n" - " return texture(RTCopySampler, uv);\n" - "}\n" - "#endif\n" - "\n" - "// FIXME crash nvidia\n" - "#if 0\n" - "// Function pointer type\n" - "subroutine vec4 WrapType(vec4 uv);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 4) subroutine uniform WrapType wrapuv;\n" 
- "\n" - "layout(index = 24) subroutine(WrapType)\n" - "vec4 wrapuv_wms_wmt_2(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 25) subroutine(WrapType)\n" - "vec4 wrapuv_wms_wmt3(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 26) subroutine(WrapType)\n" - "vec4 wrapuv_wms2_wmt3(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" - " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 27) subroutine(WrapType)\n" - "vec4 wrapuv_wms3_wmt2(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" - " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 28) subroutine(WrapType)\n" - "vec4 wrapuv_wms2_wmtx(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 29) subroutine(WrapType)\n" - "vec4 wrapuv_wmsx_wmt3(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 30) subroutine(WrapType)\n" - "vec4 wrapuv_wms3_wmtx(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 31) subroutine(WrapType)\n" - "vec4 wrapuv_wmsx_wmt2(vec4 uv)\n" - "{\n" - " vec4 uv_out = uv;\n" - " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 
32) subroutine(WrapType)\n" - "vec4 wrapuv_dummy(vec4 uv)\n" - "{\n" - " return uv;\n" - "}\n" - "\n" - "#else\n" "vec4 wrapuv(vec4 uv)\n" "{\n" " vec4 uv_out = uv;\n" @@ -955,45 +884,7 @@ static const char* tfx_glsl = "\n" " return uv_out;\n" "}\n" - "#endif\n" "\n" - "// FIXME crash nvidia\n" - "#if 0\n" - "// Function pointer type\n" - "subroutine vec2 ClampType(vec2 uv);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 3) subroutine uniform ClampType clampuv;\n" - "\n" - "layout(index = 20) subroutine(ClampType)\n" - "vec2 clampuv_wms2_wmt2(vec2 uv)\n" - "{\n" - " return clamp(uv, MinF, MinMax.zw);\n" - "}\n" - "\n" - "layout(index = 21) subroutine(ClampType)\n" - "vec2 clampuv_wms2(vec2 uv)\n" - "{\n" - " vec2 uv_out = uv;\n" - " uv_out.x = clamp(uv.x, MinF.x, MinMax.z);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 22) subroutine(ClampType)\n" - "vec2 clampuv_wmt2(vec2 uv)\n" - "{\n" - " vec2 uv_out = uv;\n" - " uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n" - " return uv_out;\n" - "}\n" - "\n" - "layout(index = 23) subroutine(ClampType)\n" - "vec2 clampuv_dummy(vec2 uv)\n" - "{\n" - " return uv;\n" - "}\n" - "\n" - "#else\n" "vec2 clampuv(vec2 uv)\n" "{\n" " vec2 uv_out = uv;\n" @@ -1013,7 +904,6 @@ static const char* tfx_glsl = "\n" " return uv_out;\n" "}\n" - "#endif\n" "\n" "mat4 sample_4c(vec4 uv)\n" "{\n" @@ -1128,86 +1018,7 @@ static const char* tfx_glsl = " return t;\n" "}\n" "\n" - "#ifdef SUBROUTINE_GL40\n" - "// Function pointer type\n" - "subroutine vec4 TfxType(vec4 t, vec4 c);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 2) subroutine uniform TfxType tfx;\n" - "\n" - "layout(index = 11) subroutine(TfxType)\n" - "vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 12) subroutine(TfxType)\n" - "vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = 
t.rgb;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 13) subroutine(TfxType)\n" - "vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 14) subroutine(TfxType)\n" - "vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 15) subroutine(TfxType)\n" - "vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out = c * t * 255.0f / 128.0f;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 16) subroutine(TfxType)\n" - "vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out = t;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 17) subroutine(TfxType)\n" - "vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " c_out.a += t.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 18) subroutine(TfxType)\n" - "vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n" - "{\n" - " vec4 c_out = c;\n" - " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" - " c_out.a = t.a;\n" - " return c_out;\n" - "}\n" - "\n" - "layout(index = 19) subroutine(TfxType)\n" - "vec4 tfx_dummy(vec4 t, vec4 c)\n" - "{\n" - " return c;\n" - "}\n" - "\n" - "#else\n" + "#ifndef SUBROUTINE_GL40\n" "vec4 tfx(vec4 t, vec4 c)\n" "{\n" " vec4 c_out = c;\n" @@ -1256,91 +1067,7 @@ static const char* tfx_glsl = "}\n" "#endif\n" "\n" - "\n" - "#if 0\n" - "void datst()\n" - "{\n" - "#if PS_DATE > 0\n" - " float alpha = sample_rt(PSin_tp.xy).a;\n" - " float alpha0x80 = 128.0 / 255;\n" - "\n" - " if (PS_DATE == 1 && alpha >= alpha0x80)\n" - " discard;\n" - " else if (PS_DATE == 2 && alpha < alpha0x80)\n" - " discard;\n" - "#endif\n" - "}\n" - "#endif\n" - "\n" - "#ifdef SUBROUTINE_GL40\n" - "// Function pointer type\n" - "subroutine void 
AlphaTestType(vec4 c);\n" - "\n" - "// a function pointer variable\n" - "layout(location = 0) subroutine uniform AlphaTestType atst;\n" - "\n" - "layout(index = 0) subroutine(AlphaTestType)\n" - "void atest_never(vec4 c)\n" - "{\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 1) subroutine(AlphaTestType)\n" - "void atest_always(vec4 c)\n" - "{\n" - " // Nothing to do\n" - "}\n" - "\n" - "layout(index = 2) subroutine(AlphaTestType)\n" - "void atest_l(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if (PS_SPRITEHACK == 0)\n" - " if ((AREF - a - 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 3) subroutine(AlphaTestType)\n" - "void atest_le(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((AREF - a + 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 4) subroutine(AlphaTestType)\n" - "void atest_e(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((0.5f - abs(a - AREF)) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 5) subroutine(AlphaTestType)\n" - "void atest_ge(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((a-AREF + 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 6) subroutine(AlphaTestType)\n" - "void atest_g(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((a-AREF - 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "layout(index = 7) subroutine(AlphaTestType)\n" - "void atest_ne(vec4 c)\n" - "{\n" - " float a = trunc(c.a * 255.0 + 0.01);\n" - " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" - " discard;\n" - "}\n" - "\n" - "#else\n" + "#ifndef SUBROUTINE_GL40\n" "void atst(vec4 c)\n" "{\n" " float a = trunc(c.a * 255.0 + 0.01);\n" @@ -1388,39 +1115,7 @@ static const char* tfx_glsl = "#endif\n" "\n" "// Note layout stuff might require gl4.3\n" - "#ifdef SUBROUTINE_GL40\n" - "// Function pointer type\n" - "subroutine void ColClipType(inout vec4 c);\n" - "\n" - "// a function pointer variable\n" - 
"layout(location = 1) subroutine uniform ColClipType colclip;\n" - "\n" - "layout(index = 8) subroutine(ColClipType)\n" - "void colclip_0(inout vec4 c)\n" - "{\n" - " // nothing to do\n" - "}\n" - "\n" - "layout(index = 9) subroutine(ColClipType)\n" - "void colclip_1(inout vec4 c)\n" - "{\n" - " // FIXME !!!!\n" - " //c.rgb *= c.rgb < 128./255;\n" - " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" - " c.rgb *= vec3(factor);\n" - "}\n" - "\n" - "layout(index = 10) subroutine(ColClipType)\n" - "void colclip_2(inout vec4 c)\n" - "{\n" - " c.rgb = 256.0f/255.0f - c.rgb;\n" - " // FIXME !!!!\n" - " //c.rgb *= c.rgb < 128./255;\n" - " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" - " c.rgb *= vec3(factor);\n" - "}\n" - "\n" - "#else\n" + "#ifndef SUBROUTINE_GL40\n" "void colclip(inout vec4 c)\n" "{\n" " if (PS_COLCLIP == 2)\n" @@ -1554,6 +1249,291 @@ static const char* tfx_glsl = "#endif // !pGL_ES\n" "\n" "#endif\n" + "//#version 420 // Keep it for text editor detection\n" + "\n" + "// Subroutine of standard fs function (I don't know if it will be ever used one day)\n" + "\n" + "// FIXME crash nvidia\n" + "#if 0\n" + "// Function pointer type\n" + "subroutine vec4 WrapType(vec4 uv);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 4) subroutine uniform WrapType wrapuv;\n" + "\n" + "layout(index = 24) subroutine(WrapType)\n" + "vec4 wrapuv_wms_wmt_2(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 25) subroutine(WrapType)\n" + "vec4 wrapuv_wms_wmt3(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 26) subroutine(WrapType)\n" + "vec4 wrapuv_wms2_wmt3(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" + " 
uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 27) subroutine(WrapType)\n" + "vec4 wrapuv_wms3_wmt2(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" + " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 28) subroutine(WrapType)\n" + "vec4 wrapuv_wms2_wmtx(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 29) subroutine(WrapType)\n" + "vec4 wrapuv_wmsx_wmt3(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 30) subroutine(WrapType)\n" + "vec4 wrapuv_wms3_wmtx(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 31) subroutine(WrapType)\n" + "vec4 wrapuv_wmsx_wmt2(vec4 uv)\n" + "{\n" + " vec4 uv_out = uv;\n" + " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 32) subroutine(WrapType)\n" + "vec4 wrapuv_dummy(vec4 uv)\n" + "{\n" + " return uv;\n" + "}\n" + "#endif\n" + "\n" + "// FIXME crash nvidia\n" + "#if 0\n" + "// Function pointer type\n" + "subroutine vec2 ClampType(vec2 uv);\n" + "\n" + "// a function pointer variable\n" + "layout(location = 3) subroutine uniform ClampType clampuv;\n" + "\n" + "layout(index = 20) subroutine(ClampType)\n" + "vec2 clampuv_wms2_wmt2(vec2 uv)\n" + "{\n" + " return clamp(uv, MinF, MinMax.zw);\n" + "}\n" + "\n" + "layout(index = 21) subroutine(ClampType)\n" + "vec2 clampuv_wms2(vec2 uv)\n" + "{\n" + " vec2 uv_out = uv;\n" + " uv_out.x = clamp(uv.x, 
MinF.x, MinMax.z);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 22) subroutine(ClampType)\n" + "vec2 clampuv_wmt2(vec2 uv)\n" + "{\n" + " vec2 uv_out = uv;\n" + " uv_out.y = clamp(uv.y, MinF.y, MinMax.w);\n" + " return uv_out;\n" + "}\n" + "\n" + "layout(index = 23) subroutine(ClampType)\n" + "vec2 clampuv_dummy(vec2 uv)\n" + "{\n" + " return uv;\n" + "}\n" + "#endif\n" + "\n" + "#ifdef SUBROUTINE_GL40\n" + "layout(index = 11) subroutine(TfxType)\n" + "vec4 tfx_0_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 12) subroutine(TfxType)\n" + "vec4 tfx_1_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = t.rgb;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 13) subroutine(TfxType)\n" + "vec4 tfx_2_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 14) subroutine(TfxType)\n" + "vec4 tfx_3_tcc_0(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 15) subroutine(TfxType)\n" + "vec4 tfx_0_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out = c * t * 255.0f / 128.0f;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 16) subroutine(TfxType)\n" + "vec4 tfx_1_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out = t;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 17) subroutine(TfxType)\n" + "vec4 tfx_2_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " c_out.a += t.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 18) subroutine(TfxType)\n" + "vec4 tfx_3_tcc_1(vec4 t, vec4 c)\n" + "{\n" + " vec4 c_out = c;\n" + " c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a;\n" + " c_out.a = 
t.a;\n" + " return c_out;\n" + "}\n" + "\n" + "layout(index = 19) subroutine(TfxType)\n" + "vec4 tfx_dummy(vec4 t, vec4 c)\n" + "{\n" + " return c;\n" + "}\n" + "#endif\n" + "\n" + "#ifdef SUBROUTINE_GL40\n" + "layout(index = 0) subroutine(AlphaTestType)\n" + "void atest_never(vec4 c)\n" + "{\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 1) subroutine(AlphaTestType)\n" + "void atest_always(vec4 c)\n" + "{\n" + " // Nothing to do\n" + "}\n" + "\n" + "layout(index = 2) subroutine(AlphaTestType)\n" + "void atest_l(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if (PS_SPRITEHACK == 0)\n" + " if ((AREF - a - 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 3) subroutine(AlphaTestType)\n" + "void atest_le(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((AREF - a + 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 4) subroutine(AlphaTestType)\n" + "void atest_e(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((0.5f - abs(a - AREF)) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 5) subroutine(AlphaTestType)\n" + "void atest_ge(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((a-AREF + 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 6) subroutine(AlphaTestType)\n" + "void atest_g(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((a-AREF - 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "\n" + "layout(index = 7) subroutine(AlphaTestType)\n" + "void atest_ne(vec4 c)\n" + "{\n" + " float a = trunc(c.a * 255.0 + 0.01);\n" + " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" + " discard;\n" + "}\n" + "#endif\n" + "\n" + "#ifdef SUBROUTINE_GL40\n" + "layout(index = 8) subroutine(ColClipType)\n" + "void colclip_0(inout vec4 c)\n" + "{\n" + " // nothing to do\n" + "}\n" + "\n" + "layout(index = 9) subroutine(ColClipType)\n" + "void colclip_1(inout vec4 c)\n" + "{\n" + " // FIXME !!!!\n" + " //c.rgb *= c.rgb < 128./255;\n" 
+ " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" + " c.rgb *= vec3(factor);\n" + "}\n" + "\n" + "layout(index = 10) subroutine(ColClipType)\n" + "void colclip_2(inout vec4 c)\n" + "{\n" + " c.rgb = 256.0f/255.0f - c.rgb;\n" + " // FIXME !!!!\n" + " //c.rgb *= c.rgb < 128./255;\n" + " bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);\n" + " c.rgb *= vec3(factor);\n" + "}\n" + "#endif\n" ; static const char* fxaa_fx = diff --git a/plugins/GSdx/res/tfx_fs.glsl b/plugins/GSdx/res/tfx_fs.glsl new file mode 100644 index 0000000000..5b46e1d15d --- /dev/null +++ b/plugins/GSdx/res/tfx_fs.glsl @@ -0,0 +1,539 @@ +//#version 420 // Keep it for text editor detection + +// note lerp => mix + +#define FMT_32 0 +#define FMT_24 1 +#define FMT_16 2 +#define FMT_PAL 4 /* flag bit */ + +// APITRACE_DEBUG allows to force pixel output to easily detect +// the fragment computed by primitive +#define APITRACE_DEBUG 0 +// TEX_COORD_DEBUG output the uv coordinate as color. It is useful +// to detect bad sampling due to upscaling +//#define TEX_COORD_DEBUG + +// Not sure we have same issue on opengl. Doesn't work anyway on ATI card +// And I say this as an ATI user. 
+#define ATI_SUCKS 0 + +#ifndef PS_FST +#define PS_FST 0 +#define PS_WMS 0 +#define PS_WMT 0 +#define PS_FMT FMT_32 +#define PS_AEM 0 +#define PS_TFX 0 +#define PS_TCC 1 +#define PS_ATST 1 +#define PS_FOG 0 +#define PS_CLR1 0 +#define PS_FBA 0 +#define PS_AOUT 0 +#define PS_LTF 1 +#define PS_COLCLIP 0 +#define PS_DATE 0 +#define PS_SPRITEHACK 0 +#define PS_POINT_SAMPLER 0 +#define PS_TCOFFSETHACK 0 +#define PS_IIP 1 +#endif + +#ifdef FRAGMENT_SHADER + +in SHADER +{ + vec4 t; + vec4 c; + flat vec4 fc; +} PSin; + +#define PSin_t (PSin.t) +#define PSin_c (PSin.c) +#define PSin_fc (PSin.fc) + +// Same buffer but 2 colors for dual source blending +#if pGL_ES +layout(location = 0) out vec4 SV_Target0; +#else +layout(location = 0, index = 0) out vec4 SV_Target0; +layout(location = 0, index = 1) out vec4 SV_Target1; +#endif + +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +layout(bindless_sampler, location = 1) uniform sampler2D PaletteSampler; +#else +layout(binding = 0) uniform sampler2D TextureSampler; +layout(binding = 1) uniform sampler2D PaletteSampler; +#endif + +#ifndef DISABLE_GL42_image +#if PS_DATE > 0 +// FIXME how to declare memory access +layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min; +#endif +#else +// use basic stencil +#endif + +#ifndef DISABLE_GL42_image +#if PS_DATE > 0 +// origin_upper_left +layout(pixel_center_integer) in vec4 gl_FragCoord; +//in int gl_PrimitiveID; +#endif +#endif + +layout(std140, binding = 21) uniform cb21 +{ + vec3 FogColor; + float AREF; + vec4 WH; + vec2 MinF; + vec2 TA; + uvec4 MskFix; + vec4 HalfTexel; + vec4 MinMax; + vec4 TC_OffsetHack; +}; + +#ifdef SUBROUTINE_GL40 +// Function pointer type + the functionn pointer variable +subroutine void AlphaTestType(vec4 c); +layout(location = 0) subroutine uniform AlphaTestType atst; + +subroutine vec4 TfxType(vec4 t, vec4 c); +layout(location = 2) subroutine uniform TfxType tfx; + +subroutine void 
ColClipType(inout vec4 c); +layout(location = 1) subroutine uniform ColClipType colclip; +#endif + + +vec4 sample_c(vec2 uv) +{ + // FIXME: check the issue on openGL + if (ATI_SUCKS == 1 && PS_POINT_SAMPLER == 1) + { + // Weird issue with ATI cards (happens on at least HD 4xxx and 5xxx), + // it looks like they add 127/128 of a texel to sampling coordinates + // occasionally causing point sampling to erroneously round up. + // I'm manually adjusting coordinates to the centre of texels here, + // though the centre is just paranoia, the top left corner works fine. + uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw; + } + + return texture(TextureSampler, uv); +} + +vec4 sample_p(float u) +{ + //FIXME do we need a 1D sampler. Big impact on opengl to find 1 dim + // So for the moment cheat with 0.0f dunno if it work + return texture(PaletteSampler, vec2(u, 0.0f)); +} + +vec4 wrapuv(vec4 uv) +{ + vec4 uv_out = uv; + + if(PS_WMS == PS_WMT) + { + if(PS_WMS == 2) + { + uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); + } + else if(PS_WMS == 3) + { + uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; + } + } + else + { + if(PS_WMS == 2) + { + uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + } + else if(PS_WMS == 3) + { + uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; + } + if(PS_WMT == 2) + { + uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + } + else if(PS_WMT == 3) + { + uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; + } + } + + return uv_out; +} + +vec2 clampuv(vec2 uv) +{ + vec2 uv_out = uv; + + if(PS_WMS == 2 && PS_WMT == 2) + { + uv_out = clamp(uv, MinF, MinMax.zw); + } + else if(PS_WMS == 2) + { + uv_out.x = clamp(uv.x, MinF.x, MinMax.z); + } + else if(PS_WMT == 2) + { + uv_out.y = clamp(uv.y, MinF.y, MinMax.w); + } + + return uv_out; +} + +mat4 sample_4c(vec4 uv) +{ + mat4 c; + + c[0] = sample_c(uv.xy); + c[1] = sample_c(uv.zy); + c[2] = 
sample_c(uv.xw); + c[3] = sample_c(uv.zw); + + return c; +} + +vec4 sample_4a(vec4 uv) +{ + vec4 c; + + // Dx used the alpha channel. + // Opengl is only 8 bits on red channel. + c.x = sample_c(uv.xy).r; + c.y = sample_c(uv.zy).r; + c.z = sample_c(uv.xw).r; + c.w = sample_c(uv.zw).r; + + return c * 255.0/256.0 + 0.5/256.0; +} + +mat4 sample_4p(vec4 u) +{ + mat4 c; + + c[0] = sample_p(u.x); + c[1] = sample_p(u.y); + c[2] = sample_p(u.z); + c[3] = sample_p(u.w); + + return c; +} + +vec4 sample_color(vec2 st, float q) +{ + if(PS_FST == 0) st /= q; + + if(PS_TCOFFSETHACK == 1) st += TC_OffsetHack.xy; + + vec4 t; + mat4 c; + vec2 dd; + + if (PS_LTF == 0 && PS_FMT <= FMT_16 && PS_WMS < 3 && PS_WMT < 3) + { + c[0] = sample_c(clampuv(st)); +#ifdef TEX_COORD_DEBUG + c[0].rg = clampuv(st).xy; +#endif + } + else + { + vec4 uv; + + if(PS_LTF != 0) + { + uv = st.xyxy + HalfTexel; + dd = fract(uv.xy * WH.zw); + } + else + { + uv = st.xyxy; + } + + uv = wrapuv(uv); + + if((PS_FMT & FMT_PAL) != 0) + { + c = sample_4p(sample_4a(uv)); + } + else + { + c = sample_4c(uv); + } +#ifdef TEX_COORD_DEBUG + c[0].rg = uv.xy; +#endif + } + + // PERF: see the impact of the exansion before/after the interpolation + for (int i = 0; i < 4; i++) + { + if((PS_FMT & ~FMT_PAL) == FMT_24) + { + // FIXME GLSL any only support bvec so try to mix it with notEqual + bvec3 rgb_check = notEqual( c[i].rgb, vec3(0.0f, 0.0f, 0.0f) ); + c[i].a = ( (PS_AEM == 0) || any(rgb_check) ) ? TA.x : 0.0f; + } + else if((PS_FMT & ~FMT_PAL) == FMT_16) + { + // FIXME GLSL any only support bvec so try to mix it with notEqual + bvec3 rgb_check = notEqual( c[i].rgb, vec3(0.0f, 0.0f, 0.0f) ); + c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(rgb_check) ) ? 
TA.x : 0.0f; + } + } + + if(PS_LTF != 0) + { + t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y); + } + else + { + t = c[0]; + } + + return t; +} + +#ifndef SUBROUTINE_GL40 +vec4 tfx(vec4 t, vec4 c) +{ + vec4 c_out = c; + if(PS_TFX == 0) + { + if(PS_TCC != 0) + { + c_out = c * t * 255.0f / 128.0f; + } + else + { + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f; + } + } + else if(PS_TFX == 1) + { + if(PS_TCC != 0) + { + c_out = t; + } + else + { + c_out.rgb = t.rgb; + } + } + else if(PS_TFX == 2) + { + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + + if(PS_TCC != 0) + { + c_out.a += t.a; + } + } + else if(PS_TFX == 3) + { + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + + if(PS_TCC != 0) + { + c_out.a = t.a; + } + } + + return c_out; +} +#endif + +#ifndef SUBROUTINE_GL40 +void atst(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + + if(PS_ATST == 0) // never + { + discard; + } + else if(PS_ATST == 1) // always + { + // nothing to do + } + else if(PS_ATST == 2 ) // l + { + if (PS_SPRITEHACK == 0) + if ((AREF - a - 0.5f) < 0.0f) + discard; + } + else if(PS_ATST == 3 ) // le + { + if ((AREF - a + 0.5f) < 0.0f) + discard; + } + else if(PS_ATST == 4) // e + { + if ((0.5f - abs(a - AREF)) < 0.0f) + discard; + } + else if(PS_ATST == 5) // ge + { + if ((a-AREF + 0.5f) < 0.0f) + discard; + } + else if(PS_ATST == 6) // g + { + if ((a-AREF - 0.5f) < 0.0f) + discard; + } + else if(PS_ATST == 7) // ne + { + if ((abs(a - AREF) - 0.5f) < 0.0f) + discard; + } +} +#endif + +// Note layout stuff might require gl4.3 +#ifndef SUBROUTINE_GL40 +void colclip(inout vec4 c) +{ + if (PS_COLCLIP == 2) + { + c.rgb = 256.0f/255.0f - c.rgb; + } + if (PS_COLCLIP > 0) + { + // Zero every channel that is >= 128/255 (HLSL: c.rgb *= c.rgb < 128./255).
+ // lessThan() builds the per-channel mask; bvec3(float) was always true. + bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f)); + c.rgb *= vec3(factor); + } +} +#endif + +void fog(inout vec4 c, float f) +{ + if(PS_FOG != 0) + { + c.rgb = mix(FogColor, c.rgb, f); + } +} + +vec4 ps_color() +{ + vec4 t = sample_color(PSin_t.xy, PSin_t.w); + + vec4 zero = vec4(0.0f, 0.0f, 0.0f, 0.0f); + vec4 one = vec4(1.0f, 1.0f, 1.0f, 1.0f); +#ifdef TEX_COORD_DEBUG + vec4 c = clamp(t, zero, one); +#else +#if PS_IIP == 1 + vec4 c = clamp(tfx(t, PSin_c), zero, one); +#else + vec4 c = clamp(tfx(t, PSin_fc), zero, one); +#endif +#endif + + atst(c); + + fog(c, PSin_t.z); + + colclip(c); + + if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes + { + c.rgb = vec3(1.0f, 1.0f, 1.0f); + } + + return c; +} + +#if pGL_ES +void ps_main() +{ + vec4 c = ps_color(); + c.a *= 2.0; + SV_Target0 = c; +} +#endif + +#if !pGL_ES +void ps_main() +{ +#if PS_DATE == 3 && !defined(DISABLE_GL42_image) + int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r; // imageLoad returns a 4-component vector + // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update + // the bad alpha value so we must keep it. + + if (gl_PrimitiveID > stencil_ceil) { + discard; + } +#endif + + vec4 c = ps_color(); +#if (APITRACE_DEBUG & 1) == 1 + c.r = 1.0f; +#endif +#if (APITRACE_DEBUG & 2) == 2 + c.g = 1.0f; +#endif +#if (APITRACE_DEBUG & 4) == 4 + c.b = 1.0f; +#endif +#if (APITRACE_DEBUG & 8) == 8 + c.a = 0.5f; +#endif + + float alpha = c.a * 2.0; + + if(PS_AOUT != 0) // 16 bit output + { + float a = 128.0f / 255.0; // alpha output will be 0x80 + + c.a = (PS_FBA != 0) ? 
a : step(0.5, c.a) * a; + } + else if(PS_FBA != 0) + { + if(c.a < 0.5) c.a += 0.5; + } + + // Get first primitive that will write a failing alpha value +#if PS_DATE == 1 && !defined(DISABLE_GL42_image) + // DATM == 0 + // Pixel with alpha equal to 1 will fail + if (c.a > 127.5f / 255.0f) { + imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); + } + //memoryBarrier(); +#elif PS_DATE == 2 && !defined(DISABLE_GL42_image) + // DATM == 1 + // Pixel with alpha equal to 0 will fail + if (c.a < 127.5f / 255.0f) { + imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); + } +#endif + + +#if (PS_DATE == 2 || PS_DATE == 1) && !defined(DISABLE_GL42_image) + // Don't write anything on the framebuffer + // Note: you can't use discard because it will also drop + // the image operation +#else + SV_Target0 = c; + SV_Target1 = vec4(alpha, alpha, alpha, alpha); +#endif + +} +#endif // !pGL_ES + +#endif diff --git a/plugins/GSdx/res/tfx_fs_subroutine.glsl b/plugins/GSdx/res/tfx_fs_subroutine.glsl new file mode 100644 index 0000000000..ce3d4bac58 --- /dev/null +++ b/plugins/GSdx/res/tfx_fs_subroutine.glsl @@ -0,0 +1,285 @@ +//#version 420 // Keep it for text editor detection + +// Subroutine versions of the standard fs functions (I don't know if they will ever be used) + +// FIXME crash nvidia +#if 0 +// Function pointer type +subroutine vec4 WrapType(vec4 uv); + +// a function pointer variable +layout(location = 4) subroutine uniform WrapType wrapuv; + +layout(index = 24) subroutine(WrapType) +vec4 wrapuv_wms_wmt_2(vec4 uv) +{ + vec4 uv_out = uv; + uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); + return uv_out; +} + +layout(index = 25) subroutine(WrapType) +vec4 wrapuv_wms_wmt3(vec4 uv) +{ + vec4 uv_out = uv; + uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; + return uv_out; +} + +layout(index = 26) subroutine(WrapType) +vec4 wrapuv_wms2_wmt3(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = clamp(uv.xz, MinMax.xx, 
MinMax.zz); + uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; + return uv_out; +} + +layout(index = 27) subroutine(WrapType) +vec4 wrapuv_wms3_wmt2(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; + uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return uv_out; +} + +layout(index = 28) subroutine(WrapType) +vec4 wrapuv_wms2_wmtx(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + return uv_out; +} + +layout(index = 29) subroutine(WrapType) +vec4 wrapuv_wmsx_wmt3(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; + return uv_out; +} + +layout(index = 30) subroutine(WrapType) +vec4 wrapuv_wms3_wmtx(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; + return uv_out; +} + +layout(index = 31) subroutine(WrapType) +vec4 wrapuv_wmsx_wmt2(vec4 uv) +{ + vec4 uv_out = uv; + uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return uv_out; +} + +layout(index = 32) subroutine(WrapType) +vec4 wrapuv_dummy(vec4 uv) +{ + return uv; +} +#endif + +// FIXME crash nvidia (kept disabled by the #if 0 below) +#if 0 +// Function pointer type +subroutine vec2 ClampType(vec2 uv); + +// a function pointer variable +layout(location = 3) subroutine uniform ClampType clampuv; + +layout(index = 20) subroutine(ClampType) +vec2 clampuv_wms2_wmt2(vec2 uv) +{ + return clamp(uv, MinF, MinMax.zw); +} + +layout(index = 21) subroutine(ClampType) +vec2 clampuv_wms2(vec2 uv) +{ + vec2 uv_out = uv; + uv_out.x = clamp(uv.x, MinF.x, MinMax.z); + return uv_out; +} + +layout(index = 22) subroutine(ClampType) +vec2 clampuv_wmt2(vec2 uv) +{ + vec2 uv_out = uv; + uv_out.y = clamp(uv.y, MinF.y, MinMax.w); + return uv_out; +} + +layout(index = 23) subroutine(ClampType) +vec2 clampuv_dummy(vec2 uv) +{ + return uv; +} +#endif + +#ifdef 
SUBROUTINE_GL40 +layout(index = 
11) subroutine(TfxType) +vec4 tfx_0_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f; + return c_out; +} + +layout(index = 12) subroutine(TfxType) +vec4 tfx_1_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = t.rgb; + return c_out; +} + +layout(index = 13) subroutine(TfxType) +vec4 tfx_2_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + return c_out; +} + +layout(index = 14) subroutine(TfxType) +vec4 tfx_3_tcc_0(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + return c_out; +} + +layout(index = 15) subroutine(TfxType) +vec4 tfx_0_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out = c * t * 255.0f / 128.0f; + return c_out; +} + +layout(index = 16) subroutine(TfxType) +vec4 tfx_1_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out = t; + return c_out; +} + +layout(index = 17) subroutine(TfxType) +vec4 tfx_2_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + c_out.a += t.a; + return c_out; +} + +layout(index = 18) subroutine(TfxType) +vec4 tfx_3_tcc_1(vec4 t, vec4 c) +{ + vec4 c_out = c; + c_out.rgb = c.rgb * t.rgb * 255.0f / 128.0f + c.a; + c_out.a = t.a; + return c_out; +} + +layout(index = 19) subroutine(TfxType) +vec4 tfx_dummy(vec4 t, vec4 c) +{ + return c; +} +#endif + +#ifdef SUBROUTINE_GL40 +layout(index = 0) subroutine(AlphaTestType) +void atest_never(vec4 c) +{ + discard; +} + +layout(index = 1) subroutine(AlphaTestType) +void atest_always(vec4 c) +{ + // Nothing to do: the fragment is never discarded here +} + +layout(index = 2) subroutine(AlphaTestType) +void atest_l(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if (PS_SPRITEHACK == 0) + if ((AREF - a - 0.5f) < 0.0f) + discard; +} + +layout(index = 3) subroutine(AlphaTestType) +void atest_le(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((AREF - a + 0.5f) < 0.0f) + discard; +} + +layout(index = 4) 
subroutine(AlphaTestType) +void atest_e(vec4 c) +{ 
+ float a = trunc(c.a * 255.0 + 0.01); + if ((0.5f - abs(a - AREF)) < 0.0f) + discard; +} + +layout(index = 5) subroutine(AlphaTestType) +void atest_ge(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((a-AREF + 0.5f) < 0.0f) + discard; +} + +layout(index = 6) subroutine(AlphaTestType) +void atest_g(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((a-AREF - 0.5f) < 0.0f) + discard; +} + +layout(index = 7) subroutine(AlphaTestType) +void atest_ne(vec4 c) +{ + float a = trunc(c.a * 255.0 + 0.01); + if ((abs(a - AREF) - 0.5f) < 0.0f) + discard; +} +#endif + +#ifdef SUBROUTINE_GL40 +layout(index = 8) subroutine(ColClipType) +void colclip_0(inout vec4 c) +{ + // nothing to do +} + +layout(index = 9) subroutine(ColClipType) +void colclip_1(inout vec4 c) +{ + // Zero every channel that is >= 128/255 (HLSL: c.rgb *= c.rgb < 128./255). + // lessThan() builds the per-channel mask; bvec3(float) was always true. + bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f)); + c.rgb *= vec3(factor); +} + +layout(index = 10) subroutine(ColClipType) +void colclip_2(inout vec4 c) +{ + c.rgb = 256.0f/255.0f - c.rgb; + // Zero every channel that is >= 128/255 (HLSL: c.rgb *= c.rgb < 128./255).
+ // lessThan() builds the per-channel mask; bvec3(float) was always true. + bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f)); + c.rgb *= vec3(factor); +} +#endif diff --git a/plugins/GSdx/res/tfx_vgs.glsl b/plugins/GSdx/res/tfx_vgs.glsl new file mode 100644 index 0000000000..6ffbed1e57 --- /dev/null +++ b/plugins/GSdx/res/tfx_vgs.glsl @@ -0,0 +1,258 @@ +//#version 420 // Keep it for text editor detection + +#ifndef VS_BPPZ +#define VS_BPPZ 0 +#define VS_TME 1 +#define VS_FST 1 +#define VS_LOGZ 0 +#endif + +struct vertex +{ + vec4 t; + vec4 c; + vec4 fc; +}; + +#ifdef VERTEX_SHADER +layout(location = 0) in vec2 i_st; +layout(location = 2) in vec4 i_c; +layout(location = 3) in float i_q; +layout(location = 4) in uvec2 i_p; +layout(location = 5) in uint i_z; +layout(location = 6) in uvec2 i_uv; +layout(location = 7) in vec4 i_f; + +out SHADER +{ + vec4 t; + vec4 c; + flat vec4 fc; +} VSout; + +#define VSout_t (VSout.t) +#define VSout_c (VSout.c) +#define VSout_fc (VSout.fc) + +out gl_PerVertex { + invariant vec4 gl_Position; + float gl_PointSize; +#if !pGL_ES + float gl_ClipDistance[]; +#endif +}; + +layout(std140, binding = 20) uniform cb20 +{ + vec2 VertexScale; + vec2 VertexOffset; + vec2 TextureScale; +}; + +#ifdef ZERO_TO_ONE_DEPTH +const float exp_min32 = exp2(-32.0f); +#else +const float exp_min31 = exp2(-31.0f); +#endif + +#ifdef SUBROUTINE_GL40 +// Function pointer type +subroutine void TextureCoordType(void); + +// a function pointer variable +layout(location = 0) subroutine uniform TextureCoordType texture_coord; + +layout(index = 0) subroutine(TextureCoordType) +void tme_0() +{ + VSout_t.xy = vec2(0.0f, 0.0f); + VSout_t.w = 1.0f; +} + +layout(index = 1) subroutine(TextureCoordType) +void tme_1_fst_0() +{ + VSout_t.xy = i_st; + VSout_t.w = i_q; +} + +layout(index = 2) subroutine(TextureCoordType) +void tme_1_fst_1() +{ + VSout_t.xy = vec2(i_uv) * TextureScale; + VSout_t.w = 1.0f; +} + +#else + +void texture_coord() +{ + if(VS_TME != 0) + { + if(VS_FST != 0) + { + if 
(VS_WILDHACK == 
1) { + VSout_t.xy = vec2(i_uv & uvec2(0x3FEF, 0x3FEF)) * TextureScale; + } else { + VSout_t.xy = vec2(i_uv) * TextureScale; + } + VSout_t.w = 1.0f; + } + else + { + VSout_t.xy = i_st; + VSout_t.w = i_q; + } + } + else + { + VSout_t.xy = vec2(0.0f, 0.0f); + VSout_t.w = 1.0f; + } +} + +#endif + +void vs_main() +{ + highp uint z; + if(VS_BPPZ == 1) // 24 bits: keep only the low 24 bits of i_z + z = i_z & uint(0xffffff); + else if(VS_BPPZ == 2) // 16 bits: keep only the low 16 bits of i_z + z = i_z & uint(0xffff); + else + z = i_z; + + // pos -= 0.05 (1/320 pixel) helps to avoid rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go) + // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty + // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel + // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133 + vec4 p; + + p.xy = vec2(i_p) - vec2(0.05f, 0.05f); + p.xy = p.xy * VertexScale - VertexOffset; + p.w = 1.0f; +#ifdef ZERO_TO_ONE_DEPTH + if(VS_LOGZ == 1) { + p.z = log2(float(1u+z)) / 32.0f; + } else { + p.z = float(z) * exp_min32; + } +#else + if(VS_LOGZ == 1) { + p.z = log2(float(1u+z)) / 31.0f - 1.0f; + } else { + p.z = float(z) * exp_min31 - 1.0f; + } +#endif + + gl_Position = p; + + texture_coord(); + + VSout_c = i_c; + VSout_fc = i_c; + VSout_t.z = i_f.r; +} + +#endif + +#ifdef GEOMETRY_SHADER +in gl_PerVertex { + invariant vec4 gl_Position; + float gl_PointSize; +#if !pGL_ES + float gl_ClipDistance[]; +#endif +} gl_in[]; +//in int gl_PrimitiveIDIn; + +out gl_PerVertex { + vec4 gl_Position; + float gl_PointSize; +#if !pGL_ES + float gl_ClipDistance[]; +#endif +}; +//out int gl_PrimitiveID; + +in SHADER +{ + vec4 t; + vec4 c; + flat vec4 fc; +} GSin[]; + +out SHADER +{ + vec4 t; + vec4 c; + flat vec4 fc; +} GSout; + +void out_vertex(in vertex v) +{ + GSout.t = v.t; + GSout.c = v.c; + GSout.fc = v.fc; + 
gl_PrimitiveID = gl_PrimitiveIDIn; + EmitVertex(); +} + +layout(lines) 
in; +layout(triangle_strip, max_vertices = 6) out; + +void gs_main() +{ + // left top => GSin[0]; + // right bottom => GSin[1]; + vertex rb = vertex(GSin[1].t, GSin[1].c, GSin[1].fc); + vertex lt = vertex(GSin[0].t, GSin[0].c, GSin[0].fc); + + vec4 rb_p = gl_in[1].gl_Position; + vec4 lb_p = gl_in[1].gl_Position; + vec4 rt_p = gl_in[1].gl_Position; + vec4 lt_p = gl_in[0].gl_Position; + + // flat depth: the left-top corner reuses the right-bottom depth + lt_p.z = rb_p.z; + // flat fog and texture perspective: t.zw is copied from right-bottom + lt.t.zw = rb.t.zw; + // flat color: the left-top corner reuses the right-bottom color + lt.c = rb.c; + + // Build the two remaining corners: start from rb, then take x (lb) or y (rt) from lt + vertex lb = rb; + lb_p.x = lt_p.x; + lb.t.x = lt.t.x; + + vertex rt = rb; + rt_p.y = lt_p.y; + rt.t.y = lt.t.y; + + // Triangle 1 + gl_Position = lt_p; + out_vertex(lt); + + gl_Position = lb_p; + out_vertex(lb); + + gl_Position = rt_p; + out_vertex(rt); + + EndPrimitive(); + + // Triangle 2 + gl_Position = lb_p; + out_vertex(lb); + + gl_Position = rt_p; + out_vertex(rt); + + gl_Position = rb_p; + out_vertex(rb); + + EndPrimitive(); +} + +#endif