2015-04-17 18:18:07 +00:00
|
|
|
//#version 420 // Keep it for text editor detection

// Required for bit operations
//#extension GL_ARB_gpu_shader5 : enable

// Pixel format identifiers; PS_AEM_FMT and PS_DFMT are compared against them.
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2

// APITRACE_DEBUG forces the pixel output so that the fragments computed
// by a primitive are easy to detect.
#define APITRACE_DEBUG 0

// TEX_COORD_DEBUG outputs the uv coordinates as the color. It is useful
// for detecting bad sampling caused by upscaling.
//#define TEX_COORD_DEBUG

// In that debug mode, just copy the texture coordinate directly.
#ifdef TEX_COORD_DEBUG
#define PS_TFX 1
#define PS_TCC 1
#endif

// Non-zero as soon as one of the A, B or D blend selectors is non-zero.
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
|
|
|
|
|
2015-04-17 18:18:07 +00:00
|
|
|
#ifdef FRAGMENT_SHADER
|
|
|
|
|
2016-10-23 10:24:59 +00:00
|
|
|
// Fragment stage input block. The explicit location is only emitted when
// GL_ARB_enhanced_layouts is available and the driver is not flagged as broken.
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
layout(location = 0)
#endif
in SHADER
{
	vec4 t_float;
	vec4 t_int;

	// The color is flat-shaded unless PS_IIP is non-zero.
#if PS_IIP == 0
	flat vec4 c;
#else
	vec4 c;
#endif
} PSin;

// Same buffer but 2 colors for dual source blending
layout(location = 0, index = 0) out vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;

layout(binding = 1) uniform sampler2D PaletteSampler;
// Note: this is texture binding 2; the img_prim_min image declared below uses image binding 3.
layout(binding = 2) uniform sampler2D RtSampler;

#ifndef DISABLE_GL42_image
#if PS_DATE > 0
// Performance note: images must not be declared when they are unused,
// otherwise extra shader validation is required.

// FIXME how to declare memory access
layout(r32i, binding = 3) uniform iimage2D img_prim_min;

// WARNING:
// early_fragment_tests can't be enabled if the fragment is discarded, because
// the depth is still updated (shadow in Shin Megami Tensei Nocturne).
//
// early_fragment_tests must still be enabled in the first pass of the 2-pass
// algorithm. The first pass searches for the first primitive that will write
// the bad alpha value. The value won't be written if the fragment fails the
// depth test.
//
// In theory the best solution would be to:
// 1/ copy the depth buffer
// 2/ do the full depth (current depth writes are disabled)
// 3/ restore the depth buffer for the 2nd pass
// Of course, it is likely too costly.
#if PS_DATE == 1 || PS_DATE == 2
layout(early_fragment_tests) in;
#endif

// I don't remember why I set this parameter but it is surely useless
//layout(pixel_center_integer) in vec4 gl_FragCoord;
#endif
#else
// use basic stencil
#endif
|
|
|
|
|
|
|
|
// Samples one color from the current texture at coordinate uv.
// With PS_TEX_IS_FB the texel under the current fragment is read from RtSampler
// instead and uv is ignored.
vec4 sample_c(vec2 uv)
{
#if PS_TEX_IS_FB == 1
	ivec2 frag_pos = ivec2(gl_FragCoord.xy);
	return texelFetch(RtSampler, frag_pos, 0);
#else
	vec2 coord = uv;

#if PS_POINT_SAMPLER
	// Weird issue with ATI/AMD cards: it looks like they add 127/128 of a texel
	// to the sampling coordinates, occasionally causing point sampling to
	// erroneously round up. Coordinates are manually moved to the centre of the
	// texel here; the centre is just paranoia, the top left corner works fine.
	// As of 2018 this issue is still present.
	vec2 texel_index = trunc(coord * WH.zw);
	coord = (texel_index + vec2(0.5, 0.5)) / WH.zw;
#endif

	coord *= STScale;

#if PS_AUTOMATIC_LOD == 1
	return texture(TextureSampler, coord);
#elif PS_MANUAL_LOD == 1
	// FIXME add LOD: K - ( LOG2(Q) * (1 << L))
	// MinMax carries (K, L, bias, max lod) in this mode.
	float gs_lod = MinMax.x - log2(abs(PSin.t_float.w)) * MinMax.y;

	// FIXME is a max() with 0 useful here?
	float lod = min(gs_lod, MinMax.w) - MinMax.z;

	return textureLod(TextureSampler, coord, lod);
#else
	// No lod
	return textureLod(TextureSampler, coord, 0.0f);
#endif

#endif
}
|
|
|
|
|
2015-08-14 15:53:41 +00:00
|
|
|
// Looks up one palette entry; idx is used as the x coordinate on row 0 of PaletteSampler.
vec4 sample_p(float idx)
{
	vec2 palette_coord = vec2(idx, 0.0f);
	return texture(PaletteSampler, palette_coord);
}
|
|
|
|
|
2015-08-14 18:57:45 +00:00
|
|
|
// Applies the software clamp (mode 2) or mask/fix wrap (mode 3) selected by
// PS_WMS (x and z components) and PS_WMT (y and w components) to two packed
// coordinates. Any other mode leaves the components untouched.
vec4 clamp_wrap_uv(vec4 uv)
{
	vec4 result = uv;

	// Size used to convert to texel units for the mask/fix wrap.
#if PS_INVALID_TEX0 == 1
	vec4 size = WH.zwzw;
#else
	vec4 size = WH.xyxy;
#endif

#if PS_WMS == PS_WMT
	// Same mode on both axes: handle all four components at once.

#if PS_WMS == 2
	result = clamp(uv, MinMax.xyxy, MinMax.zwzw);
#elif PS_WMS == 3
#if PS_FST == 0
	// Wrap negative uv coords to avoid an off by one error that shifted
	// textures. Fixes Xenosaga's hair issue.
	uv = fract(uv);
#endif
	uvec4 texel = uvec4(uv * size);
	result = vec4((texel & MskFix.xyxy) | MskFix.zwzw) / size;
#endif

#else // PS_WMS != PS_WMT

	// Horizontal components
#if PS_WMS == 2
	result.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
#elif PS_WMS == 3
#if PS_FST == 0
	uv.xz = fract(uv.xz);
#endif
	uvec2 texel_s = uvec2(uv.xz * size.xx);
	result.xz = vec2((texel_s & MskFix.xx) | MskFix.zz) / size.xx;
#endif

	// Vertical components
#if PS_WMT == 2
	result.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
#elif PS_WMT == 3
#if PS_FST == 0
	uv.yw = fract(uv.yw);
#endif
	uvec2 texel_t = uvec2(uv.yw * size.yy);
	result.yw = vec2((texel_t & MskFix.yy) | MskFix.ww) / size.yy;
#endif

#endif

	return result;
}
|
|
|
|
|
|
|
|
// Samples the four colors addressed by the packed coordinates in uv and
// returns them as the columns of a matrix, in the order xy, zy, xw, zw.
mat4 sample_4c(vec4 uv)
{
	// Note: texture gather can't be used because of the special
	// clamping/wrapping. It doesn't support lod either.
	return mat4(
		sample_c(uv.xy),
		sample_c(uv.zy),
		sample_c(uv.xw),
		sample_c(uv.zw));
}
|
|
|
|
|
2015-08-14 15:53:41 +00:00
|
|
|
// Fetches the four palette indices (taken from the alpha channel) addressed by
// the packed coordinates in uv, in the order xy, zy, xw, zw.
vec4 sample_4_index(vec4 uv)
{
	// Either GS sends a texture that contains a single channel, in which case
	// the red channel is remapped as the alpha channel,
	//
	// or we have an old RT (ie RGBA8) that contains the index (4/8) in the
	// alpha channel.

	// Note: texture gather can't be used because of the special
	// clamping/wrapping. It doesn't support lod either.
	vec4 index = vec4(
		sample_c(uv.xy).a,
		sample_c(uv.zy).a,
		sample_c(uv.xw).a,
		sample_c(uv.zw).a);

#if PS_PAL_FMT == 1 || PS_PAL_FMT == 2
	// Denormalize the value so that a nibble can be extracted
	uvec4 raw = uvec4(index * 255.0f + 0.5f);

#if PS_PAL_FMT == 1
	// 4HL
	return vec4(raw & 0xFu) / 255.0f;
#else
	// 4HH
	return vec4(raw >> 4u) / 255.0f;
#endif

#else
	// Most textures will hit this code, so keep the normalized float value.
	// 8 bits
	return index;
#endif
}
|
|
|
|
|
2015-08-14 15:53:41 +00:00
|
|
|
// Resolves four palette indices into colors; column i of the result is the
// palette entry for component i of u.
mat4 sample_4p(vec4 u)
{
	return mat4(
		sample_p(u.x),
		sample_p(u.y),
		sample_p(u.z),
		sample_p(u.w));
}
|
|
|
|
|
2016-05-04 16:05:45 +00:00
|
|
|
// Reads the depth stored in the red channel of the texel under the current
// fragment and returns it scaled by 2^32, as a raw bit pattern in an int.
//
// The scaled value is converted through uint first: converting a float that
// is >= 2^31 straight to int is undefined in GLSL, which would corrupt the low
// bits for every depth >= 0.5. The uint -> int conversion preserves the bit
// pattern, so callers that shift and mask the result still get the stored bits.
// This matches the uint conversion sample_depth() performs on fetch_c().
int fetch_raw_depth()
{
	ivec2 frag_pos = ivec2(gl_FragCoord.xy);

#if PS_TEX_IS_FB == 1
	float depth = texelFetch(RtSampler, frag_pos, 0).r;
#else
	float depth = texelFetch(TextureSampler, frag_pos, 0).r;
#endif

	return int(uint(depth * exp2(32.0f)));
}
|
|
|
|
|
|
|
|
// Reads the unfiltered texel under the current fragment, from the render
// target when PS_TEX_IS_FB is set and from the texture otherwise.
vec4 fetch_raw_color()
{
	ivec2 frag_pos = ivec2(gl_FragCoord.xy);

#if PS_TEX_IS_FB == 1
	return texelFetch(RtSampler, frag_pos, 0);
#else
	return texelFetch(TextureSampler, frag_pos, 0);
#endif
}
|
|
|
|
|
2016-04-23 10:06:10 +00:00
|
|
|
// Fetches the texel at integer coordinate uv from mip level 0 of the texture.
vec4 fetch_c(ivec2 uv)
{
	vec4 texel = texelFetch(TextureSampler, uv, 0);
	return texel;
}
|
|
|
|
|
2016-05-04 16:05:45 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Depth sampling
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
2016-04-23 10:06:10 +00:00
|
|
|
// Integer counterpart of clamp_wrap_uv: applies the clamp (mode 2) or mask/fix
// wrap (mode 3) selected by PS_WMS (x) and PS_WMT (y) to one coordinate.
ivec2 clamp_wrap_uv_depth(ivec2 uv)
{
	// Keep the full precision: the limits are shifted left by 4 so that the
	// ScalingFactor can be multiplied in before the 1/16 coefficient.
	ivec4 limits = ivec4(MskFix) << 4;

	ivec2 result = uv;

#if PS_WMS == PS_WMT
	// Same mode on both axes

#if PS_WMS == 2
	result = clamp(uv, limits.xy, limits.zw);
#elif PS_WMS == 3
	result = (uv & limits.xy) | limits.zw;
#endif

#else // PS_WMS != PS_WMT

	// Horizontal axis
#if PS_WMS == 2
	result.x = clamp(uv.x, limits.x, limits.z);
#elif PS_WMS == 3
	result.x = (uv.x & limits.x) | limits.z;
#endif

	// Vertical axis
#if PS_WMT == 2
	result.y = clamp(uv.y, limits.y, limits.w);
#elif PS_WMT == 3
	result.y = (uv.y & limits.y) | limits.w;
#endif

#endif

	return result;
}
|
|
|
|
|
|
|
|
// Samples a depth (or color) source at the integral coordinate st and converts
// it to a color whose components range from 0 to 255.
vec4 sample_depth(vec2 st)
{
	// Wrap/clamp first, then apply the scale factor and the 1/16 coefficient.
	vec2 scaled = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(float(PS_SCALE_FACTOR));
	ivec2 uv = ivec2(scaled * vec2(1.0f/16.0f));

	vec4 color = vec4(0.0f);

#if PS_TALES_OF_ABYSS_HLE == 1
	// Warning: UV can't be used in channel effect
	int depth = fetch_raw_depth();

	// Convert msb based on the palette
	int msb = (depth >> 8) & 0xFF;
	color = texelFetch(PaletteSampler, ivec2(msb, 0), 0) * 255.0f;

#elif PS_URBAN_CHAOS_HLE == 1
	// The depth buffer is read as a RGB5A1 texture. The game tries to extract
	// the green channel, so it does a first channel trick to extract the lsb
	// (the value is right-shifted), then a new channel trick to extract the
	// msb, which is shifted to the left.
	// OpenGL uses a FLOAT32 format for the depth so it requires a couple of
	// conversions. To be faster both steps (msb & lsb) are done in a single pass.

	// Warning: UV can't be used in channel effect
	int depth = fetch_raw_depth();

	// Convert lsb based on the palette
	int lsb = depth & 0xFF;
	color = texelFetch(PaletteSampler, ivec2(lsb, 0), 0) * 255.0f;

	// Msb is easier
	float green = min(float((depth >> 8) & 0xFF) * 36.0f, 255.0f);
	color.g += green;

#elif PS_DEPTH_FMT == 1
	// Based on ps_main11 of convert
	// Convert a GL_FLOAT32 depth texture into a RGBA color texture
	uint d = uint(fetch_c(uv).r * exp2(32.0f));
	uvec4 bytes = uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24));
	color = vec4(bytes);

#elif PS_DEPTH_FMT == 2
	// Based on ps_main12 of convert
	// Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
	uint d = uint(fetch_c(uv).r * exp2(32.0f));
	uvec4 fields = uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u);
	color = vec4(fields) * vec4(8.0f, 8.0f, 8.0f, 128.0f);

#elif PS_DEPTH_FMT == 3
	// Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture
	color = fetch_c(uv) * 255.0f;

#endif

	// Warning: color ranges from 0 to 255
#if (PS_AEM_FMT == FMT_24)
	bool use_ta0 = (PS_AEM == 0) || any(bvec3(color.rgb));
	color.a = use_ta0 ? 255.0f * TA.x : 0.0f;
#elif (PS_AEM_FMT == FMT_16)
	bool use_ta0 = (PS_AEM == 0) || any(bvec3(color.rgb));
	float low_alpha = use_ta0 ? 255.0f * TA.x : 0.0f;
	color.a = (color.a >= 128.0f) ? 255.0f * TA.y : low_alpha;
#endif

	return color;
}
|
2016-04-28 20:15:28 +00:00
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Fetch a Single Channel
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Uses the red channel of the source texel (bits 0-7 of the raw depth for
// depth formats 1 and 2) as a palette index; the result ranges from 0 to 255.
vec4 fetch_red()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	int byte0 = fetch_raw_depth() & 0xFF;
	float index = (vec4(byte0) / 255.0f).r;
#else
	float index = fetch_raw_color().r;
#endif

	return sample_p(index) * 255.0f;
}
|
|
|
|
|
2021-12-05 04:57:43 +00:00
|
|
|
// Uses the green channel of the source texel (bits 8-15 of the raw depth for
// depth formats 1 and 2) as a palette index; the result ranges from 0 to 255.
vec4 fetch_green()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	int byte1 = (fetch_raw_depth() >> 8) & 0xFF;
	float index = (vec4(byte1) / 255.0f).g;
#else
	float index = fetch_raw_color().g;
#endif

	return sample_p(index) * 255.0f;
}
|
|
|
|
|
2021-12-05 04:57:43 +00:00
|
|
|
// Uses the blue channel of the source texel (bits 16-23 of the raw depth for
// depth formats 1 and 2) as a palette index; the result ranges from 0 to 255.
vec4 fetch_blue()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	int byte2 = (fetch_raw_depth() >> 16) & 0xFF;
	float index = (vec4(byte2) / 255.0f).b;
#else
	float index = fetch_raw_color().b;
#endif

	return sample_p(index) * 255.0f;
}
|
|
|
|
|
|
|
|
// Uses the alpha channel of the source texel as a palette index; the result
// ranges from 0 to 255.
vec4 fetch_alpha()
{
	float index = fetch_raw_color().a;
	return sample_p(index) * 255.0f;
}
|
|
|
|
|
2016-05-06 13:18:22 +00:00
|
|
|
// Runs each of the red, green and blue channels of the source texel through
// the palette independently, keeping only the matching channel of each lookup.
// Alpha is forced to 1 before the result is scaled to the 0-255 range.
vec4 fetch_rgb()
{
	vec4 src = fetch_raw_color();

	float r = sample_p(src.r).r;
	float g = sample_p(src.g).g;
	float b = sample_p(src.b).b;

	return vec4(r, g, b, 1.0f) * 255.0f;
}
|
|
|
|
|
2016-05-29 08:11:23 +00:00
|
|
|
// Builds a value from shifted/masked green and blue bits, driven by the
// ChannelShuffle constants, and replicates it over all four components.
vec4 fetch_gXbY()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	// Depth source: take 8 bits starting at bit (8 + ChannelShuffle.w).
	int raw = fetch_raw_depth();
	int shift = 8 + ChannelShuffle.w;
	return vec4((raw >> shift) & 0xFF);
#else
	// Color source: combine the selected bits of green and blue.
	ivec4 texel = ivec4(fetch_raw_color() * 255.0f);
	int g_bits = (texel.g >> ChannelShuffle.w) & ChannelShuffle.z;
	int b_bits = (texel.b << ChannelShuffle.y) & ChannelShuffle.x;
	return vec4(g_bits | b_bits);
#endif
}
|
|
|
|
|
2016-04-23 10:06:10 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
|
2016-02-17 21:49:05 +00:00
|
|
|
// Samples the texture at st, applying the software wrap modes, palette lookup,
// alpha expansion (AEM) and software bilinear filtering selected by the PS_*
// macros. The returned color is truncated to integral values in the 0-255 range.
vec4 sample_color(vec2 st)
{
#if (PS_TCOFFSETHACK == 1)
	st += TC_OffsetHack.xy;
#endif

	// Up to four taps (one per column) and the bilinear weights between them.
	mat4 taps;
	vec2 weight;

	// FIXME I'm not sure this condition is useful (I think code will be optimized)
#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
	// No software LTF and pure 32 bits RGBA texture without special texture wrapping
	taps[0] = sample_c(st);
#ifdef TEX_COORD_DEBUG
	taps[0].rg = st.xy;
#endif

#else

#if (PS_LTF != 0)
	vec4 uv = st.xyxy + HalfTexel;
	weight = fract(uv.xy * WH.zw);
#if (PS_FST == 0)
	// Background in Shin Megami Tensei Lucifers
	// I suspect that uv isn't a standard number, so fract is outside of the [0;1] range
	// Note: it is free on GPU but let's do it only for float coordinates
	weight = clamp(weight, vec2(0.0f), vec2(1.0f));
#endif
#else
	vec4 uv = st.xyxy;
#endif

	uv = clamp_wrap_uv(uv);

#if PS_PAL_FMT != 0
	taps = sample_4p(sample_4_index(uv));
#else
	taps = sample_4c(uv);
#endif

#ifdef TEX_COORD_DEBUG
	for (int k = 0; k < 4; k++)
	{
		taps[k].rg = uv.xy;
	}
#endif

#endif

	// Alpha expansion for the formats that don't store a full alpha channel.
	// PERF note: using a dot product reduces the number of instructions by 1,
	// but I'm not sure it is equivalent nor faster.
#if (PS_AEM_FMT == FMT_24)
	for (int k = 0; k < 4; k++)
	{
		bool use_ta0 = (PS_AEM == 0) || any(bvec3(taps[k].rgb));
		taps[k].a = use_ta0 ? TA.x : 0.0f;
	}
#elif (PS_AEM_FMT == FMT_16)
	for (int k = 0; k < 4; k++)
	{
		bool use_ta0 = (PS_AEM == 0) || any(bvec3(taps[k].rgb));
		float low_alpha = use_ta0 ? TA.x : 0.0f;
		taps[k].a = (taps[k].a >= 0.5) ? TA.y : low_alpha;
	}
#endif

#if (PS_LTF != 0)
	vec4 texel = mix(mix(taps[0], taps[1], weight.x), mix(taps[2], taps[3], weight.x), weight.y);
#else
	vec4 texel = taps[0];
#endif

	// The 0.05f helps to fix the overbloom of sotc.
	// I think the issue is related to the rounding of the texture coordinate.
	// The linear (from fixed unit) interpolation could be slightly below the
	// correct one.
	return trunc(texel * 255.0f + 0.05f);
}
|
|
|
|
|
2015-07-18 11:40:10 +00:00
|
|
|
// Combines the texel T with the vertex color C according to PS_TFX, then
// overrides alpha with C.a when PS_TCC is 0. Values are in the 0-255 range.
vec4 tfx(vec4 T, vec4 C)
{
	vec4 result;
	vec4 modulated = trunc(trunc(C) * T / 128.0f);

#if (PS_TFX == 0)
	result = modulated;
#elif (PS_TFX == 1)
	result = T;
#elif (PS_TFX == 2)
	result = vec4(modulated.rgb + C.a, T.a + C.a);
#elif (PS_TFX == 3)
	result = vec4(modulated.rgb + C.a, T.a);
#else
	result = C;
#endif

#if (PS_TCC == 0)
	result.a = C.a;
#endif

#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
	// Clamp only when it is useful
	result = min(result, 255.0f);
#endif

	return result;
}
|
|
|
|
|
2015-07-18 11:40:10 +00:00
|
|
|
// Alpha test: discards the fragment when the alpha of C fails the comparison
// against AREF selected by PS_ATST. Mode 0 never discards.
void atst(vec4 C)
{
#if (PS_ATST == 1)
	if (C.a > AREF)
	{
		discard;
	}
#elif (PS_ATST == 2)
	if (C.a < AREF)
	{
		discard;
	}
#elif (PS_ATST == 3)
	if (abs(C.a - AREF) > 0.5f)
	{
		discard;
	}
#elif (PS_ATST == 4)
	if (abs(C.a - AREF) < 0.5f)
	{
		discard;
	}
#endif
	// PS_ATST == 0 (and any other value): nothing to do
}
|
|
|
|
|
2015-07-18 11:40:10 +00:00
|
|
|
// When PS_FOG is enabled, blends the rgb of C with FogColor: f is the mix
// factor, so f = 1 keeps C and f = 0 yields FogColor. The result is truncated.
void fog(inout vec4 C, float f)
{
#if PS_FOG != 0
	vec3 fogged = mix(FogColor, C.rgb, f);
	C.rgb = trunc(fogged);
#endif
}
|
|
|
|
|
|
|
|
// Computes the fragment color: fetches the texel with the method selected by
// PS_CHANNEL_FETCH / PS_DEPTH_FMT, combines it with the vertex color, runs the
// alpha test and finally applies fog.
vec4 ps_color()
{
	//FIXME: maybe we can set gl_Position.w = q in VS
#if (PS_FST == 0) && (PS_INVALID_TEX0 == 1)
	// Re-normalize coordinate from invalid GS to corrected texture size
	vec2 coord = (PSin.t_float.xy * WH.xy) / (vec2(PSin.t_float.w) * WH.zw);
	// no st_int yet
	// NOTE(review): coord_int is not declared in this branch, but the
	// PS_DEPTH_FMT > 0 path below reads it.
#elif (PS_FST == 0)
	vec2 coord = PSin.t_float.xy / vec2(PSin.t_float.w);
	vec2 coord_int = PSin.t_int.zw / vec2(PSin.t_float.w);
#else
	// Note xy are normalized coordinate
	vec2 coord = PSin.t_int.xy;
	vec2 coord_int = PSin.t_int.zw;
#endif

#if PS_CHANNEL_FETCH == 1
	vec4 texel = fetch_red();
#elif PS_CHANNEL_FETCH == 2
	vec4 texel = fetch_green();
#elif PS_CHANNEL_FETCH == 3
	vec4 texel = fetch_blue();
#elif PS_CHANNEL_FETCH == 4
	vec4 texel = fetch_alpha();
#elif PS_CHANNEL_FETCH == 5
	vec4 texel = fetch_rgb();
#elif PS_CHANNEL_FETCH == 6
	vec4 texel = fetch_gXbY();
#elif PS_DEPTH_FMT > 0
	// Integral coordinate
	vec4 texel = sample_depth(coord_int);
#else
	vec4 texel = sample_color(coord);
#endif

	vec4 color = tfx(texel, PSin.c);

	atst(color);

	fog(color, PSin.t_float.z);

	return color;
}
|
|
|
|
|
2015-07-18 11:40:10 +00:00
|
|
|
// Software frame buffer mask: bits set in FbMask are taken from the current
// render target value, all other bits are taken from C.
void ps_fbmask(inout vec4 C)
{
	// FIXME do I need special case for 16 bits
#if PS_FBMASK
	vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);

	uvec4 from_source = uvec4(C) & ~FbMask;
	uvec4 from_target = uvec4(RT) & FbMask;
	C = vec4(from_source | from_target);
#endif
}
|
|
|
|
|
2021-11-30 11:54:42 +00:00
|
|
|
// Adds the DitherMatrix entry for the current fragment to C. Mode 2 indexes
// the matrix with the fragment position as is, the other modes first divide
// the position by PS_SCALE_FACTOR.
void ps_dither(inout vec3 C)
{
#if PS_DITHER

#if PS_DITHER == 2
	ivec2 fpos = ivec2(gl_FragCoord.xy);
#else
	ivec2 fpos = ivec2(gl_FragCoord.xy / float(PS_SCALE_FACTOR));
#endif

	// The matrix is 4x4: keep the two low bits of each coordinate.
	ivec2 cell = fpos & 3;
	C += DitherMatrix[cell.y][cell.x];

#endif
}
|
|
|
|
|
|
|
|
// Clamps or wraps C to the range of the output format. Only active when
// software blending, dithering or the software frame buffer mask is in use.
void ps_color_clamp_wrap(inout vec3 C)
{
	// When dithering the bottom 3 bits become meaningless and cause lines in
	// the picture, so the color depth has to be limited on dithered items.
#if SW_BLEND || PS_DITHER || PS_FBMASK

	// Correct the Color value based on the output format
#if PS_COLCLIP == 0 && PS_HDR == 0
	// Standard Clamp
	C = clamp(C, 0.0f, 255.0f);
#endif

	// FIXME rounding of negative float?
	// compiler uses trunc but it might need floor

	// Warning: normally the blending equation is mult(A, B) = A * B >> 7. GPUs have the full accuracy
	// GS: Color = 1, Alpha = 255 => output 1
	// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
#if PS_DFMT == FMT_16
	// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
	C = vec3(ivec3(C) & 0xF8);
#elif PS_COLCLIP == 1 && PS_HDR == 0
	C = vec3(ivec3(C) & 0xFF);
#endif

#endif
}
|
|
|
|
|
2015-07-18 11:40:10 +00:00
|
|
|
void ps_blend(inout vec4 Color, float As)
|
2015-05-08 18:27:13 +00:00
|
|
|
{
|
2015-07-13 13:19:33 +00:00
|
|
|
#if SW_BLEND
|
2022-01-08 17:43:28 +00:00
|
|
|
|
|
|
|
// PABE
|
|
|
|
#if PS_PABE
|
|
|
|
// No blending so early exit
|
|
|
|
if (As < 1.0f)
|
|
|
|
return;
|
|
|
|
#endif
|
|
|
|
|
2015-08-20 22:33:45 +00:00
|
|
|
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
|
2015-07-18 11:40:10 +00:00
|
|
|
|
2015-05-26 14:16:36 +00:00
|
|
|
#if PS_DFMT == FMT_24
|
2015-08-20 22:33:45 +00:00
|
|
|
float Ad = 1.0f;
|
2015-05-26 14:16:36 +00:00
|
|
|
#else
|
2015-08-20 22:33:45 +00:00
|
|
|
// FIXME FMT_16 case
|
|
|
|
// FIXME Ad or Ad * 2?
|
|
|
|
float Ad = RT.a / 128.0f;
|
2015-05-26 14:16:36 +00:00
|
|
|
#endif
|
2015-07-18 11:40:10 +00:00
|
|
|
|
2015-08-20 22:33:45 +00:00
|
|
|
// Let the compiler do its jobs !
|
|
|
|
vec3 Cd = RT.rgb;
|
|
|
|
vec3 Cs = Color.rgb;
|
2015-05-20 07:07:01 +00:00
|
|
|
|
2015-07-13 13:19:33 +00:00
|
|
|
#if PS_BLEND_A == 0
|
|
|
|
vec3 A = Cs;
|
|
|
|
#elif PS_BLEND_A == 1
|
|
|
|
vec3 A = Cd;
|
|
|
|
#else
|
|
|
|
vec3 A = vec3(0.0f);
|
|
|
|
#endif
|
2015-05-20 07:07:01 +00:00
|
|
|
|
2015-07-13 13:19:33 +00:00
|
|
|
#if PS_BLEND_B == 0
|
|
|
|
vec3 B = Cs;
|
|
|
|
#elif PS_BLEND_B == 1
|
|
|
|
vec3 B = Cd;
|
|
|
|
#else
|
|
|
|
vec3 B = vec3(0.0f);
|
|
|
|
#endif
|
2015-05-20 07:07:01 +00:00
|
|
|
|
2015-07-13 13:19:33 +00:00
|
|
|
#if PS_BLEND_C == 0
|
|
|
|
float C = As;
|
|
|
|
#elif PS_BLEND_C == 1
|
|
|
|
float C = Ad;
|
|
|
|
#else
|
|
|
|
float C = Af;
|
|
|
|
#endif
|
2015-05-20 07:07:01 +00:00
|
|
|
|
2015-07-13 13:19:33 +00:00
|
|
|
#if PS_BLEND_D == 0
|
|
|
|
vec3 D = Cs;
|
|
|
|
#elif PS_BLEND_D == 1
|
|
|
|
vec3 D = Cd;
|
|
|
|
#else
|
|
|
|
vec3 D = vec3(0.0f);
|
|
|
|
#endif
|
2015-05-20 07:07:01 +00:00
|
|
|
|
2021-12-26 17:12:09 +00:00
|
|
|
// As/Af clamp alpha for Blend mix
|
|
|
|
#if PS_ALPHA_CLAMP
|
|
|
|
C = min(C, float(1.0f));
|
|
|
|
#endif
|
|
|
|
|
2015-07-13 13:19:33 +00:00
|
|
|
#if PS_BLEND_A == PS_BLEND_B
|
2015-07-18 11:40:10 +00:00
|
|
|
Color.rgb = D;
|
2015-07-13 13:19:33 +00:00
|
|
|
#else
|
2015-07-18 15:16:46 +00:00
|
|
|
Color.rgb = trunc((A - B) * C + D);
|
2015-05-19 22:51:37 +00:00
|
|
|
#endif
|
|
|
|
|
2022-01-23 11:39:01 +00:00
|
|
|
#else
|
|
|
|
// Needed for Cd * (As/Ad/F + 1) blending modes
|
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
|
|
|
#if PS_CLR_HW == 1 || PS_CLR_HW == 5
|
2022-01-24 10:11:38 +00:00
|
|
|
Color.rgb = vec3(255.0f);
|
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
|
|
|
#elif PS_CLR_HW == 2 || PS_CLR_HW == 4
|
|
|
|
// Cd*As,Cd*Ad or Cd*F
|
2022-01-23 11:39:01 +00:00
|
|
|
|
2022-02-01 18:19:20 +00:00
|
|
|
#if PS_BLEND_C == 2
|
2022-01-23 11:39:01 +00:00
|
|
|
float Alpha = Af;
|
|
|
|
#else
|
|
|
|
float Alpha = As;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
|
|
|
|
Color.rgb *= vec3(255.0f);
|
2022-02-01 18:19:20 +00:00
|
|
|
#elif PS_CLR_HW == 3
|
2022-01-26 02:05:06 +00:00
|
|
|
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
|
|
|
|
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
|
|
|
|
|
|
|
|
Color.rgb *= (255.0f / 128.0f);
|
2022-01-23 11:39:01 +00:00
|
|
|
#endif
|
|
|
|
|
2015-06-21 06:47:45 +00:00
|
|
|
#endif
|
2015-07-13 13:19:33 +00:00
|
|
|
}
|
2015-05-08 18:27:13 +00:00
|
|
|
|
2015-04-17 18:18:07 +00:00
|
|
|
// Fragment shader entry point.
//
// Stages, in order (each one is compiled in or out by the PS_* macros):
//   1. Scanline mask: discard fragments on masked lines.
//   2. Destination alpha test (DATE) against the alpha sampled from RtSampler.
//   3. Primitive-ID based DATE (PS_DATE == 3): discard primitives past the
//      one recorded in img_prim_min.
//   4. Color computation via ps_color(), with optional APITRACE debug overrides.
//   5. Channel shuffle (PS_SHUFFLE).
//   6. Blend alpha selection and alpha correction for the output format.
//   7. DATE pre-pass (PS_DATE == 1 or 2): record the first primitive that
//      writes a failing alpha, then return without writing a color.
//   8. Blend, dither, clamp/wrap, fbmask, then write both dual-source outputs
//      and (optionally) the clamped depth.
//
// Color components are handled in the 0-255 range throughout and are only
// divided by 255 when written to SV_Target0.
void ps_main()
{
#if PS_SCANMSK & 2
    // Discard fragments on prohibited lines; bit 0 of PS_SCANMSK selects
    // which line parity is masked.
    if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
        discard;
#endif

#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)

#if PS_WRITE_RG == 1
    // Pseudo 16 bits access.
    float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;
#else
    float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
#endif

#if (PS_DATE & 3) == 1
    // DATM == 0: Pixel with alpha equal to 1 will fail
    bool bad = (127.5f / 255.0f) < rt_a;
#elif (PS_DATE & 3) == 2
    // DATM == 1: Pixel with alpha equal to 0 will fail
    bool bad = rt_a < (127.5f / 255.0f);
#endif

    if (bad) {
#if PS_DATE >= 5 || defined(DISABLE_GL42_image)
        discard;
#else
        // Store -1 for this pixel in the primitive-min image and stop:
        // nothing else is written for this fragment.
        imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));
        return;
#endif
    }

#endif

#if PS_DATE == 3 && !defined(DISABLE_GL42_image)
    int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;
    // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
    // the bad alpha value so we must keep it.

    if (gl_PrimitiveID > stencil_ceil) {
        discard;
    }
#endif

    vec4 C = ps_color();

    // Debug overrides: force individual channels so the fragments produced by
    // a primitive are easy to spot in a trace.
    // The literals need a decimal point: a bare "255f" is not a valid GLSL
    // floating constant and fails to compile once APITRACE_DEBUG is enabled.
#if (APITRACE_DEBUG & 1) == 1
    C.r = 255.0f;
#endif
#if (APITRACE_DEBUG & 2) == 2
    C.g = 255.0f;
#endif
#if (APITRACE_DEBUG & 4) == 4
    C.b = 255.0f;
#endif
#if (APITRACE_DEBUG & 8) == 8
    C.a = 128.0f;
#endif

#if PS_SHUFFLE
    // Integer copies are taken BEFORE C is modified below, so the GA part
    // always works on the original channel values.
    uvec4 denorm_c = uvec4(C);
    // TA.xy scaled to the 0-255 range, rounded to nearest.
    uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);

    // Write RB part. Mask will take care of the correct destination
#if PS_READ_BA
    C.rb = C.bb;
#else
    C.rb = C.rr;
#endif

    // FIXME precompute my_TA & 0x80

    // Write GA part. Mask will take care of the correct destination
    // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component
    // However Nvidia emulate it with an if (at least on kepler arch) ...
#if PS_READ_BA
    // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
    // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
    // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
    // c.ga = vec2(float(denorm_c.a));

    // Keep the low 7 bits of the source channel and take bit 7 from TA.y or
    // TA.x, depending on whether bit 7 of the source channel is set.
    if (bool(denorm_c.a & 0x80u)) {
        C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
    } else {
        C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
    }

#else
    // Same selection as above, using the green channel as the source.
    if (bool(denorm_c.g & 0x80u)) {
        C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
    } else {
        C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
    }

    // Nice idea but step/mix requires 4 instructions
    // set / trunc / I2F / Mad
    //
    // float sel = step(128.0f, c.g);
    // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
    // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
#endif

#endif

    // Must be done before alpha correction
#if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
    // Blend alpha comes from the render target instead of the fragment.
    vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
    float alpha_blend = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
#else
    float alpha_blend = C.a / 128.0f;
#endif

    // Correct the ALPHA value based on the output format
#if (PS_DFMT == FMT_16)
    float A_one = 128.0f; // alpha output will be 0x80
    C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)
    if (C.a < 128.0f) {
        C.a += 128.0f;
    }
#endif

    // Get first primitive that will write a failing alpha value
#if PS_DATE == 1 && !defined(DISABLE_GL42_image)
    // DATM == 0
    // Pixel with alpha equal to 1 will fail (128-255)
    if (C.a > 127.5f) {
        imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
    }
    // This pass only records primitive IDs; no color is written.
    return;
#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)
    // DATM == 1
    // Pixel with alpha equal to 0 will fail (0-127)
    if (C.a < 127.5f) {
        imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
    }
    // This pass only records primitive IDs; no color is written.
    return;
#endif

    ps_blend(C, alpha_blend);

    ps_dither(C.rgb);

    // Color clamp/wrap needs to be done after sw blending and dithering
    ps_color_clamp_wrap(C.rgb);

    ps_fbmask(C);

    // Index 0: the color scaled back to 0-1.
    // Index 1: the blend alpha, as the second dual-source blending input.
    SV_Target0 = C / 255.0f;
    SV_Target1 = vec4(alpha_blend);

#if PS_ZCLAMP
    gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
#endif
}
|
|
|
|
|
|
|
|
#endif
|