2023-07-20 13:23:09 +00:00
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2023 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
2015-04-17 18:18:07 +00:00
//#version 420 // Keep it for text editor detection
// Pixel format selectors. PS_AEM_FMT and PS_DST_FMT are compared against
// these values further down in this shader.
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2
2015-08-08 11:34:55 +00:00
2015-04-17 18:18:07 +00:00
// TEX_COORD_DEBUG outputs the UV coordinates as the color. It is useful
// for detecting bad sampling caused by upscaling.
//#define TEX_COORD_DEBUG
2015-07-17 18:57:32 +00:00
// In debug mode, force PS_TFX/PS_TCC to 1 so that tfx() returns the texture
// value unchanged; sample_color() stores the coordinates in that value.
#ifdef TEX_COORD_DEBUG
#define PS_TFX 1
#define PS_TCC 1
#endif
2015-04-17 18:18:07 +00:00
2015-07-13 13:19:33 +00:00
// Software blending is active when any of the A, B or D selectors is non-zero.
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
2022-03-07 14:36:05 +00:00
// Software blending has to read the render target when any selector equals 1
// (ps_blend() maps selector value 1 to the destination colour/alpha).
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
2023-03-10 12:02:18 +00:00
// True when the C selector is the destination alpha and PS_A_MASKED is set.
// NOTE(review): the name suggests destination alpha is forwarded to hardware
// blending; the consumer is outside this part of the file - confirm.
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
2022-08-31 09:17:53 +00:00
// PS_DATE modes 1 and 2 (the render-target read for blending/masking is skipped in these modes, see NEEDS_RT).
#define PS_PRIMID_INIT (PS_DATE == 1 || PS_DATE == 2)
// The render target is needed when it doubles as the texture or when PS_DATE >= 5.
#define NEEDS_RT_EARLY (PS_TEX_IS_FB == 1 || PS_DATE >= 5)
// The render target is needed either "early" or, outside of the PRIMID init
// modes, for frame buffer masking / software blending / SW_AD_TO_HW.
#define NEEDS_RT (NEEDS_RT_EARLY || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
2023-03-26 10:09:42 +00:00
// Texture/palette samplers and all sampling helpers are compiled only when PS_TFX != 4.
#define NEEDS_TEX (PS_TFX != 4)
2015-07-13 13:19:33 +00:00
2023-03-26 10:09:42 +00:00
// Per-draw constants for the fragment stage. The member order defines the
// std140 layout and therefore must not be changed in isolation.
layout(std140, binding = 0) uniform cb21
{
2023-04-13 03:23:31 +00:00
// Colour blended in by fog().
vec3 FogColor;
// Reference value for the alpha test in atst().
float AREF;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// .xy is used as the texture size in clamp_wrap_uv(); .zw is used as the
// texel scale in sample_c()/sample_color().
vec4 WH;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Replacement alpha values applied for the 24-bit (TA.x) and 16-bit (TA.x/TA.y) AEM paths.
vec2 TA;
// Not referenced in the visible part of this file.
float MaxDepthPS;
// Fixed alpha used as the blend factor C when PS_BLEND_C is neither 0 nor 1.
float Af;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Bits set here are taken from the render target in ps_fbmask().
uvec4 FbMask;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Offset added to the coordinates when PS_LTF != 0 in sample_color().
vec4 HalfTexel;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Multi-purpose: clamp bounds (mode 2), bit masks reinterpreted as integers
// (mode 3), and K/L/bias/max LOD when PS_MANUAL_LOD == 1.
vec4 MinMax;
// Offset (.xy) and scale or upper bound (.zw) used by the PS_ADJS/PS_ADJT and
// PS_REGION_RECT paths.
vec4 STRange;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Shift amounts and masks consumed by fetch_gXbY().
ivec4 ChannelShuffle;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Added to the coordinates when PS_TCOFFSETHACK == 1.
vec2 TC_OffsetHack;
// Coordinate scale applied in sample_c() when PS_ADJS/PS_ADJT are off.
vec2 STScale;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// 4x4 table indexed by the low two bits of the fragment position in ps_dither().
mat4 DitherMatrix;
2023-03-26 10:09:42 +00:00
2023-04-13 03:23:31 +00:00
// Multiplies the wrapped integer coordinates in sample_depth().
float ScaledScaleFactor;
// Multiplies gl_FragCoord in ps_dither() unless PS_DITHER == 2.
float RcpScaleFactor;
2023-03-26 10:09:42 +00:00
};
2015-04-17 18:18:07 +00:00
// Interpolated inputs received from the previous shader stage.
in SHADER
{
2023-04-13 03:23:31 +00:00
// xy / w gives the texture coordinate when PS_FST == 0; z is passed to fog();
// w is also used for the manual LOD computation.
vec4 t_float;
// xy is the texture coordinate when PS_FST != 0; zw feeds sample_depth().
vec4 t_int;
// Vertex colour passed to tfx(); flat-shaded when PS_IIP == 0.
#if PS_IIP != 0
vec4 c;
#else
flat vec4 c;
#endif
2015-04-17 18:18:07 +00:00
} PSin;
2022-03-07 14:36:05 +00:00
// Storage qualifier used for SV_Target0; switched to inout below when the
// EXT framebuffer fetch extension is used to read the previous colour.
#define TARGET_0_QUALIFIER out
// Only enable framebuffer fetch when we actually need it.
2022-08-31 09:17:53 +00:00
#if HAS_FRAMEBUFFER_FETCH && NEEDS_RT
2023-04-13 03:23:31 +00:00
// We need to force the colour to be defined here, to read from it.
// Basically the only scenario where this'll happen is RGBA masked and DATE is active.
#undef PS_NO_COLOR
#define PS_NO_COLOR 0
// LAST_FRAG_COLOR is what fetch_rt() returns on the framebuffer fetch path:
// the inout colour output with the EXT extension, the ARM built-in otherwise.
#if defined(GL_EXT_shader_framebuffer_fetch)
#undef TARGET_0_QUALIFIER
#define TARGET_0_QUALIFIER inout
#define LAST_FRAG_COLOR SV_Target0
#elif defined(GL_ARM_shader_framebuffer_fetch)
#define LAST_FRAG_COLOR gl_LastFragColorARM
#endif
2022-03-07 14:36:05 +00:00
#endif
2022-03-20 08:25:25 +00:00
// Colour outputs. Nothing is declared when PS_NO_COLOR is set; the second
// output exists only when dual source blending is available and wanted.
#if !PS_NO_COLOR
2021-12-31 07:29:26 +00:00
#if !defined(DISABLE_DUAL_SOURCE) && !PS_NO_COLOR1
2023-04-13 03:23:31 +00:00
// Same buffer but 2 colors for dual source blending
layout(location = 0, index = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;
2022-03-07 14:36:05 +00:00
#else
2023-04-13 03:23:31 +00:00
layout(location = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
2022-03-07 14:36:05 +00:00
#endif
2022-03-20 08:25:25 +00:00
#endif
2015-04-17 18:18:07 +00:00
2023-03-26 10:09:42 +00:00
// Source texture and its palette; only present when texturing is compiled in.
#if NEEDS_TEX
layout(binding = 0) uniform sampler2D TextureSampler;
2015-04-17 18:18:07 +00:00
layout(binding = 1) uniform sampler2D PaletteSampler;
2023-03-26 10:09:42 +00:00
#endif
2022-03-07 14:36:05 +00:00
2022-08-31 09:17:53 +00:00
// Render target bound as a texture; used by fetch_rt() when framebuffer fetch
// is not available.
#if !HAS_FRAMEBUFFER_FETCH && NEEDS_RT
2022-01-30 07:10:10 +00:00
layout(binding = 2) uniform sampler2D RtSampler; // NOTE(review): the old remark claimed binding 2 is shared with "the image below", but that declaration uses binding 3 - confirm
2022-03-07 14:36:05 +00:00
#endif
2015-04-17 18:18:07 +00:00
2022-08-31 20:16:31 +00:00
// Extra input that is only declared for PS_DATE mode 3; it is not read in the
// visible part of this file.
#if PS_DATE == 3
layout(binding = 3) uniform sampler2D img_prim_min;
2015-06-26 18:03:15 +00:00
2015-05-01 18:04:23 +00:00
// I don't remember why I set this parameter but it is surely useless
//layout(pixel_center_integer) in vec4 gl_FragCoord;
2015-04-17 18:18:07 +00:00
#endif
2022-08-31 09:17:53 +00:00
// Returns the colour currently stored in the render target for this fragment.
//  - shader variants that never need the render target get vec4(0.0);
//  - with framebuffer fetch the value comes from LAST_FRAG_COLOR;
//  - otherwise RtSampler is fetched at the fragment position.
vec4 fetch_rt()
{
#if NEEDS_RT
	#if HAS_FRAMEBUFFER_FETCH
	return LAST_FRAG_COLOR;
	#else
	return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
	#endif
#else
	return vec4(0.0);
#endif
}
2023-03-26 10:09:42 +00:00
#if NEEDS_TEX
2022-08-31 09:17:53 +00:00
// Fetches one colour from the source texture at `uv`.
// Depending on the shader variant the value comes from the render target
// (PS_TEX_IS_FB), from an integer texel fetch (PS_REGION_RECT), or from a
// filtered lookup after the coordinate has been rescaled.
vec4 sample_c(vec2 uv)
{
#if PS_TEX_IS_FB == 1
	return fetch_rt();
#elif PS_REGION_RECT
	// The coordinate is truncated to an integer texel position here.
	return texelFetch(TextureSampler, ivec2(uv), 0);
#else

	#if PS_POINT_SAMPLER
	// ATI/AMD cards appear to add 127/128 of a texel to the sampling
	// coordinates, which occasionally makes point sampling round up.
	// Snap the coordinate to the texel centre to avoid that (the centre is
	// just paranoia, the top left corner works too).
	// As of 2018 this issue is still present.
	uv = (trunc(uv * WH.zw) + vec2(0.5, 0.5)) / WH.zw;
	#endif

	// Rescale each axis: either offset + scale from STRange (PS_ADJS/PS_ADJT)
	// or the plain STScale factor.
	#if PS_ADJS
	uv.x = (uv.x - STRange.x) * STRange.z;
	#else
	uv.x = uv.x * STScale.x;
	#endif

	#if PS_ADJT
	uv.y = (uv.y - STRange.y) * STRange.w;
	#else
	uv.y = uv.y * STScale.y;
	#endif

	#if PS_AUTOMATIC_LOD == 1
	return texture(TextureSampler, uv);
	#elif PS_MANUAL_LOD == 1
	// MinMax is reused here as (K, L, bias, max LOD).
	// FIXME add LOD: K - ( LOG2(Q) * (1 << L))
	float gs_lod = MinMax.x - log2(abs(PSin.t_float.w)) * MinMax.y;
	// FIXME: is an additional max(..., 0.0f) on the result useful?
	float lod = min(gs_lod, MinMax.w) - MinMax.z;
	return textureLod(TextureSampler, uv, lod);
	#else
	// No LOD: always read the base level.
	return textureLod(TextureSampler, uv, 0.0f);
	#endif

#endif
}
2023-01-03 11:13:50 +00:00
// Reads palette entry `idx` from row 0 of PaletteSampler.
vec4 sample_p(uint idx)
{
	ivec2 entry = ivec2(int(idx), 0);
	return texelFetch(PaletteSampler, entry, 0);
}
// Palette lookup for a normalized value: `u` is scaled by 255.5 and truncated
// to obtain the palette index.
vec4 sample_p_norm(float u)
{
	uint idx = uint(u * 255.5f);
	return sample_p(idx);
}
2015-08-14 18:57:45 +00:00
// Applies the wrap/clamp mode selected by PS_WMS (S axis) and PS_WMT (T axis).
//
// `uv` packs two coordinate pairs: .xz are S values and .yw are T values (the
// callers sample at uv.xy, uv.zy, uv.xw and uv.zw).
//
// Per-axis modes:
//   0 - repeat via fract(), only applied when PS_REGION_RECT == 1
//   1 - clamp to [0, 1], only applied when PS_REGION_RECT == 1
//   2 - clamp to the [MinMax.xy, MinMax.zw] range
//   3 - bit-mask wrap: (texel & MinMax.xy bits) | MinMax.zw bits
// When PS_REGION_RECT == 1 the result is finally converted to texel
// coordinates and clamped to the STRange rectangle.
//
// Returns the adjusted coordinates; the input is returned unchanged for modes
// that need no processing.
vec4 clamp_wrap_uv(vec4 uv)
{
	vec4 uv_out = uv;
	vec4 tex_size = WH.xyxy;

#if PS_WMS == PS_WMT
	#if PS_REGION_RECT == 1 && PS_WMS == 0
	uv_out = fract(uv);
	#elif PS_REGION_RECT == 1 && PS_WMS == 1
	uv_out = clamp(uv, vec4(0.0f), vec4(1.0f));
	#elif PS_WMS == 2
	uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);
	#elif PS_WMS == 3
		#if PS_FST == 0
		// wrap negative uv coords to avoid an off by one error that shifted
		// textures. Fixes Xenosaga's hair issue.
		uv = fract(uv);
		#endif
	uv_out = vec4((uvec4(uv * tex_size) & floatBitsToUint(MinMax.xyxy)) | floatBitsToUint(MinMax.zwzw)) / tex_size;
	#endif
#else // PS_WMS != PS_WMT
	// S axis. The results must be written to uv_out: `uv` is a by-value copy
	// and uv_out was already initialised from it, so writing to uv.xz here
	// would silently drop the repeat/clamp.
	#if PS_REGION_RECT == 1 && PS_WMS == 0
	uv_out.xz = fract(uv.xz);
	#elif PS_REGION_RECT == 1 && PS_WMS == 1
	uv_out.xz = clamp(uv.xz, vec2(0.0f), vec2(1.0f));
	#elif PS_WMS == 2
	uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
	#elif PS_WMS == 3
		#if PS_FST == 0
		uv.xz = fract(uv.xz);
		#endif
	uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / tex_size.xx;
	#endif

	// T axis.
	#if PS_REGION_RECT == 1 && PS_WMT == 0
	uv_out.yw = fract(uv.yw);
	#elif PS_REGION_RECT == 1 && PS_WMT == 1
	uv_out.yw = clamp(uv.yw, vec2(0.0f), vec2(1.0f));
	#elif PS_WMT == 2
	uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
	#elif PS_WMT == 3
		#if PS_FST == 0
		uv.yw = fract(uv.yw);
		#endif
	uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / tex_size.yy;
	#endif
#endif

#if PS_REGION_RECT == 1
	// Normalized -> Integer Coordinates.
	uv_out = clamp(uv_out * WH.zwzw + STRange.xyxy, STRange.xyxy, STRange.zwzw);
#endif

	return uv_out;
}
// Samples the four texels addressed by the packed coordinates in `uv` and
// returns them as the columns of a matrix, in the order
// (x,y), (z,y), (x,w), (z,w).
mat4 sample_4c(vec4 uv)
{
	// Note: texture gather can't be used because of the special
	// clamping/wrapping. It doesn't support LOD either.
	return mat4(
		sample_c(uv.xy),
		sample_c(uv.zy),
		sample_c(uv.xw),
		sample_c(uv.zw));
}
2023-01-03 11:13:50 +00:00
// Reads the palette indices of the four texels addressed by `uv`.
//
// The index is taken from the alpha channel: either the GS sent a single
// channel texture (red is then remapped to alpha), or an old RGBA8 render
// target carries the 4/8-bit index in its alpha channel.
// PS_PAL_FMT selects which bits are kept: 1 = low nibble (4HL),
// 2 = high nibble (4HH), anything else = the full 8 bits.
uvec4 sample_4_index(vec4 uv)
{
	// Note: texture gather can't be used because of the special
	// clamping/wrapping. It doesn't support LOD either.
	vec4 alpha = vec4(
		sample_c(uv.xy).a,
		sample_c(uv.zy).a,
		sample_c(uv.xw).a,
		sample_c(uv.zw).a);

	// Denormalize to integer indices.
	uvec4 index = uvec4(alpha * 255.5f);

#if PS_PAL_FMT == 1
	return index & 0xFu;
#elif PS_PAL_FMT == 2
	return index >> 4u;
#else
	return index;
#endif
}
2023-01-03 11:13:50 +00:00
// Looks up four palette entries; column i of the result is the palette
// colour for index component i of `u`.
mat4 sample_4p(uvec4 u)
{
	return mat4(
		sample_p(u.x),
		sample_p(u.y),
		sample_p(u.z),
		sample_p(u.w));
}
2016-05-04 16:05:45 +00:00
// Reads the red channel of the source (render target or texture) at the
// current fragment position and scales it to an integer depth value:
// by 2^32 with HAS_CLIP_CONTROL, by 2^24 otherwise.
// NOTE(review): values scaled by 2^32 may not fit a signed int - confirm the
// conversion behaves as intended on the targeted drivers.
int fetch_raw_depth()
{
#if PS_TEX_IS_FB == 1
	float depth = fetch_rt().r;
#else
	float depth = texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r;
#endif

#if HAS_CLIP_CONTROL
	return int(depth * exp2(32.0f));
#else
	return int(depth * exp2(24.0f));
#endif
}
// Reads the unprocessed source colour at the current fragment position,
// from the render target when it doubles as the texture, from TextureSampler
// otherwise.
vec4 fetch_raw_color()
{
#if PS_TEX_IS_FB != 1
	return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
#else
	return fetch_rt();
#endif
}
2016-04-23 10:06:10 +00:00
// Fetches level 0 of TextureSampler at integer texel position `uv`.
vec4 fetch_c(ivec2 uv)
{
	return texelFetch(TextureSampler, uv, 0);
}
2016-05-04 16:05:45 +00:00
//////////////////////////////////////////////////////////////////////
// Depth sampling
//////////////////////////////////////////////////////////////////////
2016-04-23 10:06:10 +00:00
// Integer variant of the wrap/clamp logic, used for depth sampling.
// Mode 2 clamps the coordinate to a range, mode 3 applies an AND/OR bit mask;
// every other mode leaves the axis untouched. S (x) is driven by PS_WMS and
// T (y) by PS_WMT.
ivec2 clamp_wrap_uv_depth(ivec2 uv)
{
	// MinMax holds integer bit patterns. They are shifted left by 4 to keep
	// the full precision, which allows the scale factor to be applied before
	// the 1/16 coefficient.
	ivec4 bounds = floatBitsToInt(MinMax) << 4;
	ivec2 result = uv;

	// S axis
#if PS_WMS == 2
	result.x = clamp(uv.x, bounds.x, bounds.z);
#elif PS_WMS == 3
	result.x = (uv.x & bounds.x) | bounds.z;
#endif

	// T axis
#if PS_WMT == 2
	result.y = clamp(uv.y, bounds.y, bounds.w);
#elif PS_WMT == 3
	result.y = (uv.y & bounds.y) | bounds.w;
#endif

	return result;
}
// Samples the source as a depth buffer and converts it to a colour whose
// components range from 0 to 255.
//
// `st` is an integral coordinate; it is wrapped/clamped, multiplied by
// ScaledScaleFactor and, with PS_REGION_RECT, offset and clamped to STRange.
// The conversion itself depends on the variant: two game-specific HLE paths,
// or PS_DEPTH_FMT 1 (32-bit -> RGBA8), 2 (16-bit -> RGB5A1) and 3 (plain
// colour). The alpha is finally expanded according to PS_AEM_FMT.
vec4 sample_depth(vec2 st)
{
	vec2 scaled = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScaledScaleFactor);

#if PS_REGION_RECT == 1
	scaled = clamp(scaled + STRange.xy, STRange.xy, STRange.zw);
#endif

	ivec2 texel_pos = ivec2(scaled);
	vec4 texel = vec4(0.0f);

#if PS_TALES_OF_ABYSS_HLE == 1
	// Warning: UV can't be used in channel effect
	int depth = fetch_raw_depth();

	// Convert the second byte (bits 8..15) through the palette.
	texel = texelFetch(PaletteSampler, ivec2((depth >> 8) & 0xFF, 0), 0) * 255.0f;

#elif PS_URBAN_CHAOS_HLE == 1
	// The depth buffer is read as an RGB5A1 texture and the game tries to
	// extract the green channel: a first channel trick extracts the lsb
	// (right-shifted), then a second one extracts the msb (left-shifted).
	// OpenGL uses a FLOAT32 format for the depth, hence the conversions.
	// Both steps (msb & lsb) are done in a single pass to be faster.

	// Warning: UV can't be used in channel effect
	int depth = fetch_raw_depth();

	// lsb goes through the palette
	texel = texelFetch(PaletteSampler, ivec2((depth & 0xFF), 0), 0) * 255.0f;

	// msb is easier: scale and saturate, then add it to green
	texel.g += min(float((depth >> 8) & 0xFF) * 36.0f, 255.0f);

#elif PS_DEPTH_FMT == 1
	// Based on ps_convert_float32_rgba8 of convert:
	// turn a GL_FLOAT32 depth texture into an RGBA colour.
	#if HAS_CLIP_CONTROL
	uint raw = uint(fetch_c(texel_pos).r * exp2(32.0f));
	#else
	uint raw = uint(fetch_c(texel_pos).r * exp2(24.0f));
	#endif
	texel = vec4(uvec4((raw & 0xFFu), ((raw >> 8) & 0xFFu), ((raw >> 16) & 0xFFu), (raw >> 24)));

#elif PS_DEPTH_FMT == 2
	// Based on ps_convert_float16_rgb5a1 of convert:
	// turn a GL_FLOAT32 depth (only the 16 lsb) into an RGB5A1 colour.
	#if HAS_CLIP_CONTROL
	uint raw = uint(fetch_c(texel_pos).r * exp2(32.0f));
	#else
	uint raw = uint(fetch_c(texel_pos).r * exp2(24.0f));
	#endif
	texel = vec4(uvec4((raw & 0x1Fu), ((raw >> 5) & 0x1Fu), ((raw >> 10) & 0x1Fu), (raw >> 15) & 0x01u)) * vec4(8.0f, 8.0f, 8.0f, 128.0f);

#elif PS_DEPTH_FMT == 3
	// RGBA/RGB5A1 colour texture read back as a colour texture.
	texel = fetch_c(texel_pos) * 255.0f;
#endif

	// Warning: texel ranges from 0 to 255 from here on.
#if (PS_AEM_FMT == FMT_24)
	bool use_ta0 = (PS_AEM == 0) || any(bvec3(texel.rgb));
	texel.a = use_ta0 ? 255.0f * TA.x : 0.0f;
#elif (PS_AEM_FMT == FMT_16)
	bool use_ta0 = (PS_AEM == 0) || any(bvec3(texel.rgb));
	texel.a = texel.a >= 128.0f ? 255.0f * TA.y : (use_ta0 ? 255.0f * TA.x : 0.0f);
#endif

	return texel;
}
2016-04-28 20:15:28 +00:00
//////////////////////////////////////////////////////////////////////
// Fetch a Single Channel
//////////////////////////////////////////////////////////////////////
// Channel fetch: uses the red channel (or bits 0..7 of the depth for depth
// formats 1 and 2) as a palette index and returns the palette colour scaled
// to the 0..255 range.
vec4 fetch_red()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	float red = float(fetch_raw_depth() & 0xFF) / 255.0f;
#else
	float red = fetch_raw_color().r;
#endif
	return sample_p_norm(red) * 255.0f;
}
2021-12-05 04:57:43 +00:00
// Channel fetch: uses the green channel (or bits 8..15 of the depth for depth
// formats 1 and 2) as a palette index and returns the palette colour scaled
// to the 0..255 range.
vec4 fetch_green()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	float green = float((fetch_raw_depth() >> 8) & 0xFF) / 255.0f;
#else
	float green = fetch_raw_color().g;
#endif
	return sample_p_norm(green) * 255.0f;
}
2021-12-05 04:57:43 +00:00
// Channel fetch: uses the blue channel (or bits 16..23 of the depth for depth
// formats 1 and 2) as a palette index and returns the palette colour scaled
// to the 0..255 range.
vec4 fetch_blue()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	float blue = float((fetch_raw_depth() >> 16) & 0xFF) / 255.0f;
#else
	float blue = fetch_raw_color().b;
#endif
	return sample_p_norm(blue) * 255.0f;
}
// Channel fetch: uses the alpha channel of the raw colour as a palette index
// and returns the palette colour scaled to the 0..255 range.
vec4 fetch_alpha()
{
	float alpha = fetch_raw_color().a;
	return sample_p_norm(alpha) * 255.0f;
}
2016-05-06 13:18:22 +00:00
// Channel fetch: remaps red, green and blue independently through the
// palette (each channel indexes the palette and keeps the matching channel
// of the entry). Alpha is forced to 1 before scaling to 0..255.
vec4 fetch_rgb()
{
	vec4 raw = fetch_raw_color();
	float r = sample_p_norm(raw.r).r;
	float g = sample_p_norm(raw.g).g;
	float b = sample_p_norm(raw.b).b;
	return vec4(r, g, b, 1.0f) * 255.0f;
}
2016-05-29 08:11:23 +00:00
// Channel fetch that combines bits of green and blue using the shift amounts
// and masks in ChannelShuffle. For depth formats 1 and 2 the equivalent bits
// are taken directly from the raw depth value. The single resulting value is
// replicated to all four components.
vec4 fetch_gXbY()
{
#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2
	int shift = 8 + ChannelShuffle.w;
	return vec4((fetch_raw_depth() >> shift) & 0xFF);
#else
	ivec4 raw = ivec4(fetch_raw_color() * 255.0f);
	int g_bits = (raw.g >> ChannelShuffle.w) & ChannelShuffle.z;
	int b_bits = (raw.b << ChannelShuffle.y) & ChannelShuffle.x;
	return vec4(g_bits | b_bits);
#endif
}
2016-04-23 10:06:10 +00:00
//////////////////////////////////////////////////////////////////////
2016-02-17 21:49:05 +00:00
// Samples the texture colour at `st` and returns it in the 0..255 range.
// Handles the optional coordinate offset hack, software bilinear filtering
// (PS_LTF), palette lookup (PS_PAL_FMT), wrap/clamp modes and the alpha
// expansion selected by PS_AEM_FMT.
vec4 sample_color(vec2 st)
{
#if (PS_TCOFFSETHACK == 1)
	st += TC_OffsetHack.xy;
#endif

	vec4 texel;
	mat4 quad;
	vec2 weight;

	// FIXME I'm not sure this condition is useful (I think the code will be optimized)
#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2)
	// No software LTF and a pure 32 bits RGBA texture without special texture wrapping
	quad[0] = sample_c(st);
	#ifdef TEX_COORD_DEBUG
	quad[0].rg = st.xy;
	#endif
#else
	vec4 uv = st.xyxy;

	#if (PS_LTF != 0)
	uv += HalfTexel;
	weight = fract(uv.xy * WH.zw);
		#if (PS_FST == 0)
		// Background in Shin Megami Tensei Lucifers
		// I suspect that uv isn't a standard number, so fract ends up outside of the [0;1] range
		// Note: it is free on GPU but let's do it only for float coordinates
		weight = clamp(weight, vec2(0.0f), vec2(1.0f));
		#endif
	#endif

	uv = clamp_wrap_uv(uv);

	#if PS_PAL_FMT != 0
	quad = sample_4p(sample_4_index(uv));
	#else
	quad = sample_4c(uv);
	#endif

	#ifdef TEX_COORD_DEBUG
	for (int k = 0; k < 4; k++)
	{
		quad[k].rg = uv.xy;
	}
	#endif
#endif

	// PERF note: using a dot product (sum of rgb > 0) would save one
	// instruction, but it is not certain to be equivalent nor faster.
#if (PS_AEM_FMT == FMT_24)
	for (int i = 0; i < 4; i++)
	{
		bool use_ta0 = (PS_AEM == 0) || any(bvec3(quad[i].rgb));
		quad[i].a = use_ta0 ? TA.x : 0.0f;
	}
#elif (PS_AEM_FMT == FMT_16)
	for (int i = 0; i < 4; i++)
	{
		bool use_ta0 = (PS_AEM == 0) || any(bvec3(quad[i].rgb));
		quad[i].a = quad[i].a >= 0.5 ? TA.y : (use_ta0 ? TA.x : 0.0f);
	}
#endif

#if (PS_LTF != 0)
	vec4 top = mix(quad[0], quad[1], weight.x);
	vec4 bottom = mix(quad[2], quad[3], weight.x);
	texel = mix(top, bottom, weight.y);
#else
	texel = quad[0];
#endif

	// The 0.05f helps to fix the overbloom of sotc.
	// The issue is probably related to the rounding of the texture coordinate:
	// the linear (from fixed unit) interpolation could be slightly below the
	// correct one.
	return trunc(texel * 255.0f + 0.05f);
}
2023-03-26 10:09:42 +00:00
#endif // NEEDS_TEX
2015-07-18 11:40:10 +00:00
// Combines the texture colour T with the vertex colour C according to PS_TFX:
//   0: C * T / 128 (truncated)
//   1: T
//   2: rgb = C * T / 128 + C.a, a = T.a + C.a
//   3: rgb = C * T / 128 + C.a, a = T.a
//   otherwise: C
// With PS_TCC == 0 the alpha always comes from C. Modes 0, 2 and 3 are
// clamped to 255 since they are the only ones that can exceed it.
vec4 tfx(vec4 T, vec4 C)
{
	vec4 modulated = trunc((C * T) / 128.0f);
	vec4 result;

#if (PS_TFX == 0)
	result = modulated;
#elif (PS_TFX == 1)
	result = T;
#elif (PS_TFX == 2)
	result = vec4(modulated.rgb + C.a, T.a + C.a);
#elif (PS_TFX == 3)
	result = vec4(modulated.rgb + C.a, T.a);
#else
	result = C;
#endif

#if (PS_TCC == 0)
	result.a = C.a;
#endif

#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
	// Clamp only when it is useful
	result = min(result, 255.0f);
#endif

	return result;
}
2015-07-18 11:40:10 +00:00
// Alpha test: discards the fragment depending on PS_ATST.
//   1: discard when alpha > AREF
//   2: discard when alpha < AREF
//   3: discard when |alpha - AREF| > 0.5
//   4: discard when |alpha - AREF| < 0.5
// Any other value (including 0) leaves the fragment alone.
void atst(vec4 C)
{
#if (PS_ATST == 1)
	if (C.a > AREF)
		discard;
#elif (PS_ATST == 2)
	if (C.a < AREF)
		discard;
#elif (PS_ATST == 3)
	if (abs(C.a - AREF) > 0.5f)
		discard;
#elif (PS_ATST == 4)
	if (abs(C.a - AREF) < 0.5f)
		discard;
#endif
}
2015-07-18 11:40:10 +00:00
// Blends C.rgb with FogColor when PS_FOG is enabled: f == 1 keeps the colour,
// f == 0 yields the fog colour. The result is truncated; alpha is untouched.
void fog(inout vec4 C, float f)
{
#if PS_FOG != 0
	vec3 fogged = mix(FogColor, C.rgb, f);
	C.rgb = trunc(fogged);
#endif
}
// Produces the fragment colour before blending: picks the texture value for
// the active variant, combines it with the vertex colour (tfx), runs the
// alpha test (atst) and applies fog.
vec4 ps_color()
{
	//FIXME: maybe we can set gl_Position.w = q in VS
#if (PS_FST == 0)
	// Float coordinates: divide by Q.
	float q = PSin.t_float.w;
	vec2 st = PSin.t_float.xy / q;
	vec2 st_int = PSin.t_int.zw / q;
#else
	// Note: xy are normalized coordinates
	vec2 st = PSin.t_int.xy;
	vec2 st_int = PSin.t_int.zw;
#endif

	vec4 T;
#if !NEEDS_TEX
	T = vec4(0.0);
#elif PS_CHANNEL_FETCH == 1
	T = fetch_red();
#elif PS_CHANNEL_FETCH == 2
	T = fetch_green();
#elif PS_CHANNEL_FETCH == 3
	T = fetch_blue();
#elif PS_CHANNEL_FETCH == 4
	T = fetch_alpha();
#elif PS_CHANNEL_FETCH == 5
	T = fetch_rgb();
#elif PS_CHANNEL_FETCH == 6
	T = fetch_gXbY();
#elif PS_DEPTH_FMT > 0
	// Integral coordinate
	T = sample_depth(st_int);
#else
	T = sample_color(st);
#endif

	vec4 C = tfx(T, PSin.c);
	atst(C);
	fog(C, PSin.t_float.z);
	return C;
}
2015-07-18 11:40:10 +00:00
// Frame buffer masking: for every bit set in FbMask the render target value
// is kept, for every cleared bit the new colour is used. No-op unless
// PS_FBMASK is enabled.
void ps_fbmask(inout vec4 C)
{
	// FIXME do I need a special case for 16 bits?
#if PS_FBMASK
	uvec4 dst_bits = uvec4(trunc(fetch_rt() * 255.0f + 0.1f));
	uvec4 src_bits = uvec4(C);
	C = vec4((src_bits & ~FbMask) | (dst_bits & FbMask));
#endif
}
2021-11-30 11:54:42 +00:00
// Adds (or, with PS_ROUND_INV, subtracts) the DitherMatrix entry selected by
// the low two bits of the fragment position. With PS_DITHER == 2 the raw
// fragment position is used, otherwise it is first multiplied by
// RcpScaleFactor. No-op when PS_DITHER is 0.
void ps_dither(inout vec3 C)
{
#if PS_DITHER
	#if PS_DITHER == 2
	ivec2 pixel = ivec2(gl_FragCoord.xy);
	#else
	ivec2 pixel = ivec2(gl_FragCoord.xy * RcpScaleFactor);
	#endif

	ivec2 cell = pixel & 3;
	float offset = DitherMatrix[cell.y][cell.x];

	#if PS_ROUND_INV
	C -= offset;
	#else
	C += offset;
	#endif
#endif
}
// Brings the colour back into the range of the output format after software
// blending, dithering or frame buffer masking (no-op otherwise).
// When dithering, the bottom 3 bits become meaningless and cause lines in the
// picture, so the colour depth is limited for 16-bit targets.
void ps_color_clamp_wrap(inout vec3 C)
{
#if SW_BLEND || PS_DITHER || PS_FBMASK

	#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV
	// Need to round up, not down, since the shader will invert
	C += vec3(7.0f);
	#endif

	// Correct the colour value based on the output format
	#if PS_COLCLIP == 0 && PS_HDR == 0
	// Standard clamp
	C = clamp(C, vec3(0.0f), vec3(255.0f));
	#endif

	// FIXME rounding of negative floats?
	// The compiler uses trunc but it might need floor.

	// Warning: normally the blending equation is mult(A, B) = A * B >> 7, while the GPU has full accuracy
	// GS: Color = 1, Alpha = 255 => output 1
	// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
	#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0
	// In 16 bits format only 5 bits of colour are used. It impacts the shadow computation of Castlevania
	ivec3 quantized = ivec3(C) & ivec3(0xF8);
	C = vec3(quantized);
	#elif PS_COLCLIP == 1 || PS_HDR == 1
	// Wrap to 8 bits instead of clamping
	ivec3 wrapped = ivec3(C) & ivec3(0xFF);
	C = vec3(wrapped);
	#endif

#endif
}
2023-02-21 16:37:29 +00:00
void ps_blend(inout vec4 Color, inout vec4 As_rgba)
2015-05-08 18:27:13 +00:00
{
2023-02-21 16:37:29 +00:00
float As = As_rgba.a;
2015-07-13 13:19:33 +00:00
#if SW_BLEND
2022-01-08 17:43:28 +00:00
2023-04-13 03:23:31 +00:00
// PABE
2022-01-08 17:43:28 +00:00
#if PS_PABE
2023-04-13 03:23:31 +00:00
// No blending so early exit
if (As < 1.0f)
return;
2022-01-08 17:43:28 +00:00
#endif
2022-03-07 14:36:05 +00:00
#if SW_BLEND_NEEDS_RT
2023-04-13 03:23:31 +00:00
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
2023-08-11 23:03:53 +00:00
#else
// Not used, but we define it to make the selection below simpler.
vec4 RT = vec4(0.0f);
#endif
2023-04-13 03:23:31 +00:00
// FIXME FMT_16 case
// FIXME Ad or Ad * 2?
float Ad = RT.a / 128.0f;
2015-07-18 11:40:10 +00:00
2023-04-13 03:23:31 +00:00
// Let the compiler do its jobs !
vec3 Cd = RT.rgb;
2023-08-11 23:03:53 +00:00
vec3 Cs = Color.rgb;
2015-05-20 07:07:01 +00:00
2015-07-13 13:19:33 +00:00
#if PS_BLEND_A == 0
2023-04-13 03:23:31 +00:00
vec3 A = Cs;
2015-07-13 13:19:33 +00:00
#elif PS_BLEND_A == 1
2023-04-13 03:23:31 +00:00
vec3 A = Cd;
2015-07-13 13:19:33 +00:00
#else
2023-04-13 03:23:31 +00:00
vec3 A = vec3(0.0f);
2015-07-13 13:19:33 +00:00
#endif
2015-05-20 07:07:01 +00:00
2015-07-13 13:19:33 +00:00
#if PS_BLEND_B == 0
2023-04-13 03:23:31 +00:00
vec3 B = Cs;
2015-07-13 13:19:33 +00:00
#elif PS_BLEND_B == 1
2023-04-13 03:23:31 +00:00
vec3 B = Cd;
2015-07-13 13:19:33 +00:00
#else
2023-04-13 03:23:31 +00:00
vec3 B = vec3(0.0f);
2015-07-13 13:19:33 +00:00
#endif
2015-05-20 07:07:01 +00:00
2015-07-13 13:19:33 +00:00
#if PS_BLEND_C == 0
2023-04-13 03:23:31 +00:00
float C = As;
2015-07-13 13:19:33 +00:00
#elif PS_BLEND_C == 1
2023-04-13 03:23:31 +00:00
float C = Ad;
2015-07-13 13:19:33 +00:00
#else
2023-04-13 03:23:31 +00:00
float C = Af;
2015-07-13 13:19:33 +00:00
#endif
2015-05-20 07:07:01 +00:00
2015-07-13 13:19:33 +00:00
#if PS_BLEND_D == 0
2023-04-13 03:23:31 +00:00
vec3 D = Cs;
2015-07-13 13:19:33 +00:00
#elif PS_BLEND_D == 1
2023-04-13 03:23:31 +00:00
vec3 D = Cd;
2015-07-13 13:19:33 +00:00
#else
2023-04-13 03:23:31 +00:00
vec3 D = vec3(0.0f);
2015-07-13 13:19:33 +00:00
#endif
2015-05-20 07:07:01 +00:00
2023-04-13 03:23:31 +00:00
// As/Af clamp alpha for Blend mix
// We shouldn't clamp blend mix with blend hw 1 as we want alpha higher
float C_clamped = C;
2023-03-10 09:41:09 +00:00
#if PS_BLEND_MIX > 0 && PS_BLEND_HW != 1
2023-04-13 03:23:31 +00:00
C_clamped = min(C_clamped, 1.0f);
2021-12-26 17:12:09 +00:00
#endif
2015-07-13 13:19:33 +00:00
#if PS_BLEND_A == PS_BLEND_B
2023-04-13 03:23:31 +00:00
Color.rgb = D;
2022-08-16 09:11:17 +00:00
// In blend_mix, HW adds on some alpha factor * dst.
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
// Instead, apply an offset to convert HW's round to a floor.
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
#elif PS_BLEND_MIX == 2
2023-04-13 03:23:31 +00:00
Color.rgb = ((A - B) * C_clamped + D) + (124.0f/256.0f);
2022-08-16 09:11:17 +00:00
#elif PS_BLEND_MIX == 1
2023-04-13 03:23:31 +00:00
Color.rgb = ((A - B) * C_clamped + D) - (124.0f/256.0f);
2015-07-13 13:19:33 +00:00
#else
2023-04-13 03:23:31 +00:00
Color.rgb = trunc((A - B) * C + D);
2015-05-19 22:51:37 +00:00
#endif
2023-03-10 09:41:09 +00:00
#if PS_BLEND_HW == 1
2023-04-13 03:23:31 +00:00
// As or Af
As_rgba.rgb = vec3(C);
// Subtract 1 for alpha to compensate for the changed equation,
// if c.rgb > 255.0f then we further need to adjust alpha accordingly,
// we pick the lowest overflow from all colors because it's the safest,
// we divide by 255 the color because we don't know Cd value,
// changed alpha should only be done for hw blend.
vec3 alpha_compensate = max(vec3(1.0f), Color.rgb / vec3(255.0f));
As_rgba.rgb -= alpha_compensate;
2023-03-10 12:02:18 +00:00
#elif PS_BLEND_HW == 2
2023-04-13 03:23:31 +00:00
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
// as that is the minimum color Cd can be,
// then we multiply by alpha to get the minimum
// blended value it can be.
float color_compensate = 1.0f * (C + 1.0f);
Color.rgb -= vec3(color_compensate);
2023-03-10 12:02:18 +00:00
#elif PS_BLEND_HW == 3
2023-04-13 03:23:31 +00:00
// As, Ad or Af clamped.
As_rgba.rgb = vec3(C_clamped);
// Cs*(Alpha + 1) might overflow, if it does then adjust alpha value
// that is sent on second output to compensate.
vec3 overflow_check = (Color.rgb - vec3(255.0f)) / 255.0f;
vec3 alpha_compensate = max(vec3(0.0f), overflow_check);
As_rgba.rgb -= alpha_compensate;
2022-08-05 18:54:25 +00:00
#endif
2022-01-23 11:39:01 +00:00
#else
2023-04-13 03:23:31 +00:00
// Needed for Cd * (As/Ad/F + 1) blending modes
2023-03-10 12:02:18 +00:00
#if PS_BLEND_HW == 1
2023-04-13 03:23:31 +00:00
Color.rgb = vec3(255.0f);
2023-03-10 12:02:18 +00:00
#elif PS_BLEND_HW == 2
2023-04-13 03:23:31 +00:00
// Cd*As,Cd*Ad or Cd*F
2022-01-23 11:39:01 +00:00
2022-02-01 18:19:20 +00:00
#if PS_BLEND_C == 2
2023-04-13 03:23:31 +00:00
float Alpha = Af;
2022-01-23 11:39:01 +00:00
#else
2023-04-13 03:23:31 +00:00
float Alpha = As;
2022-01-23 11:39:01 +00:00
#endif
2023-04-13 03:23:31 +00:00
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f);
2023-03-10 09:41:09 +00:00
#elif PS_BLEND_HW == 3
2023-04-13 03:23:31 +00:00
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value when rgb are below 128.
// When any color channel is higher than 128 then adjust the compensation automatically
// to give us more accurate colors, otherwise they will be wrong.
// The higher the value (>128) the lower the compensation will be.
float max_color = max(max(Color.r, Color.g), Color.b);
float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= vec3(color_compensate);
2022-01-23 11:39:01 +00:00
#endif
2015-06-21 06:47:45 +00:00
#endif
2015-07-13 13:19:33 +00:00
}
2015-05-08 18:27:13 +00:00
2015-04-17 18:18:07 +00:00
// Fragment shader entry point.
//
// Stages, in order:
//   1. Scanline mask discard (PS_SCANMSK).
//   2. Destination alpha test discard, either against the current render
//      target (PS_DATE >= 5) or against the primitive-id image (PS_DATE == 3).
//   3. Colour computation through ps_color().
//   4. Optional channel shuffle (PS_SHUFFLE and its sub-modes).
//   5. Alpha fix-ups (PS_FIXED_ONE_A, output format / FBA correction) and
//      selection of the blend factor written to the second colour output.
//   6. For PS_DATE == 1 or 2: write the primitive id (or 0x7FFFFFFF) and return.
//   7. Software blend, dither, colour clamp/wrap, frame buffer mask.
//   8. Write SV_Target0 / SV_Target1 and, with PS_ZCLAMP, the clamped depth.
//
// Colour values are handled in the 0-255 range here and are divided down
// when written to SV_Target0.
void ps_main()
{
#if PS_SCANMSK & 2
	// Fail the depth test on prohibited lines.
	if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
		discard;
#endif

#if PS_DATE >= 5

#if PS_WRITE_RG == 1
	// Pseudo 16-bit access: the alpha value is read from the green channel.
	float rt_a = fetch_rt().g;
#else
	float rt_a = fetch_rt().a;
#endif

#if (PS_DATE & 3) == 1
	// DATM == 0: pixels with alpha equal to 1 will fail.
	bool bad = (127.5f / 255.0f) < rt_a;
#elif (PS_DATE & 3) == 2
	// DATM == 1: pixels with alpha equal to 0 will fail.
	bool bad = rt_a < (127.5f / 255.0f);
#endif

	if (bad) {
		discard;
	}

#endif

#if PS_DATE == 3
	int stencil_ceil = int(texelFetch(img_prim_min, ivec2(gl_FragCoord.xy), 0).r);
	// Note: the primitive with gl_PrimitiveID == stencil_ceil is the one that
	// will update the bad alpha value, so it must be kept.
	if (gl_PrimitiveID > stencil_ceil) {
		discard;
	}
#endif

	vec4 C = ps_color();

#if PS_SHUFFLE
	uvec4 denorm_c = uvec4(C);
	uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);

	// Special case for 32-bit input and 16-bit output, shuffle used by The Godfather.
	#if PS_SHUFFLE_SAME
		#if (PS_READ_BA)
	C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
		#else
	C.ga = C.rg;
		#endif
	// Copy of a 16-bit source into this target.
	#elif PS_READ16_SRC
	C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
	if (bool(denorm_c.a & 0x80u))
		C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
	else
		C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
	// Write RB part. Mask will take care of the correct destination.
	#elif PS_READ_BA
	C.rb = C.bb;
	// FIXME precompute my_TA & 0x80

	// Write GA part. Mask will take care of the correct destination.
	// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component.
	// However Nvidia emulates it with an if (at least on Kepler arch) ...

	// Bit field operations require GL4 HW. Could be nice to merge it with step/mix below.
	// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
	// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
	// c.ga = vec2(float(denorm_c.a));

	if (bool(denorm_c.a & 0x80u))
		C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
	else
		C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));

	#else
	C.rb = C.rr;
	if (bool(denorm_c.g & 0x80u))
		C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
	else
		C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
	// Nice idea but step/mix requires 4 instructions
	// set / trunc / I2F / Mad
	//
	// float sel = step(128.0f, c.g);
	// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
	// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);

	#endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE

	// Must be done before alpha correction.

	// AA (Fixed one) will output a coverage of 1.0 as alpha.
#if PS_FIXED_ONE_A
	C.a = 128.0f;
#endif

#if SW_AD_TO_HW
	// Blend factor is Ad while alpha write is masked: read destination alpha
	// from the render target and hand it to the blender in place of As.
	vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
	vec4 alpha_blend = vec4(RT.a / 128.0f);
#else
	vec4 alpha_blend = vec4(C.a / 128.0f);
#endif

	// Correct the ALPHA value based on the output format.
#if (PS_DST_FMT == FMT_16)
	float A_one = 128.0f; // alpha output will be 0x80
	C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0)
	if(C.a < 128.0f) C.a += 128.0f;
#endif

	// Get the first primitive that will write a failing alpha value.
#if PS_DATE == 1
	// DATM == 0
	// Pixels with alpha equal to 1 will fail (128-255).
	SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
	return;
#elif PS_DATE == 2
	// DATM == 1
	// Pixels with alpha equal to 0 will fail (0-127).
	SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
	return;
#endif

	ps_blend(C, alpha_blend);

	ps_dither(C.rgb);

	// Color clamp/wrap needs to be done after sw blending and dithering.
	ps_color_clamp_wrap(C.rgb);

	ps_fbmask(C);

#if !PS_NO_COLOR
	#if PS_HDR == 1
	SV_Target0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
	#else
	SV_Target0 = C / 255.0f;
	#endif
	#if !defined(DISABLE_DUAL_SOURCE) && !PS_NO_COLOR1
	SV_Target1 = alpha_blend;
	#endif

	#if PS_NO_ABLEND
	// Write the alpha blend factor into col0.
	SV_Target0.a = alpha_blend.a;
	#endif
	#if PS_ONLY_ALPHA
	// rgb isn't used
	SV_Target0.rgb = vec3(0.0f);
	#endif
#endif

#if PS_ZCLAMP
	gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
#endif
}