2024-07-30 11:42:36 +00:00
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
2023-07-20 13:23:09 +00:00
2021-10-21 08:45:27 +00:00
//////////////////////////////////////////////////////////////////////
// Vertex Shader
//////////////////////////////////////////////////////////////////////
2023-04-07 07:55:55 +00:00
#if defined(VERTEX_SHADER)
2021-10-21 08:45:27 +00:00
// Vertex-stage constants (std140, set 0 / binding 0).
layout(std140, set = 0, binding = 0) uniform cb0
{
vec2 VertexScale; // multiplied into the vertex XY (the Y factor is negated at the use site)
vec2 VertexOffset; // subtracted from the scaled XY (the Y term is negated at the use site)
vec2 TextureScale; // scales the offset UV into ti.xy; divides the offset ST for ti.zw when VS_FST is off
vec2 TextureOffset; // subtracted from both the UV and the ST coordinates
vec2 PointSize; // x feeds gl_PointSize; x/y are the offsets for point expansion and scale the line normal
uint MaxDepth; // upper bound applied to the incoming Z via min()
uint pad_cb0; // not referenced in the visible shader code; presumably padding
};
// Interface block written by the vertex stage.
layout(location = 0) out VSOutput
{
vec4 t; // xy = ST - TextureOffset, z = a_f.r, w = Q; (0, 0, a_f.r, 1) when VS_TME is off
vec4 ti; // xy = (UV - TextureOffset) * TextureScale; zw = offset UV (VS_FST) or offset ST / TextureScale; zero when VS_TME is off
#if VS_IIP != 0
vec4 c; // vertex colour, interpolated across the primitive
#else
flat vec4 c; // vertex colour, not interpolated
#endif
} vsOut;
2023-04-07 07:55:55 +00:00
#if VS_EXPAND == 0
// Per-vertex attributes consumed by the non-expanding main() (VS_EXPAND == 0).
layout(location = 0) in vec2 a_st; // ST texture coordinates
layout(location = 1) in uvec4 a_c; // colour; forwarded as vec4(a_c)
layout(location = 2) in float a_q; // Q; forwarded in vsOut.t.w
layout(location = 3) in uvec2 a_p; // XY position before bias, scale and offset
layout(location = 4) in uint a_z; // depth; clamped to MaxDepth
layout(location = 5) in uvec2 a_uv; // integer UV texture coordinates
layout(location = 6) in vec4 a_f; // only .r is read; it is written to vsOut.t.z
2021-10-21 08:45:27 +00:00
// Vertex entry point for the non-expanding path (VS_EXPAND == 0): transforms the
// position and forwards texture coordinates, colour and the a_f.r value.
void main()
{
	// Depth saturates at MaxDepth instead of wrapping.
	uint depth = min(a_z, MaxDepth);

	// Bias the position by -0.05 (1/320 of a pixel) to dodge rounding problems: the
	// integral part of the position usually has 5 digits, so 0.05 is about as small
	// as the bias can be. Without it, ceil() of a transformed y = 133 may yield 134
	// and leave line 133 empty. Input granularity is 1/16 pixel, so a smaller bias
	// never moves drawing up/left by a whole pixel: 133.0625 must still start at
	// line 134, and ceil(133.0625 - 0.05) is still above 133.
	vec4 pos = vec4(a_p, float(depth), 1.0f) - vec4(0.05f, 0.05f, 0, 0);
	pos.xy = pos.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y);
	pos.z *= exp2(-32.0f); // integer -> float depth
	pos.y = -pos.y;
	gl_Position = pos;

#if VS_TME
	vec2 uv = a_uv - TextureOffset;
	vec2 st = a_st - TextureOffset;
#if VS_FST
	// xy: normalized integer coordinates, zw: integral integer coordinates
	vsOut.ti = vec4(uv * TextureScale, uv);
#else
	// zw: float coordinates, needed for post-processing in some games
	vsOut.ti = vec4(uv * TextureScale, st / TextureScale);
#endif
	// Float coordinates plus Q
	vsOut.t.xy = st;
	vsOut.t.w = a_q;
#else
	vsOut.t = vec4(0.0f, 0.0f, 0.0f, 1.0f);
	vsOut.ti = vec4(0.0f);
#endif

#if VS_POINT_SIZE
	gl_PointSize = PointSize.x;
#endif

	vsOut.c = vec4(a_c);
	vsOut.t.z = a_f.r;
}
2023-04-07 07:55:55 +00:00
#else // VS_EXPAND
2021-10-21 08:45:27 +00:00
2023-04-07 07:55:55 +00:00
// One raw vertex record as read from vertex_buffer; load_vertex() unpacks it.
struct RawVertex
2021-10-21 08:45:27 +00:00
{
2023-04-07 07:55:55 +00:00
vec2 ST; // ST texture coordinates
uint RGBA; // four 8-bit colour channels, first channel in bits 0..7
float Q; // Q, forwarded in t.w
uint XY; // two 16-bit position fields: X in bits 0..15, Y in bits 16..31
uint Z; // depth, clamped to MaxDepth
uint UV; // two 16-bit texture coordinate fields: U in bits 0..15, V in bits 16..31
uint FOG; // decoded with unpackUnorm4x8(); only the first component is used
};
// Read-only storage buffer holding the raw vertices; load_vertex() indexes it
// with gl_BaseVertexARB + index.
layout(std140, set = 0, binding = 2) readonly buffer VertexBuffer {
RawVertex vertex_buffer[];
};
2021-10-21 08:45:27 +00:00
2023-04-07 07:55:55 +00:00
// Result of load_vertex(): a transformed vertex ready to be copied to the outputs.
struct ProcessedVertex
2021-10-21 08:45:27 +00:00
{
2023-04-07 07:55:55 +00:00
vec4 p; // position; written to gl_Position
2021-10-21 08:45:27 +00:00
vec4 t; // copied to vsOut.t
vec4 ti; // copied to vsOut.ti
2023-04-07 07:55:55 +00:00
vec4 c; // colour; copied to vsOut.c
};
2021-10-21 08:45:27 +00:00
2023-04-07 07:55:55 +00:00
// Loads raw vertex `index` (relative to gl_BaseVertexARB) from vertex_buffer,
// unpacks its bit-packed fields and applies the position / texture-coordinate
// transform. Returns the transformed vertex.
ProcessedVertex load_vertex(uint index)
{
	RawVertex raw = vertex_buffer[gl_BaseVertexARB + index];

	// Unpack the packed fields with plain shifts and masks.
	uvec4 rgba = uvec4(raw.RGBA & 0xFFu, (raw.RGBA >> 8) & 0xFFu, (raw.RGBA >> 16) & 0xFFu, raw.RGBA >> 24);
	uvec2 xy = uvec2(raw.XY & 0xFFFFu, raw.XY >> 16);
	uvec2 uv_raw = uvec2(raw.UV & 0xFFFFu, raw.UV >> 16);
	float fog_factor = unpackUnorm4x8(raw.FOG).r;

	ProcessedVertex result;

	// Position: clamp depth to MaxDepth, bias by -0.05, scale/offset, flip Y.
	uint depth = min(raw.Z, MaxDepth);
	result.p = vec4(xy, float(depth), 1.0f) - vec4(0.05f, 0.05f, 0, 0);
	result.p.xy = result.p.xy * vec2(VertexScale.x, -VertexScale.y) - vec2(VertexOffset.x, -VertexOffset.y);
	result.p.z *= exp2(-32.0f); // integer -> float depth
	result.p.y = -result.p.y;

#if VS_TME
	vec2 uv = uv_raw - TextureOffset;
	vec2 st = raw.ST - TextureOffset;
#if VS_FST
	result.ti = vec4(uv * TextureScale, uv);
#else
	result.ti = vec4(uv * TextureScale, st / TextureScale);
#endif
	result.t = vec4(st, fog_factor, raw.Q);
#else
	result.t = vec4(0.0f, 0.0f, fog_factor, 1.0f);
	result.ti = vec4(0.0f);
#endif

	result.c = vec4(rgba);
	return result;
}
2021-10-21 08:45:27 +00:00
// Vertex entry point for the expanding paths (VS_EXPAND 1..3). The low bits of the
// base-relative vertex index pick which corner of the expanded primitive is emitted.
void main()
{
	ProcessedVertex vtx;
	uint vid = uint(gl_VertexIndex - gl_BaseVertexARB);

#if VS_EXPAND == 1 // Point

	// Four output vertices per source vertex: bit 0 adds PointSize.x, bit 1 adds PointSize.y.
	vtx = load_vertex(vid >> 2);
	bvec2 grow = bvec2((vid & 1u) != 0u, (vid & 2u) != 0u);
	vtx.p.xy += mix(vec2(0.0f), PointSize, grow);

#elif VS_EXPAND == 2 // Line

	uint vid_base = vid >> 2;
	bool is_bottom = (vid & 2u) != 0u;
	bool is_right = (vid & 1u) != 0u;
#ifdef VS_PROVOKING_VERTEX_LAST
	uint vid_other = is_bottom ? vid_base - 1u : vid_base + 1u;
#else
	uint vid_other = is_bottom ? vid_base + 1u : vid_base - 1u;
#endif
	vtx = load_vertex(vid_base);
	ProcessedVertex other = load_vertex(vid_other);

	// Push the vertex sideways along the line normal by half the line width.
	vec2 dir = normalize(vtx.p.xy - other.p.xy);
	vec2 half_width = (vec2(dir.y, -dir.x) * PointSize) / 2;
	// The normal is inverted for the bottom point, hence the XOR of the two flags.
	vtx.p.xy += (is_bottom != is_right) ? half_width : -half_width;

	// Lines are run as (0 1 2) (1 2 3), so both triangles start with a point derived
	// from the top line point. Nothing extra is needed for !IIP.

#elif VS_EXPAND == 3 // Sprite

	// Sprite points always come in pairs: even index = lt, odd index = rb.
	uint vid_base = vid >> 1;
	ProcessedVertex lt = load_vertex(vid_base & ~1u);
	vtx = load_vertex(vid_base | 1u);

	// Start from rb and swap in lt's values per axis according to the corner bits.
	if ((vid & 1u) != 0u)
	{
		vtx.p.x = lt.p.x;
		vtx.t.x = lt.t.x;
		vtx.ti.xz = lt.ti.xz;
	}
	if ((vid & 2u) != 0u)
	{
		vtx.p.y = lt.p.y;
		vtx.t.y = lt.t.y;
		vtx.ti.yw = lt.ti.yw;
	}

#endif

	gl_Position = vtx.p;
	vsOut.t = vtx.t;
	vsOut.ti = vtx.ti;
	vsOut.c = vtx.c;
}
#endif // VS_EXPAND
#endif // VERTEX_SHADER
2021-10-21 08:45:27 +00:00
#ifdef FRAGMENT_SHADER
// Format identifiers compared against PS_AEM_FMT / PS_DST_FMT (and used as the PS_FMT fallback).
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2
2024-03-03 18:02:03 +00:00
// Bit flags tested against PS_PROCESS_BA with '&'.
#define SHUFFLE_READ 1
#define SHUFFLE_WRITE 2
#define SHUFFLE_READWRITE 3
2021-10-21 08:45:27 +00:00
// Fallbacks applied only when the compiling code did not define VS_TME.
#ifndef VS_TME
#define VS_TME 1
#define VS_FST 1
#endif
// Fallbacks applied only when GS_IIP is not defined. None of the GS_* names is
// referenced in the visible part of this file.
#ifndef GS_IIP
#define GS_IIP 0
#define GS_PRIM 3
#define GS_POINT 0
#define GS_LINE 0
#endif
// Fallback values for the fragment-shader feature switches. The whole group is
// applied only when PS_FST is not defined by the compiling code.
// NOTE(review): several switches tested further down (e.g. PS_AEM_FMT, PS_REGION_RECT,
// PS_DATE, PS_BLEND_MIX, PS_ROUND_INV, PS_RTA_CORRECTION, PS_RTA_SRC_CORRECTION,
// PS_AUTOMATIC_LOD, PS_MANUAL_LOD, PS_IIP, PS_NO_COLOR) have no fallback here.
#ifndef PS_FST
#define PS_FST 0
#define PS_WMS 0
#define PS_WMT 0
2023-01-31 10:50:45 +00:00
#define PS_ADJS 0
#define PS_ADJT 0
2021-10-21 08:45:27 +00:00
#define PS_FMT FMT_32
#define PS_AEM 0
#define PS_TFX 0 // selects the combine mode in tfx()
#define PS_TCC 1 // 0 makes tfx() take alpha from the vertex colour
#define PS_ATST 1 // selects the comparison in atst()
2024-03-27 09:33:55 +00:00
#define PS_AFAIL 0
2021-10-21 08:45:27 +00:00
#define PS_FOG 0
2023-03-10 09:41:09 +00:00
#define PS_BLEND_HW 0
2023-03-10 12:02:18 +00:00
#define PS_A_MASKED 0
2021-10-21 08:45:27 +00:00
#define PS_FBA 0
#define PS_FBMASK 0
#define PS_LTF 1 // non-zero enables the 4-tap bilinear mix in sample_color()
#define PS_TCOFFSETHACK 0
#define PS_SHUFFLE 0
2023-08-03 00:26:30 +00:00
#define PS_SHUFFLE_SAME 0
2024-03-03 18:02:03 +00:00
#define PS_PROCESS_BA 0
#define PS_PROCESS_RG 0
#define PS_SHUFFLE_ACROSS 0
2023-08-03 00:26:30 +00:00
#define PS_WRITE_RG 0
2023-02-16 10:25:46 +00:00
#define PS_READ16_SRC 0
2023-10-19 09:23:59 +00:00
#define PS_DST_FMT 0
2021-10-21 08:45:27 +00:00
#define PS_DEPTH_FMT 0
#define PS_PAL_FMT 0
#define PS_CHANNEL_FETCH 0
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_HDR 0
#define PS_COLCLIP 0
// Blend selectors: for A/B/D 0 = Cs, 1 = Cd, otherwise zero; for C 0 = As, 1 = Ad, otherwise Af.
#define PS_BLEND_A 0
#define PS_BLEND_B 0
#define PS_BLEND_C 0
#define PS_BLEND_D 0
2022-07-16 17:26:29 +00:00
#define PS_FIXED_ONE_A 0
2021-10-21 08:45:27 +00:00
#define PS_PABE 0
#define PS_DITHER 0
2024-03-05 19:53:23 +00:00
#define PS_DITHER_ADJUST 0
2021-10-21 08:45:27 +00:00
#define PS_ZCLAMP 0
#define PS_FEEDBACK_LOOP 0
#define PS_TEX_IS_FB 0
#endif
// True when any of the A, B or D blend selectors is non-zero; enables the shader-side blend path.
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
2023-08-11 23:03:53 +00:00
// Shader-side blending must read the render target when any selector equals 1
// (1 selects the destination colour / destination alpha in ps_blend).
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
2023-03-10 12:02:18 +00:00
// True when the C selector picks destination alpha and PS_A_MASKED is set.
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
2021-10-21 08:45:27 +00:00
2023-11-04 10:00:43 +00:00
// Gates the declaration of RtSampler / sample_from_rt().
#define PS_FEEDBACK_LOOP_IS_NEEDED (PS_TEX_IS_FB == 1 || PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW || (PS_DATE >= 5))
2021-10-21 08:45:27 +00:00
2023-03-26 10:09:42 +00:00
// Gates the Texture / Palette bindings and all texture sampling helpers.
#define NEEDS_TEX (PS_TFX != 4)
2021-10-21 08:45:27 +00:00
// Fragment-stage constants (std140, set 0 / binding 1).
layout(std140, set = 0, binding = 1) uniform cb1
{
vec3 FogColor; // colour mixed in by fog()
float AREF; // reference value compared against alpha in atst()
vec4 WH; // xy: multiplies normalized UVs before the region-repeat masks; zw: multiplies UVs for the bilinear weights and the PS_REGION_RECT conversion
vec2 TA; // x / y: replacement alpha values used by the 24-bit and 16-bit alpha expansion
float MaxDepthPS; // not referenced in the visible part of this file
float Af; // fixed alpha used when PS_BLEND_C is neither 0 nor 1
uvec4 FbMask; // bits set here are taken from the render target in ps_fbmask()
vec4 HalfTexel; // added to st.xyxy before bilinear sampling
vec4 MinMax; // region clamp: xy = min, zw = max; region repeat: bit patterns of xy / zw are the AND / OR masks
2024-06-22 21:10:42 +00:00
vec4 LODParams; // x = K, y = L, z = bias, w = max_lod (manual LOD path of sample_c)
2023-01-31 10:50:45 +00:00
vec4 STRange; // PS_ADJS/PS_ADJT: xy = offset, zw = scale; PS_REGION_RECT: xy = lower bound/offset, zw = upper bound
2021-10-21 08:45:27 +00:00
ivec4 ChannelShuffle; // x/z = masks, y/w = shift amounts used by fetch_gXbY()
vec2 TC_OffsetHack; // added to st when PS_TCOFFSETHACK is set
2022-01-16 07:05:05 +00:00
vec2 STScale; // multiplies uv in sample_c() when no PS_ADJS/PS_ADJT adjustment applies
2021-10-21 08:45:27 +00:00
mat4 DitherMatrix; // indexed with (y & 3, x & 3) of the pixel position in ps_dither()
2023-03-12 11:05:25 +00:00
float ScaledScaleFactor; // multiplies the wrapped integer coordinates in sample_depth()
float RcpScaleFactor; // multiplies gl_FragCoord.xy in ps_dither() unless PS_DITHER == 2
2021-10-21 08:45:27 +00:00
};
// Interface block received by the fragment stage.
layout(location = 0) in VSOutput
{
vec4 t; // xy = ST, z = fog factor passed to fog(), w = Q (divisor when PS_FST == 0)
vec4 ti; // xy = coordinates used directly when PS_FST != 0; zw = integer coordinates used for depth sampling
#if PS_IIP != 0
vec4 c; // vertex colour, interpolated
#else
flat vec4 c; // vertex colour, not interpolated
#endif
} vsIn;
2024-03-27 05:29:56 +00:00
// Colour outputs: two outputs at location 0 (index 0 and index 1) unless
// PS_NO_COLOR1 is set, a single output when only PS_NO_COLOR1 is set, and none
// when PS_NO_COLOR is set.
#if !PS_NO_COLOR && !PS_NO_COLOR1
2021-10-21 08:45:27 +00:00
layout(location = 0, index = 0) out vec4 o_col0;
layout(location = 0, index = 1) out vec4 o_col1;
2023-03-02 14:23:29 +00:00
#elif !PS_NO_COLOR
2021-10-21 08:45:27 +00:00
layout(location = 0) out vec4 o_col0;
#endif
2023-03-26 10:09:42 +00:00
// Source texture and palette bindings; only declared when texturing is needed.
#if NEEDS_TEX
2021-10-21 08:45:27 +00:00
layout(set = 1, binding = 0) uniform sampler2D Texture; // sampled by sample_c() and the fetch_* helpers
2023-01-03 11:13:50 +00:00
layout(set = 1, binding = 1) uniform texture2D Palette; // read with texelFetch() along row 0
2023-03-26 10:09:42 +00:00
#endif
2021-10-21 08:45:27 +00:00
// Access to the current render target contents. Two variants exist with the same
// sample_from_rt() signature: a texel fetch at the fragment's own position, or a
// subpass input load.
#if PS_FEEDBACK_LOOP_IS_NEEDED
2024-08-01 22:57:41 +00:00
#if defined(DISABLE_TEXTURE_BARRIER) || defined(HAS_FEEDBACK_LOOP_LAYOUT)
2023-08-08 14:27:12 +00:00
layout(set = 1, binding = 2) uniform texture2D RtSampler;
2022-01-05 11:13:27 +00:00
// Returns the render target texel under the current fragment (mip level 0).
vec4 sample_from_rt() { return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); }
2023-07-10 15:03:57 +00:00
#else
2023-08-08 14:27:12 +00:00
layout(input_attachment_index = 0, set = 1, binding = 2) uniform subpassInput RtSampler;
2023-07-10 15:03:57 +00:00
// Returns the render target value through the input attachment.
vec4 sample_from_rt() { return subpassLoad(RtSampler); }
2022-01-05 11:13:27 +00:00
#endif
2021-10-21 08:45:27 +00:00
#endif
// Declared whenever PS_DATE is positive. It is not read in the visible part of
// this file. NOTE(review): its consumer is presumably further down, confirm.
#if PS_DATE > 0
2023-08-08 14:27:12 +00:00
layout(set = 1, binding = 3) uniform texture2D PrimMinTexture;
2021-10-21 08:45:27 +00:00
#endif
2023-03-26 10:09:42 +00:00
#if NEEDS_TEX
2021-10-21 08:45:27 +00:00
// Returns one texel of the source for coordinate `uv`.
//  - PS_TEX_IS_FB:   the render target value under the current fragment (uv is ignored).
//  - PS_REGION_RECT: an unfiltered texelFetch at ivec2(uv).
//  - otherwise:      uv is rescaled per axis and sampled with the LOD chosen by
//                    PS_AUTOMATIC_LOD / PS_MANUAL_LOD (level 0 if neither is set).
vec4 sample_c(vec2 uv)
{
#if PS_TEX_IS_FB
	return sample_from_rt();
#elif PS_REGION_RECT
	return texelFetch(Texture, ivec2(uv), 0);
#else
	// Per-axis rescale: an adjusted axis uses STRange (offset, then scale), a plain axis uses STScale.
#if PS_ADJS
	uv.x = (uv.x - STRange.x) * STRange.z;
#else
	uv.x *= STScale.x;
#endif
#if PS_ADJT
	uv.y = (uv.y - STRange.y) * STRange.w;
#else
	uv.y *= STScale.y;
#endif

#if PS_AUTOMATIC_LOD == 1
	return texture(Texture, uv);
#elif PS_MANUAL_LOD == 1
	// LODParams = (K, L, bias, max_lod).
	// FIXME (carried over): add LOD as K - (LOG2(Q) * (1 << L)).
	float gs_lod = LODParams.x - log2(abs(vsIn.t.w)) * LODParams.y;
	// FIXME (carried over): is an outer max(..., 0.0f) useful here?
	return textureLod(Texture, uv, min(gs_lod, LODParams.w) - LODParams.z);
#else
	return textureLod(Texture, uv, 0); // no LOD
#endif
#endif
}
2023-01-03 11:13:50 +00:00
// Fetches palette entry `idx` from row 0 of the Palette texture.
vec4 sample_p(uint idx)
{
	ivec2 texel = ivec2(int(idx), 0);
	return texelFetch(Palette, texel, 0);
}
// Palette lookup for a normalized value: `u` is scaled by 255.5 and truncated to
// obtain the palette index.
vec4 sample_p_norm(float u)
{
	uint idx = uint(u * 255.5f);
	return sample_p(idx);
}
// Applies the wrap mode of each axis to two coordinate pairs packed as (x0, y0, x1, y1).
// S uses PS_WMS on the x/z components, T uses PS_WMT on the y/w components:
//   0 / 1 : only handled for PS_REGION_RECT (fract / clamp to [0, 1])
//   2     : clamp between MinMax min (xy) and max (zw)
//   3     : convert to texels, AND with the bits of MinMax.xy, OR with the bits of
//           MinMax.zw, convert back to normalized
// Every operation is component-wise, so handling the axes independently yields the
// same result whether or not both modes are equal.
vec4 clamp_wrap_uv(vec4 uv)
{
	// ---- S axis ----
#if PS_REGION_RECT == 1 && PS_WMS == 0
	uv.xz = fract(uv.xz);
#elif PS_REGION_RECT == 1 && PS_WMS == 1
	uv.xz = clamp(uv.xz, vec2(0.0f), vec2(1.0f));
#elif PS_WMS == 2
	uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
#elif PS_WMS == 3
#if PS_FST == 0
	// Wrap negative coordinates first to avoid an off-by-one error that shifted
	// textures (fixes Xenosaga's hair issue).
	uv.xz = fract(uv.xz);
#endif
	uv.xz = vec2((uvec2(uv.xz * WH.xx) & floatBitsToUint(MinMax.xx)) | floatBitsToUint(MinMax.zz)) / WH.xx;
#endif

	// ---- T axis ----
#if PS_REGION_RECT == 1 && PS_WMT == 0
	uv.yw = fract(uv.yw);
#elif PS_REGION_RECT == 1 && PS_WMT == 1
	uv.yw = clamp(uv.yw, vec2(0.0f), vec2(1.0f));
#elif PS_WMT == 2
	uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
#elif PS_WMT == 3
#if PS_FST == 0
	uv.yw = fract(uv.yw);
#endif
	uv.yw = vec2((uvec2(uv.yw * WH.yy) & floatBitsToUint(MinMax.yy)) | floatBitsToUint(MinMax.ww)) / WH.yy;
#endif

#if PS_REGION_RECT == 1
	// Normalized -> integer coordinates, kept inside STRange.
	uv = clamp(uv * WH.zwzw + STRange.xyxy, STRange.xyxy, STRange.zwzw);
#endif

	return uv;
}
// Samples the four texels addressed by the packed coordinates (x0, y0, x1, y1).
// Columns of the result: (x0,y0), (x1,y0), (x0,y1), (x1,y1).
mat4 sample_4c(vec4 uv)
{
	return mat4(
		sample_c(uv.xy),
		sample_c(uv.zy),
		sample_c(uv.xw),
		sample_c(uv.zw));
}
2023-01-03 11:13:50 +00:00
// Reads the alpha channel of the four texels addressed by `uv` and converts each
// to a palette index, in the same corner order as sample_4c().
uvec4 sample_4_index(vec4 uv)
{
	vec4 alpha = vec4(
		sample_c(uv.xy).a,
		sample_c(uv.zy).a,
		sample_c(uv.xw).a,
		sample_c(uv.zw).a);

	// Denormalize
#if PS_RTA_SRC_CORRECTION
	uvec4 idx = uvec4(round(alpha * 128.25f));
#else
	uvec4 idx = uvec4(alpha * 255.5f);
#endif

#if PS_PAL_FMT == 1
	return idx & 0xFu; // 4HL: low nibble
#elif PS_PAL_FMT == 2
	return idx >> 4u; // 4HH: high nibble
#else
	return idx; // 8-bit index
#endif
}
2023-01-03 11:13:50 +00:00
// Looks up four palette entries; column i of the result is the entry for u[i].
mat4 sample_4p(uvec4 u)
{
	return mat4(
		sample_p(u.x),
		sample_p(u.y),
		sample_p(u.z),
		sample_p(u.w));
}
// Returns the red channel of the source at `xy`, scaled by 2^32 and converted to int.
// With PS_TEX_IS_FB the value comes from the render target and `xy` is ignored.
int fetch_raw_depth(ivec2 xy)
{
#if PS_TEX_IS_FB
	float d = sample_from_rt().r;
#else
	float d = texelFetch(Texture, xy, 0).r;
#endif
	return int(d * exp2(32.0f));
}
// Returns the unmodified source texel at `xy` (mip 0), or the render target value
// under the current fragment when PS_TEX_IS_FB is set.
vec4 fetch_raw_color(ivec2 xy)
{
	vec4 texel;
#if PS_TEX_IS_FB
	texel = sample_from_rt();
#else
	texel = texelFetch(Texture, xy, 0);
#endif
	return texel;
}
// Integer-coordinate texel fetch used by the depth sampling path. Reads the render
// target instead (ignoring `uv`) when PS_TEX_IS_FB is set.
vec4 fetch_c(ivec2 uv)
{
	vec4 texel;
#if PS_TEX_IS_FB
	texel = sample_from_rt();
#else
	texel = texelFetch(Texture, uv, 0);
#endif
	return texel;
}
//////////////////////////////////////////////////////////////////////
// Depth sampling
//////////////////////////////////////////////////////////////////////
// Integer version of the region clamp / repeat used for depth sampling. The bounds
// are the bit patterns of MinMax reinterpreted as ints and shifted left by 4.
// Mode 2 clamps between (xy, zw). Mode 3 ANDs with xy and ORs with zw. Other modes
// leave the axis untouched. Each axis is handled on its own, which is
// component-wise identical to the combined vector form.
ivec2 clamp_wrap_uv_depth(ivec2 uv)
{
	ivec4 bounds = floatBitsToInt(MinMax) << 4;

#if (PS_WMS == 2)
	uv.x = clamp(uv.x, bounds.x, bounds.z);
#elif (PS_WMS == 3)
	uv.x = (uv.x & bounds.x) | bounds.z;
#endif

#if (PS_WMT == 2)
	uv.y = clamp(uv.y, bounds.y, bounds.w);
#elif (PS_WMT == 3)
	uv.y = (uv.y & bounds.y) | bounds.w;
#endif

	return uv;
}
// Produces a colour in the 0..255 range from a depth (or colour) source.
//   st  : integer texture coordinates, used by the PS_DEPTH_FMT conversions
//   pos : fragment position, used by the two HLE paths (which ignore st)
vec4 sample_depth(vec2 st, ivec2 pos)
{
	vec2 scaled = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScaledScaleFactor);
#if PS_REGION_RECT == 1
	scaled = clamp(scaled + STRange.xy, STRange.xy, STRange.zw);
#endif
	ivec2 uv = ivec2(scaled);

	vec4 t = vec4(0.0f);

#if (PS_TALES_OF_ABYSS_HLE == 1)
	// Warning: UV can't be used in channel effect.
	int depth = fetch_raw_depth(pos);
	// Convert the second byte (bits 8..15) through the palette.
	t = texelFetch(Palette, ivec2((depth >> 8) & 0xFF, 0), 0) * 255.0f;
#elif (PS_URBAN_CHAOS_HLE == 1)
	// The depth buffer is read as an RGB5A1 texture and the game tries to extract the
	// green channel: a first channel trick extracts the lsb (right-shifted), a second
	// one extracts the msb (shifted left). Depth is stored as a 32-bit float here, so
	// a couple of conversions are needed. Both steps are done in one pass for speed.
	// Warning: UV can't be used in channel effect.
	int depth = fetch_raw_depth(pos);
	// lsb goes through the palette ...
	t = texelFetch(Palette, ivec2(depth & 0xFF, 0), 0) * 255.0f;
	// ... msb is simpler: scale, saturate, accumulate into green.
	t.g += min(float((depth >> 8) & 0xFF) * 36.0f, 255.0f);
#elif (PS_DEPTH_FMT == 1)
	// Based on ps_convert_float32_rgba8 of convert:
	// 32-bit depth -> RGBA8, one byte per channel.
	uint d = uint(fetch_c(uv).r * exp2(32.0f));
	t = vec4(uvec4(d & 0xFFu, (d >> 8) & 0xFFu, (d >> 16) & 0xFFu, d >> 24));
#elif (PS_DEPTH_FMT == 2)
	// Based on ps_convert_float16_rgb5a1 of convert:
	// low 16 bits of depth -> RGB5A1, expanded to 8 bits per channel.
	uint d = uint(fetch_c(uv).r * exp2(32.0f));
	t = vec4(uvec4(d & 0x1Fu, (d >> 5) & 0x1Fu, (d >> 10) & 0x1Fu, (d >> 15) & 0x01u)) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
#elif (PS_DEPTH_FMT == 3)
	// Colour source: only rescale to 0..255.
	t = fetch_c(uv) * 255.0f;
#endif

#if (PS_AEM_FMT == FMT_24)
	// Alpha is TA.x, or 0 when PS_AEM is set and RGB is all zero.
	bool keep_alpha = (PS_AEM == 0) || any(bvec3(t.rgb));
	t.a = keep_alpha ? 255.0f * TA.x : 0.0f;
#elif (PS_AEM_FMT == FMT_16)
	// High alpha bit selects TA.y; otherwise the same rule as the 24-bit case.
	bool keep_alpha = (PS_AEM == 0) || any(bvec3(t.rgb));
	float low_alpha = keep_alpha ? 255.0f * TA.x : 0.0f;
	t.a = (t.a >= 128.0f) ? 255.0f * TA.y : low_alpha;
#elif PS_PAL_FMT != 0 && !PS_TALES_OF_ABYSS_HLE && !PS_URBAN_CHAOS_HLE
	// Paletted source: alpha is the palette index.
	t = trunc(sample_p(uint(t.a)) * 255.0f + 0.05f);
#endif

	return t;
}
//////////////////////////////////////////////////////////////////////
// Fetch a Single Channel
//////////////////////////////////////////////////////////////////////
// Channel fetch: uses the red channel (or the lowest depth byte for depth formats
// 1 and 2) as a palette index and returns the palette colour scaled to 0..255.
vec4 fetch_red(ivec2 xy)
{
#if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2)
	float index = float(fetch_raw_depth(xy) & 0xFF) / 255.0f;
#else
	float index = fetch_raw_color(xy).r;
#endif
	return sample_p_norm(index) * 255.0f;
}
// Channel fetch: uses the green channel (or depth bits 8..15 for depth formats
// 1 and 2) as a palette index and returns the palette colour scaled to 0..255.
vec4 fetch_green(ivec2 xy)
{
#if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2)
	float index = float((fetch_raw_depth(xy) >> 8) & 0xFF) / 255.0f;
#else
	float index = fetch_raw_color(xy).g;
#endif
	return sample_p_norm(index) * 255.0f;
}
// Channel fetch: uses the blue channel (or depth bits 16..23 for depth formats
// 1 and 2) as a palette index and returns the palette colour scaled to 0..255.
vec4 fetch_blue(ivec2 xy)
{
#if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2)
	float index = float((fetch_raw_depth(xy) >> 16) & 0xFF) / 255.0f;
#else
	float index = fetch_raw_color(xy).b;
#endif
	return sample_p_norm(index) * 255.0f;
}
// Channel fetch: uses the alpha channel as a palette index and returns the palette
// colour scaled to 0..255.
vec4 fetch_alpha(ivec2 xy)
{
	float index = fetch_raw_color(xy).a;
	return sample_p_norm(index) * 255.0f;
}
// Channel fetch: runs R, G and B through the palette independently (each keeps only
// its own channel of the looked-up entry). Alpha is fixed at 1. Result is 0..255.
vec4 fetch_rgb(ivec2 xy)
{
	vec3 src = fetch_raw_color(xy).rgb;
	float r = sample_p_norm(src.r).r;
	float g = sample_p_norm(src.g).g;
	float b = sample_p_norm(src.b).b;
	return vec4(r, g, b, 1.0) * 255.0f;
}
// Channel fetch combining green and blue bits under control of ChannelShuffle
// (x = blue mask, y = blue left shift, z = green mask, w = green right shift).
// For depth formats 1 and 2 the byte starting at bit (8 + ChannelShuffle.w) is used.
// The scalar result is replicated to all four channels.
vec4 fetch_gXbY(ivec2 xy)
{
#if (PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2)
	int raw = fetch_raw_depth(xy);
	return vec4((raw >> (8 + ChannelShuffle.w)) & 0xFF);
#else
	ivec2 gb = ivec2(fetch_raw_color(xy).gb * 255.0);
	int green_bits = (gb.x >> ChannelShuffle.w) & ChannelShuffle.z;
	int blue_bits = (gb.y << ChannelShuffle.y) & ChannelShuffle.x;
	return vec4(float(green_bits | blue_bits));
#endif
}
// Samples the source colour at `st`, handling wrap modes, palette lookup, alpha
// expansion and (when PS_LTF is set) a manual bilinear mix of four texels.
// Returns the colour in the 0..255 range, truncated after a +0.05 bias.
vec4 sample_color(vec2 st)
{
#if PS_TCOFFSETHACK
	st += TC_OffsetHack.xy;
#endif

	mat4 texels;
	vec2 weights;

#if PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2
	// Fast path: a single direct sample, no wrapping or expansion needed.
	texels[0] = sample_c(st);
#else
#if PS_LTF
	vec4 uv = st.xyxy + HalfTexel;
	weights = fract(uv.xy * WH.zw);
#if PS_FST == 0
	weights = clamp(weights, vec2(0.0f), vec2(0.9999999f));
#endif
#else
	vec4 uv = st.xyxy;
#endif

	uv = clamp_wrap_uv(uv);

#if PS_PAL_FMT != 0
	texels = sample_4p(sample_4_index(uv));
#else
	texels = sample_4c(uv);
#endif
#endif

	// Alpha expansion for 24-bit / 16-bit sources. Nothing happens for FMT_32.
	for (uint i = 0; i < 4; i++)
	{
#if (PS_AEM_FMT == FMT_24)
		texels[i].a = (PS_AEM == 0 || any(bvec3(texels[i].rgb))) ? TA.x : 0.0f;
#elif (PS_AEM_FMT == FMT_16)
		texels[i].a = (texels[i].a >= 0.5) ? TA.y : ((PS_AEM == 0 || any(bvec3(ivec3(texels[i].rgb * 255.0f) & ivec3(0xF8)))) ? TA.x : 0.0f);
#endif
	}

#if PS_LTF
	vec4 t = mix(mix(texels[0], texels[1], weights.x), mix(texels[2], texels[3], weights.x), weights.y);
#else
	vec4 t = texels[0];
#endif

#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION
	t.a = t.a * (128.5f / 255.0f);
#endif

	return trunc(t * 255.0f + 0.05f);
}
2023-03-26 10:09:42 +00:00
#endif // NEEDS_TEX
2021-10-21 08:45:27 +00:00
// Combines texture colour T with vertex colour C according to PS_TFX:
//   0: trunc(C * T / 128)
//   1: T
//   2: rgb = modulated + C.a, a = T.a + C.a
//   3: rgb = modulated + C.a, a = T.a
//   otherwise: C
// PS_TCC == 0 forces the output alpha to C.a. Modes 0, 2 and 3 are capped at 255.
vec4 tfx(vec4 T, vec4 C)
{
	vec4 modulated = trunc((C * T) / 128.0f);

#if (PS_TFX == 0)
	vec4 result = modulated;
#elif (PS_TFX == 1)
	vec4 result = T;
#elif (PS_TFX == 2)
	vec4 result = vec4(modulated.rgb + C.a, T.a + C.a);
#elif (PS_TFX == 3)
	vec4 result = vec4(modulated.rgb + C.a, T.a);
#else
	vec4 result = C;
#endif

#if (PS_TCC == 0)
	result.a = C.a;
#endif

#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
	// Only these modes can exceed 255, so the cap is skipped elsewhere.
	result = min(result, 255.0f);
#endif

	return result;
}
2024-03-27 09:33:55 +00:00
// Alpha test of C.a against AREF. Returns true when the test passes.
//   PS_ATST 1: a <= AREF     2: a >= AREF
//           3: |a - AREF| <= 0.5     4: |a - AREF| >= 0.5
//   any other value: always passes.
bool atst(vec4 C)
{
#if (PS_ATST == 1)
	return C.a <= AREF;
#elif (PS_ATST == 2)
	return C.a >= AREF;
#elif (PS_ATST == 3)
	return abs(C.a - AREF) <= 0.5f;
#elif (PS_ATST == 4)
	return abs(C.a - AREF) >= 0.5f;
#else
	// No test configured.
	return true;
#endif
}
// Blends FogColor into the RGB of `c` when PS_FOG is set: f = 1 keeps the colour,
// f = 0 yields FogColor. The result is truncated and alpha is untouched.
vec4 fog(vec4 c, float f)
{
#if PS_FOG
	return vec4(trunc(mix(FogColor, c.rgb, f)), c.a);
#else
	return c;
#endif
}
// Computes the fragment's source colour in the 0..255 range: picks the texel
// according to the channel-fetch / depth / colour configuration, optionally
// re-expands a 16-bit value for channel shuffles, then applies tfx() and fog().
vec4 ps_color()
{
#if PS_FST == 0
	// Perspective-correct coordinates: divide by Q.
	vec2 st = vsIn.t.xy / vsIn.t.w;
	vec2 st_int = vsIn.ti.zw / vsIn.t.w;
#else
	vec2 st = vsIn.ti.xy;
	vec2 st_int = vsIn.ti.zw;
#endif

#if !NEEDS_TEX
	vec4 T = vec4(0.0f);
#elif PS_CHANNEL_FETCH == 1
	vec4 T = fetch_red(ivec2(gl_FragCoord.xy));
#elif PS_CHANNEL_FETCH == 2
	vec4 T = fetch_green(ivec2(gl_FragCoord.xy));
#elif PS_CHANNEL_FETCH == 3
	vec4 T = fetch_blue(ivec2(gl_FragCoord.xy));
#elif PS_CHANNEL_FETCH == 4
	vec4 T = fetch_alpha(ivec2(gl_FragCoord.xy));
#elif PS_CHANNEL_FETCH == 5
	vec4 T = fetch_rgb(ivec2(gl_FragCoord.xy));
#elif PS_CHANNEL_FETCH == 6
	vec4 T = fetch_gXbY(ivec2(gl_FragCoord.xy));
#elif PS_DEPTH_FMT > 0
	vec4 T = sample_depth(st_int, ivec2(gl_FragCoord.xy));
#else
	vec4 T = sample_color(st);
#endif

#if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME
	// Treat two 8-bit channels as the low/high byte of a 16-bit 5:5:5:1 value and
	// expand its fields into separate 8-bit channels. All masks are unsigned so every
	// operand of the bitwise expressions has the same signedness.
	uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ)
	// Low byte = blue, high byte = alpha.
	T.r = float((denorm_c_before.b << 3) & 0xF8u);
	T.g = float(((denorm_c_before.b >> 2) & 0x38u) | ((denorm_c_before.a << 6) & 0xC0u));
	T.b = float((denorm_c_before.a << 1) & 0xF8u);
	T.a = float(denorm_c_before.a & 0x80u);
#else
	// Low byte = red, high byte = green.
	T.r = float((denorm_c_before.r << 3) & 0xF8u);
	T.g = float(((denorm_c_before.r >> 2) & 0x38u) | ((denorm_c_before.g << 6) & 0xC0u));
	T.b = float((denorm_c_before.g << 1) & 0xF8u);
	T.a = float(denorm_c_before.g & 0x80u);
#endif
	// 16-bit alpha expansion: top bit set -> TA.y, otherwise TA.x (or 0 when PS_AEM
	// is set and the 5-bit colour fields are all zero).
	T.a = ((T.a >= 127.5f) ? TA.y : ((PS_AEM == 0 || any(bvec3(ivec3(T.rgb) & ivec3(0xF8)))) ? TA.x : 0.0f)) * 255.0f;
#endif

	vec4 C = tfx(T, vsIn.c);
	C = fog(C, vsIn.t.z);
	return C;
}
// Applies the framebuffer write mask in the shader: bits set in FbMask are taken
// from the current render target value, all other bits from C. No-op without PS_FBMASK.
void ps_fbmask(inout vec4 C)
{
#if PS_FBMASK
	uvec4 dst_bits = uvec4(trunc(sample_from_rt() * 255.0f + 0.1f));
	uvec4 src_bits = uvec4(C);
	C = vec4((src_bits & ~FbMask) | (dst_bits & FbMask));
#endif
}
2024-03-07 14:45:25 +00:00
// Adds (or subtracts, with PS_ROUND_INV) the DitherMatrix entry for this pixel to C.
// Only active for PS_DITHER 1 and 2. Mode 2 indexes by the raw fragment position,
// mode 1 by the position multiplied by RcpScaleFactor.
//   As : source alpha, used by the PS_DITHER_ADJUST compensation.
void ps_dither(inout vec3 C, float As)
{
#if PS_DITHER > 0 && PS_DITHER < 3
#if PS_DITHER == 2
	ivec2 cell = ivec2(gl_FragCoord.xy);
#else
	ivec2 cell = ivec2(gl_FragCoord.xy * RcpScaleFactor);
#endif
	float amount = DitherMatrix[cell.y & 3][cell.x & 3];

#if PS_DITHER_ADJUST
	// Pre-scale the dither amount by the inverse of the blend alpha before the hw
	// blend, so the blended result should match (Cs - Cd) * As + Cd + Dither.
#if PS_BLEND_C == 2
	float Alpha = Af;
#else
	float Alpha = As;
#endif
	if (Alpha > 0.0f)
		amount *= min(1.0f / Alpha, 1.0f);
#endif

#if PS_ROUND_INV
	C -= amount;
#else
	C += amount;
#endif
#endif
}
// Brings C back into the range of the destination format after shader-side
// blending, dithering or fbmask handling. Does nothing in any other configuration.
void ps_color_clamp_wrap(inout vec3 C)
{
	// With dithering the bottom 3 bits become meaningless and cause lines in the
	// picture, so the colour depth of dithered output has to be limited.
#if SW_BLEND || (PS_DITHER > 0 && PS_DITHER < 3) || PS_FBMASK

#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV
	// The shader will invert afterwards, so round up instead of down.
	C = C + vec3(7.0f);
#endif

	// Correct the colour value based on the output format.
#if PS_COLCLIP == 0 && PS_HDR == 0
	// Standard clamp
	C = clamp(C, vec3(0.0f), vec3(255.0f));
#endif

	// FIXME: rounding of negative floats? The compiler uses trunc but floor might be needed.

	// Warning: the blending equation is normally mult(A, B) = A * B >> 7, while the GPU has full accuracy.
	// GS:  Color = 1,     Alpha = 255                   => output 1
	// GPU: Color = 1/255, Alpha = 255/255 * 255/128     => output 1.9921875
#if PS_DST_FMT == FMT_16 && PS_DITHER != 3 && (PS_BLEND_MIX == 0 || PS_DITHER > 0)
	// 16-bit formats only keep 5 bits per colour. This affects the shadow computation of Castlevania.
	C = vec3(ivec3(C) & 0xF8);
#elif PS_COLCLIP == 1 || PS_HDR == 1
	C = vec3(ivec3(C) & 0xFF);
#endif

#endif
}
2023-02-21 16:37:33 +00:00
void ps_blend(inout vec4 Color, inout vec4 As_rgba)
2021-10-21 08:45:27 +00:00
{
2023-02-21 16:37:33 +00:00
float As = As_rgba.a;
2021-10-21 08:45:27 +00:00
#if SW_BLEND
2022-01-08 17:43:28 +00:00
// PABE
#if PS_PABE
2024-08-20 13:40:50 +00:00
// As_rgba needed for accumulation blend to manipulate Cd
2023-02-25 03:42:13 +00:00
// No blending so early exit
if (As < 1.0f)
2024-08-20 13:40:50 +00:00
{
As_rgba.rgb = vec3(0.0f);
2023-02-25 03:42:13 +00:00
return;
2024-08-20 13:40:50 +00:00
}
As_rgba.rgb = vec3(1.0f);
2022-01-08 17:43:28 +00:00
#endif
2021-10-21 08:45:27 +00:00
#if PS_FEEDBACK_LOOP_IS_NEEDED
2024-02-24 21:19:38 +00:00
vec4 RT = sample_from_rt();
2021-10-21 08:45:27 +00:00
#else
2023-02-25 03:42:13 +00:00
// Not used, but we define it to make the selection below simpler.
vec4 RT = vec4(0.0f);
2021-10-21 08:45:27 +00:00
#endif
2024-02-24 21:19:38 +00:00
#if PS_RTA_CORRECTION
2024-03-03 12:43:05 +00:00
float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f;
2024-02-24 21:19:38 +00:00
#else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif
2024-04-02 20:26:50 +00:00
#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED
uvec4 denorm_rt = uvec4(RT);
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
2024-08-10 20:08:39 +00:00
RT.r = float((denorm_rt.b << 3) & 0xF8u);
RT.g = float(((denorm_rt.b >> 2) & 0x38u) | ((denorm_rt.a << 6) & 0xC0u));
RT.b = float((denorm_rt.a << 1) & 0xF8u);
RT.a = float(denorm_rt.a & 0x80u);
2024-04-02 20:26:50 +00:00
#else
2024-08-10 20:08:39 +00:00
RT.r = float((denorm_rt.r << 3) & 0xF8u);
RT.g = float(((denorm_rt.r >> 2) & 0x38u) | ((denorm_rt.g << 6) & 0xC0u));
RT.b = float((denorm_rt.g << 1) & 0xF8u);
RT.a = float(denorm_rt.g & 0x80u);
2024-04-02 20:26:50 +00:00
#endif
#endif
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
// Let the compiler do its jobs !
2024-02-24 21:19:38 +00:00
vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
2023-02-25 03:42:13 +00:00
vec3 Cs = Color.rgb;
2021-10-21 08:45:27 +00:00
#if PS_BLEND_A == 0
2023-02-25 03:42:13 +00:00
vec3 A = Cs;
2021-10-21 08:45:27 +00:00
#elif PS_BLEND_A == 1
2023-02-25 03:42:13 +00:00
vec3 A = Cd;
2021-10-21 08:45:27 +00:00
#else
2023-02-25 03:42:13 +00:00
vec3 A = vec3(0.0f);
2021-10-21 08:45:27 +00:00
#endif
#if PS_BLEND_B == 0
2023-02-25 03:42:13 +00:00
vec3 B = Cs;
2021-10-21 08:45:27 +00:00
#elif PS_BLEND_B == 1
2023-02-25 03:42:13 +00:00
vec3 B = Cd;
2021-10-21 08:45:27 +00:00
#else
2023-02-25 03:42:13 +00:00
vec3 B = vec3(0.0f);
2021-10-21 08:45:27 +00:00
#endif
#if PS_BLEND_C == 0
2023-02-25 03:42:13 +00:00
float C = As;
2021-10-21 08:45:27 +00:00
#elif PS_BLEND_C == 1
2023-02-25 03:42:13 +00:00
float C = Ad;
2021-10-21 08:45:27 +00:00
#else
2023-02-25 03:42:13 +00:00
float C = Af;
2021-10-21 08:45:27 +00:00
#endif
#if PS_BLEND_D == 0
2023-02-25 03:42:13 +00:00
vec3 D = Cs;
2021-10-21 08:45:27 +00:00
#elif PS_BLEND_D == 1
2023-02-25 03:42:13 +00:00
vec3 D = Cd;
2021-10-21 08:45:27 +00:00
#else
2023-02-25 03:42:13 +00:00
vec3 D = vec3(0.0f);
2021-10-21 08:45:27 +00:00
#endif
// As/Af clamp alpha for Blend mix
2023-03-10 09:41:09 +00:00
// We shouldn't clamp blend mix with blend hw 1 as we want alpha higher
2022-08-09 07:56:00 +00:00
float C_clamped = C;
2024-03-09 19:51:52 +00:00
#if PS_BLEND_MIX > 0 && PS_BLEND_HW != 1 && PS_BLEND_HW != 2
2023-02-25 03:42:13 +00:00
C_clamped = min(C_clamped, 1.0f);
2021-10-21 08:45:27 +00:00
#endif
#if PS_BLEND_A == PS_BLEND_B
2023-02-25 03:42:13 +00:00
Color.rgb = D;
2022-08-16 09:11:17 +00:00
// In blend_mix, HW adds on some alpha factor * dst.
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
// Instead, apply an offset to convert HW's round to a floor.
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
#elif PS_BLEND_MIX == 2
2022-08-09 07:56:00 +00:00
Color.rgb = ((A - B) * C_clamped + D) + (124.0f/256.0f);
2022-08-16 09:11:17 +00:00
#elif PS_BLEND_MIX == 1
2022-08-09 07:56:00 +00:00
Color.rgb = ((A - B) * C_clamped + D) - (124.0f/256.0f);
2021-10-21 08:45:27 +00:00
#else
Color.rgb = trunc((A - B) * C + D);
#endif
2023-03-10 09:41:09 +00:00
#if PS_BLEND_HW == 1
2023-02-26 20:59:24 +00:00
// As or Af
As_rgba.rgb = vec3(C);
2023-02-25 03:42:13 +00:00
// Subtract 1 for alpha to compensate for the changed equation,
// if c.rgb > 255.0f then we further need to adjust alpha accordingly,
// we pick the lowest overflow from all colors because it's the safest,
// we divide by 255 the color because we don't know Cd value,
// changed alpha should only be done for hw blend.
vec3 alpha_compensate = max(vec3(1.0f), Color.rgb / vec3(255.0f));
As_rgba.rgb -= alpha_compensate;
2023-03-10 12:02:18 +00:00
#elif PS_BLEND_HW == 2
2024-03-09 19:51:52 +00:00
// Since we can't do Cd*(Alpha + 1) - Cs*Alpha in hw blend
// what we can do is adjust the Cs value that will be
// subtracted, this way we can get a better result in hw blend.
// Result is still wrong but less wrong than before.
float division_alpha = 1.0f + C;
Color.rgb /= vec3(division_alpha);
2023-03-10 12:02:18 +00:00
#elif PS_BLEND_HW == 3
2023-02-26 20:59:24 +00:00
// As, Ad or Af clamped.
As_rgba.rgb = vec3(C_clamped);
// Cs*(Alpha + 1) might overflow, if it does then adjust alpha value
// that is sent on second output to compensate.
vec3 overflow_check = (Color.rgb - vec3(255.0f)) / 255.0f;
vec3 alpha_compensate = max(vec3(0.0f), overflow_check);
As_rgba.rgb -= alpha_compensate;
2022-08-05 18:54:25 +00:00
#endif
2022-01-23 11:39:01 +00:00
#else
2024-08-25 05:36:35 +00:00
#if PS_BLEND_C == 2
vec3 Alpha = vec3(Af);
#else
vec3 Alpha = vec3(As);
#endif
2023-03-10 12:02:18 +00:00
#if PS_BLEND_HW == 1
2022-01-26 02:05:06 +00:00
// Needed for Cd * (As/Ad/F + 1) blending modes
2022-01-24 10:11:38 +00:00
Color.rgb = vec3(255.0f);
2023-03-10 12:02:18 +00:00
#elif PS_BLEND_HW == 2
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
// Cd*As,Cd*Ad or Cd*F
2022-01-23 11:39:01 +00:00
Color.rgb = max(vec3(0.0f), (Alpha - vec3(1.0f)));
Color.rgb *= vec3(255.0f);
2024-02-24 21:26:55 +00:00
#elif PS_BLEND_HW == 3 && PS_RTA_CORRECTION == 0
2022-01-26 02:05:06 +00:00
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
2023-03-06 01:40:34 +00:00
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value when rgb are below 128.
// When any color channel is higher than 128 then adjust the compensation automatically
// to give us more accurate colors, otherwise they will be wrong.
// The higher the value (>128) the lower the compensation will be.
float max_color = max(max(Color.r, Color.g), Color.b);
float color_compensate = 255.0f / max(128.0f, max_color);
Color.rgb *= vec3(color_compensate);
2024-04-18 22:47:45 +00:00
#elif PS_BLEND_HW == 4
2024-08-25 05:36:35 +00:00
// Needed for Cd * (1 - Ad) and Cd*(1 + Alpha).
2024-06-02 10:25:00 +00:00
2024-08-25 05:36:35 +00:00
As_rgba.rgb = Alpha * vec3(128.0f / 255.0f);
2024-06-02 10:25:00 +00:00
Color.rgb = vec3(127.5f);
2024-08-25 05:36:35 +00:00
#elif PS_BLEND_HW == 5
// Needed for Cs*Alpha + Cd*(1 - Alpha).
Alpha *= vec3(128.0f / 255.0f);
As_rgba.rgb = (Alpha - vec3(0.5f));
Color.rgb = (Color.rgb * Alpha);
#elif PS_BLEND_HW == 6
// Needed for Cd*Alpha + Cs*(1 - Alpha).
Alpha *= vec3(128.0f / 255.0f);
As_rgba.rgb = Alpha;
Color.rgb *= (Alpha - vec3(0.5f));
2022-01-23 11:39:01 +00:00
#endif
2021-10-21 08:45:27 +00:00
#endif
}
// Fragment shader entry point.
// In order: optionally discards masked scanlines (PS_SCANMSK), runs the
// destination-alpha test variants selected by PS_DATE, computes the fragment
// colour with ps_color(), applies the alpha test, then either writes a
// primitive ID to o_col0 (PS_DATE == 1 or 2) or blends, shuffles, dithers,
// clamps, masks and writes the colour outputs (every other PS_DATE value).
// All feature selection is done at compile time through the PS_* macros.
void main()
{
#if PS_SCANMSK & 2
// fail depth test on prohibited lines
// Bit 0 of PS_SCANMSK selects the row parity (even/odd y) that is discarded.
2023-02-25 03:42:13 +00:00
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
2021-10-21 08:45:27 +00:00
discard;
#endif
2022-09-01 03:56:34 +00:00
// PS_DATE >= 5: destination alpha test done in the shader. The value returned
// by sample_from_rt() (defined elsewhere in the file) is compared against a
// threshold and the fragment is discarded when the test fails.
#if PS_DATE >= 5
2021-10-21 08:45:27 +00:00
#if PS_WRITE_RG == 1
2023-02-25 03:42:13 +00:00
// Pseudo 16 bits access.
float rt_a = sample_from_rt().g;
2021-10-21 08:45:27 +00:00
#else
2023-02-25 03:42:13 +00:00
float rt_a = sample_from_rt().a;
2021-10-21 08:45:27 +00:00
#endif
// The threshold is 254.5/255 with PS_RTA_CORRECTION and 127.5/255 without.
// NOTE(review): `bad` is only declared when (PS_DATE & 3) is 1 or 2; any other
// PS_DATE value >= 5 would fail to compile - presumably never generated by the
// host code, confirm.
#if (PS_DATE & 3) == 1
2023-02-25 03:42:13 +00:00
// DATM == 0: Pixel with alpha equal to 1 will fail
2024-02-23 20:00:40 +00:00
#if PS_RTA_CORRECTION
bool bad = (254.5f / 255.0f) < rt_a;
#else
bool bad = (127.5f / 255.0f) < rt_a;
#endif
2021-10-21 08:45:27 +00:00
#elif (PS_DATE & 3) == 2
2023-02-25 03:42:13 +00:00
// DATM == 1: Pixel with alpha equal to 0 will fail
2024-02-23 20:00:40 +00:00
#if PS_RTA_CORRECTION
bool bad = rt_a < (254.5f / 255.0f);
#else
bool bad = rt_a < (127.5f / 255.0f);
#endif
2021-10-21 08:45:27 +00:00
#endif
2023-02-25 03:42:13 +00:00
if (bad) {
discard;
}
2021-10-21 08:45:27 +00:00
2022-09-01 03:56:34 +00:00
#endif // PS_DATE >= 5
2021-10-21 08:45:27 +00:00
// PS_DATE == 3: compare this primitive's ID against the value stored in
// PrimMinTexture at this pixel and discard fragments of later primitives.
#if PS_DATE == 3
2023-02-25 03:42:13 +00:00
int stencil_ceil = int(texelFetch(PrimMinTexture, ivec2(gl_FragCoord.xy), 0).r);
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
// the bad alpha value so we must keep it.
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
if (gl_PrimitiveID > stencil_ceil) {
discard;
}
2021-10-21 08:45:27 +00:00
#endif
// Compute the fragment colour, then run the alpha test. The test result is
// kept because the PS_AFAIL == 3 path further down needs it as well.
vec4 C = ps_color();
2024-03-27 09:33:55 +00:00
bool atst_pass = atst(C);
#if PS_AFAIL == 0 // KEEP or ATST off
if (!atst_pass)
discard;
#endif
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
// Must be done before alpha correction
2022-07-16 17:26:29 +00:00
2023-02-25 03:42:13 +00:00
// AA (Fixed one) will output a coverage of 1.0 as alpha
2022-07-16 17:26:29 +00:00
#if PS_FIXED_ONE_A
2023-02-25 03:42:13 +00:00
C.a = 128.0f;
2022-07-16 17:26:29 +00:00
#endif
2024-02-24 21:19:38 +00:00
// Select the blend factor handed to ps_blend()/o_col1: with SW_AD_TO_HW it is
// the render-target alpha (converted to integer steps, then divided by 128),
// otherwise it is the fragment's own alpha divided by 128.
#if SW_AD_TO_HW
#if PS_RTA_CORRECTION
2024-03-03 12:43:05 +00:00
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
2024-02-24 21:19:38 +00:00
#else
vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f);
#endif
2023-02-25 03:42:13 +00:00
vec4 alpha_blend = vec4(RT.a / 128.0f);
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
#else
2023-02-25 03:42:13 +00:00
vec4 alpha_blend = vec4(C.a / 128.0f);
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
#endif
2021-10-21 08:45:27 +00:00
2024-03-27 09:33:55 +00:00
// Correct the ALPHA value based on the output format
// FMT_16: alpha becomes either 0 or 128 (always 128 when PS_FBA is set;
// otherwise 128 only when C.a >= 128, via step()).
// FMT_32 with PS_FBA: 128 is added to any alpha below 128.
2023-10-19 09:23:59 +00:00
#if (PS_DST_FMT == FMT_16)
2023-02-25 03:42:13 +00:00
float A_one = 128.0f; // alpha output will be 0x80
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
2023-10-19 09:23:59 +00:00
#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0)
2023-02-25 03:42:13 +00:00
if(C.a < 128.0f) C.a += 128.0f;
2021-10-21 08:45:27 +00:00
#endif
2023-02-25 03:42:13 +00:00
// Get first primitive that will write a failing alpha value
// In these two modes the only output is o_col0: the primitive ID when this
// fragment's alpha would fail the test, 0x7FFFFFFF otherwise.
// NOTE(review): presumably the result feeds PrimMinTexture used by the
// PS_DATE == 3 path above - confirm against the host code.
2022-09-01 03:56:34 +00:00
#if PS_DATE == 1
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
// DATM == 0
// Pixel with alpha equal to 1 will fail (128-255)
2021-10-21 08:45:27 +00:00
o_col0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
2022-09-01 03:56:34 +00:00
#elif PS_DATE == 2
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
// DATM == 1
// Pixel with alpha equal to 0 will fail (0-127)
o_col0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
2021-10-21 08:45:27 +00:00
#else
// Regular colour path: blend first, then the optional channel shuffle,
// dithering, clamp/wrap and frame buffer mask, in that order.
ps_blend(C, alpha_blend);
2024-01-07 10:39:13 +00:00
#if PS_SHUFFLE
2024-06-04 12:59:36 +00:00
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME
2024-01-07 10:39:13 +00:00
// Pack the top 5 bits of R, G and B plus the top bit of A into two bytes:
// the low byte goes to B (or R), the high byte goes to A (or G).
uvec4 denorm_c_after = uvec4(C);
2024-03-03 18:02:03 +00:00
#if (PS_PROCESS_BA & SHUFFLE_READ)
2024-08-10 20:08:39 +00:00
C.b = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u));
C.a = float(((denorm_c_after.g >> 6) & 0x3u) | ((denorm_c_after.b >> 1) & 0x7Cu) | (denorm_c_after.a & 0x80u));
2024-01-07 10:39:13 +00:00
#else
2024-08-10 20:08:39 +00:00
C.r = float(((denorm_c_after.r >> 3) & 0x1Fu) | ((denorm_c_after.g << 2) & 0xE0u));
C.g = float(((denorm_c_after.g >> 6) & 0x3u) | ((denorm_c_after.b >> 1) & 0x7Cu) | (denorm_c_after.a & 0x80u));
2024-01-07 10:39:13 +00:00
#endif
#endif
2024-06-04 12:59:36 +00:00
2024-01-07 10:39:13 +00:00
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
2024-03-03 18:02:03 +00:00
#if (PS_PROCESS_BA & SHUFFLE_READ)
2024-06-04 12:59:36 +00:00
// Every channel receives the low 7 bits of B combined with the top bit of A.
uvec4 denorm_c = uvec4(C);
2024-01-07 10:39:13 +00:00
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
#endif
// Copy of a 16bit source in to this target
#elif PS_READ16_SRC
2024-06-04 12:59:36 +00:00
// R and B receive the packed low byte, G and A the packed high byte; the top
// bit of the high byte is taken from TA.x rather than from the colour.
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
2024-01-07 10:39:13 +00:00
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
2024-06-04 12:59:36 +00:00
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
2024-01-07 10:39:13 +00:00
// Write RB part. Mask will take care of the correct destination
2024-03-03 18:02:03 +00:00
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
// Both halves are read and written: swap R with B and G with A.
C.rb = C.br;
2024-06-04 12:59:36 +00:00
float g_temp = C.g;
2024-03-03 18:02:03 +00:00
2024-06-04 12:59:36 +00:00
C.g = C.a;
C.a = g_temp;
2024-03-03 18:02:03 +00:00
#elif(PS_PROCESS_BA & SHUFFLE_READ)
// Source is the BA half: replicate B into R/B and A into G/A.
C.rb = C.bb;
2024-06-04 12:59:36 +00:00
C.ga = C.aa;
2024-03-03 18:02:03 +00:00
#else
// Source is the RG half: replicate R into R/B and G into G/A.
C.rb = C.rr;
2024-06-04 12:59:36 +00:00
C.ga = C.gg;
2024-03-03 18:02:03 +00:00
#endif // PS_PROCESS_BA
#endif // PS_SHUFFLE_ACROSS
2024-01-07 10:39:13 +00:00
#endif // PS_SHUFFLE
2024-03-05 19:53:23 +00:00
ps_dither(C.rgb, alpha_blend.a);
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
// Color clamp/wrap needs to be done after sw blending and dithering
ps_color_clamp_wrap(C.rgb);
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
ps_fbmask(C);
2021-10-21 08:45:27 +00:00
2024-03-27 09:33:55 +00:00
#if PS_AFAIL == 3 // RGB_ONLY
// Use alpha blend factor to determine whether to update A.
alpha_blend.a = float(atst_pass);
#endif
2023-02-25 03:42:13 +00:00
// Write the outputs. C is divided back down before being stored: alpha by 128
// with PS_RTA_CORRECTION (255 otherwise), colour by 65535 with PS_HDR == 1
// (255 otherwise). o_col1 carries the blend factor unless PS_NO_COLOR1 is set.
#if !PS_NO_COLOR
2024-02-09 05:49:03 +00:00
#if PS_RTA_CORRECTION
o_col0.a = C.a / 128.0f;
#else
o_col0.a = C.a / 255.0f;
#endif
2023-02-25 03:42:13 +00:00
#if PS_HDR == 1
2024-02-09 05:49:03 +00:00
o_col0.rgb = vec3(C.rgb / 65535.0f);
2023-02-25 03:42:13 +00:00
#else
2024-02-09 05:49:03 +00:00
o_col0.rgb = C.rgb / 255.0f;
2023-02-25 03:42:13 +00:00
#endif
2024-03-27 05:29:56 +00:00
#if !PS_NO_COLOR1
2023-02-25 03:42:13 +00:00
o_col1 = alpha_blend;
#endif
#endif
2021-12-31 07:29:26 +00:00
2023-02-25 03:42:13 +00:00
// Clamp the written depth to MaxDepthPS.
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
#endif
2021-10-21 08:45:27 +00:00
2023-02-25 03:42:13 +00:00
#endif // PS_DATE
2021-10-21 08:45:27 +00:00
}
#endif