2011-02-19 10:57:28 +00:00
|
|
|
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
|
2018-12-20 17:41:30 +00:00
|
|
|
|
2011-02-19 10:57:28 +00:00
|
|
|
// Format identifiers. PS_AEM_FMT and PS_DFMT are compared against these.
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2
|
|
|
|
|
2020-05-22 22:58:53 +00:00
|
|
|
// Default vertex-shader options, applied only when the including side has not
// already defined VS_TME (the three macros are expected to be supplied together).
#ifndef VS_TME
#define VS_IIP 0
#define VS_TME 1
#define VS_FST 1
#endif
|
|
|
|
|
|
|
|
// Default geometry-shader options, applied only when GS_IIP is not already defined.
#ifndef GS_IIP
#define GS_IIP 0
#define GS_PRIM 3
#endif
|
|
|
|
|
|
|
|
// Default pixel-shader options, applied only when PS_FST is not already defined.
// Every PS_* switch read by the pixel-shader code below gets a value here so the
// file still preprocesses when no options are supplied.
// PS_IIP used to be defined twice in this list; it is now defined once.
#ifndef PS_FST
#define PS_IIP 0
#define PS_FST 0
#define PS_WMS 0
#define PS_WMT 0
#define PS_AEM_FMT FMT_32
#define PS_AEM 0
#define PS_TFX 0
#define PS_TCC 1
#define PS_ATST 1
#define PS_FOG 0
#define PS_CLR_HW 0
#define PS_FBA 0
#define PS_FBMASK 0
#define PS_LTF 1
#define PS_TCOFFSETHACK 0
#define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0
#define PS_READ_BA 0
#define PS_DFMT 0
#define PS_DEPTH_FMT 0
#define PS_PAL_FMT 0
#define PS_CHANNEL_FETCH 0
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_INVALID_TEX0 0
#define PS_SCALE_FACTOR 1
#define PS_HDR 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
#define PS_BLEND_B 0
#define PS_BLEND_C 0
#define PS_BLEND_D 0
#define PS_BLEND_MIX 0
#define PS_PABE 0
#define PS_DITHER 0
#define PS_ZCLAMP 0
#define PS_SCANMSK 0
#define PS_AUTOMATIC_LOD 0
#define PS_MANUAL_LOD 0
#define PS_TEX_IS_FB 0
#endif
|
|
|
|
|
2019-06-06 15:56:22 +00:00
|
|
|
// Non-zero when any of the A, B or D blend selectors is non-zero (PS_BLEND_C is
// not part of the test). Gates the shader-side blend and the colour clamp/wrap.
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
|
|
|
|
|
2011-02-19 10:57:28 +00:00
|
|
|
// Per-vertex attributes fed to the vertex stage. The vertex shader that consumes
// them is not in this part of the file, so the fields are listed by semantic only.
struct VS_INPUT
{
    float2 st : TEXCOORD0;
    uint4  c  : COLOR0;
    float  q  : TEXCOORD1;
    uint2  p  : POSITION0;
    uint   z  : POSITION1;
    uint2  uv : TEXCOORD2;
    float4 f  : COLOR1;
};
|
|
|
|
|
|
|
|
// Values produced by the vertex stage. Field names, types and semantics mirror
// PS_INPUT so the two structs describe the same inter-stage layout.
struct VS_OUTPUT
{
    float4 p  : SV_Position;
    float4 t  : TEXCOORD0;
    float4 ti : TEXCOORD2;

    // The colour is interpolated across the primitive only when one of the
    // *_IIP options is enabled; otherwise it is passed through flat.
#if VS_IIP != 0 || GS_IIP != 0 || PS_IIP != 0
    float4 c : COLOR0;
#else
    nointerpolation float4 c : COLOR0;
#endif
};
|
|
|
|
|
|
|
|
// Pixel-shader input.
//   p  - SV_Position; p.xy is used as the integer pixel coordinate for direct fetches.
//   t  - ps_color reads t.xy / t.w as the texture coordinate when PS_FST == 0,
//        t.z as the fog factor and t.w as the LOD input.
//   ti - ps_color reads ti.xy as the texture coordinate when PS_FST != 0 and
//        ti.zw as the integer coordinate used for depth sampling.
//   c  - vertex colour passed to tfx().
struct PS_INPUT
{
    float4 p  : SV_Position;
    float4 t  : TEXCOORD0;
    float4 ti : TEXCOORD2;

    // Interpolated only when one of the *_IIP options is enabled; flat otherwise.
#if VS_IIP != 0 || GS_IIP != 0 || PS_IIP != 0
    float4 c : COLOR0;
#else
    nointerpolation float4 c : COLOR0;
#endif
};
|
|
|
|
|
|
|
|
// Pixel-shader output: two colour targets, plus an explicit depth value that is
// only declared when PS_ZCLAMP is enabled.
struct PS_OUTPUT
{
    float4 c0 : SV_Target0;
    float4 c1 : SV_Target1;
#if PS_ZCLAMP
    float depth : SV_Depth;
#endif
};
|
|
|
|
|
|
|
|
// Shader resources.
//   t0 Texture   - source texture sampled/loaded by the sample_* / fetch_* helpers.
//   t1 Palette   - lookup texture read by sample_p() and sample_depth().
//   t2 RtTexture - read in place of Texture when PS_TEX_IS_FB == 1, and by the
//                  fbmask / blend helpers to get the current target colour.
Texture2D<float4> Texture : register(t0);
Texture2D<float4> Palette : register(t1);
Texture2D<float4> RtTexture : register(t2);

// s0 is used with Texture, s1 with Palette.
SamplerState TextureSampler : register(s0);
SamplerState PaletteSampler : register(s1);
|
|
|
|
|
|
|
|
// Constant buffer cb0. None of the pixel-shader code in this part of the file
// reads these members; presumably they belong to the vertex/geometry stages
// (verify against the rest of the file). Member order defines the buffer layout
// and must match the CPU-side structure, so it must not be rearranged.
cbuffer cb0
{
    float2 VertexScale;
    float2 VertexOffset;
    float2 TextureScale;
    float2 TextureOffset;
    float2 PointSize;
    uint MaxDepth;
    uint pad_cb0; // explicit padding
};
|
|
|
|
|
|
|
|
// Constant buffer cb1, read by the pixel-shader helpers. Member order defines the
// buffer layout and must match the CPU-side structure, so it must not be rearranged.
cbuffer cb1
{
    float3 FogColor;       // fog(): colour blended towards
    float AREF;            // atst(): alpha reference
    float4 WH;             // texture sizes; .xy / .zw are chosen by PS_INVALID_TEX0 in clamp_wrap_uv()
    float2 TA;             // replacement alpha values used for the FMT_24 / FMT_16 paths
    float MaxDepthPS;
    float Af;              // ps_blend(): fixed alpha for PS_BLEND_C == 2
    uint4 MskFix;          // wrap mask (.xy) and fix bits (.zw) for region-repeat addressing
    uint4 FbMask;          // ps_fbmask(): set bits keep the existing target value
    float4 HalfTexel;      // sample_color(): offsets added to st.xyxy for the four bilinear taps
    float4 MinMax;         // clamp bounds in clamp_wrap_uv(); LOD parameters in sample_c()
    int4 ChannelShuffle;   // fetch_gXbY(): shifts and masks
    float2 TC_OffsetHack;  // sample_color(): added to st when PS_TCOFFSETHACK is set
    float2 STScale;        // sample_c(): final coordinate scale
    float4x4 DitherMatrix; // ps_dither(): 4x4 offset table
};
|
|
|
|
|
2022-01-09 06:46:40 +00:00
|
|
|
// Reads one texel colour at normalized coordinate `uv`.
//   uv   - normalized texture coordinate.
//   uv_w - only used when PS_MANUAL_LOD == 1, to derive the mip level.
// With PS_TEX_IS_FB == 1 the value is loaded straight from RtTexture at the
// integer position uv * WH.zw; otherwise Texture is sampled through TextureSampler.
float4 sample_c(float2 uv, float uv_w)
{
#if PS_TEX_IS_FB == 1
    return RtTexture.Load(int3(int2(uv * WH.zw), 0));
#else
    if (PS_POINT_SAMPLER)
    {
        // Weird issue with ATI/AMD cards,
        // it looks like they add 127/128 of a texel to sampling coordinates
        // occasionally causing point sampling to erroneously round up.
        // I'm manually adjusting coordinates to the centre of texels here,
        // though the centre is just paranoia, the top left corner works fine.
        // As of 2018 this issue is still present.
        uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw;
    }
    uv *= STScale;

#if PS_AUTOMATIC_LOD == 1
    // Let the hardware pick the mip level.
    return Texture.Sample(TextureSampler, uv);
#elif PS_MANUAL_LOD == 1
    // FIXME add LOD: K - ( LOG2(Q) * (1 << L))
    float K = MinMax.x;
    float L = MinMax.y;
    float bias = MinMax.z;
    float max_lod = MinMax.w;

    float gs_lod = K - log2(abs(uv_w)) * L;
    // FIXME max useful ?
    //float lod = max(min(gs_lod, max_lod) - bias, 0.0f);
    float lod = min(gs_lod, max_lod) - bias;

    return Texture.SampleLevel(TextureSampler, uv, lod);
#else
    return Texture.SampleLevel(TextureSampler, uv, 0); // No lod
#endif
#endif
}
|
|
|
|
|
|
|
|
// Looks up one palette entry: samples Palette through PaletteSampler with the
// scalar coordinate `u` and returns the resulting colour.
float4 sample_p(float u)
{
    return Palette.Sample(PaletteSampler, u);
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Applies the PS_WMS (x/z components) and PS_WMT (y/w components) addressing
// modes to two packed coordinates (uv.xy and uv.zw) and returns the result.
//   mode 2 - clamp to the MinMax bounds.
//   mode 3 - convert to integer texels, AND with MskFix.xy, OR with MskFix.zw,
//            then convert back to normalized coordinates.
//   other  - coordinate is returned unchanged.
// When both axes use the same mode the whole float4 is processed at once.
float4 clamp_wrap_uv(float4 uv)
{
    // Texture size used for the texel <-> normalized conversion.
    float4 tex_size;
    if (PS_INVALID_TEX0 == 1)
        tex_size = WH.zwzw;
    else
        tex_size = WH.xyxy;

    if (PS_WMS == PS_WMT)
    {
        if (PS_WMS == 2)
        {
            uv = clamp(uv, MinMax.xyxy, MinMax.zwzw);
        }
        else if (PS_WMS == 3)
        {
#if PS_FST == 0
            // wrap negative uv coords to avoid an off by one error that shifted
            // textures. Fixes Xenosaga's hair issue.
            uv = frac(uv);
#endif
            uv = (float4)(((uint4)(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size;
        }
    }
    else
    {
        // Horizontal axis.
        if (PS_WMS == 2)
        {
            uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
        }
        else if (PS_WMS == 3)
        {
#if PS_FST == 0
            uv.xz = frac(uv.xz);
#endif
            uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx;
        }

        // Vertical axis.
        if (PS_WMT == 2)
        {
            uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
        }
        else if (PS_WMT == 3)
        {
#if PS_FST == 0
            uv.yw = frac(uv.yw);
#endif
            uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy;
        }
    }

    return uv;
}
|
|
|
|
|
2022-01-09 06:46:40 +00:00
|
|
|
// Samples the four colours needed for a bilinear filter. `uv` packs two corners
// (xy and zw); the four taps are every x/y combination of them.
// Returns one colour per row, in the order (x,y), (z,y), (x,w), (z,w).
float4x4 sample_4c(float4 uv, float uv_w)
{
    float4x4 c;

    c[0] = sample_c(uv.xy, uv_w);
    c[1] = sample_c(uv.zy, uv_w);
    c[2] = sample_c(uv.xw, uv_w);
    c[3] = sample_c(uv.zw, uv_w);

    return c;
}
|
|
|
|
|
2022-01-09 06:46:40 +00:00
|
|
|
// Fetches the palette index (stored in the alpha channel) for the same four taps
// as sample_4c() and converts each to a palette coordinate.
// PS_PAL_FMT == 1 keeps the low 4 bits of the 8-bit index, PS_PAL_FMT == 2 the
// high 4 bits; any other value uses the sampled 8-bit value as is.
float4 sample_4_index(float4 uv, float uv_w)
{
    float4 c;

    c.x = sample_c(uv.xy, uv_w).a;
    c.y = sample_c(uv.zy, uv_w).a;
    c.z = sample_c(uv.xw, uv_w).a;
    c.w = sample_c(uv.zw, uv_w).a;

    // Denormalize value
    uint4 i = uint4(c * 255.0f + 0.5f);

    if (PS_PAL_FMT == 1)
    {
        // 4HL
        c = float4(i & 0xFu) / 255.0f;
    }
    else if (PS_PAL_FMT == 2)
    {
        // 4HH
        c = float4(i >> 4u) / 255.0f;
    }

    // Most of texture will hit this code so keep normalized float value
    // 8 bits
    // Maps index k/255 to (k + 0.5) / 256.
    return c * 255./256 + 0.5/256;
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Resolves four palette coordinates (one per component of `u`) to colours.
// Row k of the result is the palette entry for component k of `u`.
float4x4 sample_4p(float4 u)
{
    float4x4 c;

    c[0] = sample_p(u.x);
    c[1] = sample_p(u.y);
    c[2] = sample_p(u.z);
    c[3] = sample_p(u.w);

    return c;
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Loads the texel at integer position `xy` (mip 0) and returns its red channel
// scaled by 2^32 and converted to int. Reads RtTexture when PS_TEX_IS_FB == 1,
// Texture otherwise.
int fetch_raw_depth(int2 xy)
{
#if PS_TEX_IS_FB == 1
    float4 col = RtTexture.Load(int3(xy, 0));
#else
    float4 col = Texture.Load(int3(xy, 0));
#endif
    return (int)(col.r * exp2(32.0f));
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Loads the unfiltered colour at integer position `xy` (mip 0). Reads RtTexture
// when PS_TEX_IS_FB == 1, Texture otherwise.
float4 fetch_raw_color(int2 xy)
{
#if PS_TEX_IS_FB == 1
    return RtTexture.Load(int3(xy, 0));
#else
    return Texture.Load(int3(xy, 0));
#endif
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Loads the unfiltered colour at integer position `uv` (mip 0) from Texture.
// NOTE(review): unlike fetch_raw_color() this always reads Texture, even when
// PS_TEX_IS_FB == 1 - confirm that is intended.
float4 fetch_c(int2 uv)
{
    return Texture.Load(int3(uv, 0));
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Depth sampling
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
2017-02-17 09:59:21 +00:00
|
|
|
|
2018-12-16 07:45:49 +00:00
|
|
|
// Integer counterpart of clamp_wrap_uv() used for depth sampling.
// MskFix is shifted left by 4 before use, so `uv` is presumably in 1/16-texel
// units (sample_depth() multiplies the result by 1/16) - confirm against caller.
//   mode 2 - clamp between mask.xy and mask.zw.
//   mode 3 - AND with mask.xy, then OR with mask.zw.
//   other  - coordinate is returned unchanged.
int2 clamp_wrap_uv_depth(int2 uv)
{
    int4 mask = (int4)MskFix << 4;

    if (PS_WMS == PS_WMT)
    {
        // Same mode on both axes: process the pair at once.
        if (PS_WMS == 2)
        {
            uv = clamp(uv, mask.xy, mask.zw);
        }
        else if (PS_WMS == 3)
        {
            uv = (uv & mask.xy) | mask.zw;
        }
    }
    else
    {
        // Horizontal axis.
        if (PS_WMS == 2)
        {
            uv.x = clamp(uv.x, mask.x, mask.z);
        }
        else if (PS_WMS == 3)
        {
            uv.x = (uv.x & mask.x) | mask.z;
        }

        // Vertical axis.
        if (PS_WMT == 2)
        {
            uv.y = clamp(uv.y, mask.y, mask.w);
        }
        else if (PS_WMT == 3)
        {
            uv.y = (uv.y & mask.y) | mask.w;
        }
    }

    return uv;
}
|
|
|
|
|
2018-12-18 16:58:35 +00:00
|
|
|
// Produces a texel colour in the 0..255 range from a depth (or colour) source.
//   st  - integer-style texture coordinate; it is wrapped/clamped, then scaled by
//         PS_SCALE_FACTOR / 16 to get the texel to load.
//   pos - pixel position, used instead of `st` by the two HLE paths.
// The source path is chosen, in priority order, by PS_TALES_OF_ABYSS_HLE,
// PS_URBAN_CHAOS_HLE and PS_DEPTH_FMT (1, 2 or 3); with none selected the result
// stays zero. Alpha is then rewritten for the FMT_24 / FMT_16 PS_AEM_FMT cases.
float4 sample_depth(float2 st, float2 pos)
{
    float2 uv_f = (float2)clamp_wrap_uv_depth(int2(st)) * (float2)PS_SCALE_FACTOR * (float2)(1.0f / 16.0f);
    int2 uv = (int2)uv_f;

    float4 t = (float4)(0.0f);

    if (PS_TALES_OF_ABYSS_HLE == 1)
    {
        // Warning: UV can't be used in channel effect
        int depth = fetch_raw_depth(pos);

        // Convert msb based on the palette
        t = Palette.Load(int3((depth >> 8) & 0xFF, 0, 0)) * 255.0f;
    }
    else if (PS_URBAN_CHAOS_HLE == 1)
    {
        // Depth buffer is read as a RGB5A1 texture. The game try to extract the green channel.
        // So it will do a first channel trick to extract lsb, value is right-shifted.
        // Then a new channel trick to extract msb which will shifted to the left.
        // OpenGL uses a FLOAT32 format for the depth so it requires a couple of conversion.
        // To be faster both steps (msb&lsb) are done in a single pass.

        // Warning: UV can't be used in channel effect
        int depth = fetch_raw_depth(pos);

        // Convert lsb based on the palette
        t = Palette.Load(int3(depth & 0xFF, 0, 0)) * 255.0f;

        // Msb is easier
        float green = (float)((depth >> 8) & 0xFF) * 36.0f;
        green = min(green, 255.0f);
        t.g += green;
    }
    else if (PS_DEPTH_FMT == 1)
    {
        // Based on ps_main11 of convert

        // Convert a FLOAT32 depth texture into a RGBA color texture
        uint d = uint(fetch_c(uv).r * exp2(32.0f));
        t = float4(uint4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));
    }
    else if (PS_DEPTH_FMT == 2)
    {
        // Based on ps_main12 of convert

        // Convert a FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
        uint d = uint(fetch_c(uv).r * exp2(32.0f));
        t = float4(uint4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u));
    }
    else if (PS_DEPTH_FMT == 3)
    {
        // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture
        t = fetch_c(uv) * 255.0f;
    }

    // Alpha expansion. With PS_AEM set, an all-zero RGB gives alpha 0.
    if (PS_AEM_FMT == FMT_24)
    {
        t.a = ((PS_AEM == 0) || any(bool3(t.rgb))) ? 255.0f * TA.x : 0.0f;
    }
    else if (PS_AEM_FMT == FMT_16)
    {
        // The stored alpha bit selects TA.y; otherwise same rule as FMT_24.
        t.a = t.a >= 128.0f ? 255.0f * TA.y : ((PS_AEM == 0) || any(bool3(t.rgb))) ? 255.0f * TA.x : 0.0f;
    }

    return t;
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Fetch a Single Channel
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
2019-02-20 11:11:23 +00:00
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Channel fetch (red): takes the red channel of the source at pixel `xy`, uses it
// as a palette coordinate and returns the palette colour scaled to 0..255.
// For depth sources (PS_DEPTH_FMT 1 or 2) the "red channel" is bits 0-7 of the
// raw depth value.
float4 fetch_red(int2 xy)
{
    float4 rt;

    if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
    {
        int depth = (fetch_raw_depth(xy)) & 0xFF;
        rt = (float4)(depth) / 255.0f;
    }
    else
    {
        rt = fetch_raw_color(xy);
    }

    return sample_p(rt.r) * 255.0f;
}
|
|
|
|
|
2021-12-05 04:55:57 +00:00
|
|
|
// Channel fetch (green): takes the green channel of the source at pixel `xy`,
// uses it as a palette coordinate and returns the palette colour scaled to 0..255.
// For depth sources (PS_DEPTH_FMT 1 or 2) the "green channel" is bits 8-15 of the
// raw depth value.
float4 fetch_green(int2 xy)
{
    float4 rt;

    if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
    {
        int depth = (fetch_raw_depth(xy) >> 8) & 0xFF;
        rt = (float4)(depth) / 255.0f;
    }
    else
    {
        rt = fetch_raw_color(xy);
    }

    return sample_p(rt.g) * 255.0f;
}
|
|
|
|
|
2021-12-05 04:55:57 +00:00
|
|
|
// Channel fetch (blue): takes the blue channel of the source at pixel `xy`, uses
// it as a palette coordinate and returns the palette colour scaled to 0..255.
// For depth sources (PS_DEPTH_FMT 1 or 2) the "blue channel" is bits 16-23 of the
// raw depth value.
float4 fetch_blue(int2 xy)
{
    float4 rt;

    if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
    {
        int depth = (fetch_raw_depth(xy) >> 16) & 0xFF;
        rt = (float4)(depth) / 255.0f;
    }
    else
    {
        rt = fetch_raw_color(xy);
    }

    return sample_p(rt.b) * 255.0f;
}
|
2018-10-02 19:43:05 +00:00
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Channel fetch (alpha): uses the alpha channel of the source colour at pixel
// `xy` as a palette coordinate and returns the palette colour scaled to 0..255.
// Unlike the red/green/blue variants there is no depth-source path.
float4 fetch_alpha(int2 xy)
{
    float4 rt = fetch_raw_color(xy);
    return sample_p(rt.a) * 255.0f;
}
|
2011-02-19 10:57:28 +00:00
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Channel fetch (rgb): each of the source's R, G and B values at pixel `xy` is
// looked up in the palette independently, keeping only the matching channel of
// each palette entry. Alpha is fixed at 1.0. The result is scaled to 0..255.
float4 fetch_rgb(int2 xy)
{
    float4 rt = fetch_raw_color(xy);
    float4 c = float4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0);
    return c * 255.0f;
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Channel fetch that merges bits of green and blue into one 8-bit value and
// returns it replicated into all four components (0..255 range, no palette lookup).
// The shift amounts and masks come from ChannelShuffle.
float4 fetch_gXbY(int2 xy)
{
    if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
    {
        // Depth source: take 8 bits starting at bit (8 + ChannelShuffle.w).
        int depth = fetch_raw_depth(xy);
        int bg = (depth >> (8 + ChannelShuffle.w)) & 0xFF;
        return (float4)(bg);
    }
    else
    {
        // Colour source: green is shifted right and masked, blue is shifted left
        // and masked, then the two are OR-ed together.
        int4 rt = (int4)(fetch_raw_color(xy) * 255.0);
        int green = (rt.g >> ChannelShuffle.w) & ChannelShuffle.z;
        int blue = (rt.b << ChannelShuffle.y) & ChannelShuffle.x;
        return (float4)(green | blue);
    }
}
|
|
|
|
|
2022-01-09 06:46:40 +00:00
|
|
|
// Samples the texture colour for coordinate `st` and returns it in the 0..255 range.
//   st   - normalized texture coordinate.
//   uv_w - forwarded to sample_c() for manual LOD selection.
// A single tap is enough when no shader-side filtering, alpha expansion, palette
// lookup or custom addressing is required. Otherwise four taps are gathered
// (through the palette when PS_PAL_FMT != 0), alpha is expanded per tap, and the
// taps are blended bilinearly when PS_LTF is set.
float4 sample_color(float2 st, float uv_w)
{
#if PS_TCOFFSETHACK
    st += TC_OffsetHack.xy;
#endif

    float4 t;
    float4x4 c;
    float2 dd; // bilinear weights; only written and read when PS_LTF is set

    if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
    {
        // Fast path: only c[0] is needed.
        c[0] = sample_c(st, uv_w);
    }
    else
    {
        float4 uv;

        if (PS_LTF)
        {
            uv = st.xyxy + HalfTexel;
            dd = frac(uv.xy * WH.zw);

            if (PS_FST == 0)
            {
                dd = clamp(dd, (float2)0.0f, (float2)0.9999999f);
            }
        }
        else
        {
            uv = st.xyxy;
        }

        uv = clamp_wrap_uv(uv);

#if PS_PAL_FMT != 0
        c = sample_4p(sample_4_index(uv, uv_w));
#else
        c = sample_4c(uv, uv_w);
#endif
    }

    // Alpha expansion per tap. With PS_AEM set, an all-zero RGB gives alpha 0.
    [unroll]
    for (uint i = 0; i < 4; i++)
    {
        if (PS_AEM_FMT == FMT_24)
        {
            c[i].a = !PS_AEM || any(c[i].rgb) ? TA.x : 0;
        }
        else if (PS_AEM_FMT == FMT_16)
        {
            // The stored alpha bit selects TA.y; otherwise same rule as FMT_24.
            c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(c[i].rgb) ? TA.x : 0;
        }
    }

    if (PS_LTF)
    {
        t = lerp(lerp(c[0], c[1], dd.x), lerp(c[2], c[3], dd.x), dd.y);
    }
    else
    {
        t = c[0];
    }

    // Denormalize to 0..255; the small bias guards against values landing just
    // below an integer before truncation.
    return trunc(t * 255.0f + 0.05f);
}
|
|
|
|
|
2019-08-25 18:14:50 +00:00
|
|
|
// Combines the texture colour T with the vertex colour C (both in 0..255) according
// to PS_TFX and returns the result.
//   PS_TFX 0 - trunc(trunc(C) * T / 128) on all channels.
//   PS_TFX 1 - T.
//   PS_TFX 2 - rgb as mode 0 plus C.a; alpha is T.a + C.a.
//   PS_TFX 3 - rgb as mode 0 plus C.a; alpha is T.a.
//   other    - C unchanged.
// With PS_TCC == 0 the output alpha is always C.a.
float4 tfx(float4 T, float4 C)
{
    float4 C_out;
    float4 FxT = trunc(trunc(C) * T / 128.0f);

#if (PS_TFX == 0)
    C_out = FxT;
#elif (PS_TFX == 1)
    C_out = T;
#elif (PS_TFX == 2)
    C_out.rgb = FxT.rgb + C.a;
    C_out.a = T.a + C.a;
#elif (PS_TFX == 3)
    C_out.rgb = FxT.rgb + C.a;
    C_out.a = T.a;
#else
    C_out = C;
#endif

#if (PS_TCC == 0)
    C_out.a = C.a;
#endif

#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
    // Clamp only when it is useful
    C_out = min(C_out, 255.0f);
#endif

    return C_out;
}
|
|
|
|
|
2019-08-25 18:14:50 +00:00
|
|
|
// Alpha test: discards the pixel depending on PS_ATST and how C.a compares to AREF.
//   0     - never discard.
//   1     - discard when a > AREF.
//   2     - discard when a < AREF.
//   3     - discard when |a - AREF| > 0.5.
//   4     - discard when |a - AREF| < 0.5.
//   other - never discard.
void atst(float4 C)
{
    float a = C.a;

    if (PS_ATST == 0)
    {
        // nothing to do
    }
    else if (PS_ATST == 1)
    {
        if (a > AREF) discard;
    }
    else if (PS_ATST == 2)
    {
        if (a < AREF) discard;
    }
    else if (PS_ATST == 3)
    {
        if (abs(a - AREF) > 0.5f) discard;
    }
    else if (PS_ATST == 4)
    {
        if (abs(a - AREF) < 0.5f) discard;
    }
}
|
|
|
|
|
|
|
|
// Applies fog when PS_FOG is set: rgb becomes trunc(lerp(FogColor, c.rgb, f)), so
// f == 0 gives pure fog colour and f == 1 leaves the colour as is. Alpha is never
// changed. With PS_FOG == 0 the colour is returned untouched.
float4 fog(float4 c, float f)
{
    if (PS_FOG)
    {
        c.rgb = trunc(lerp(FogColor, c.rgb, f));
    }

    return c;
}
|
|
|
|
|
|
|
|
// Computes the pixel colour before any blending: picks the texture coordinate,
// fetches the texel colour T by the configured method, combines it with the vertex
// colour (tfx), runs the alpha test (which may discard the pixel) and applies fog.
// Returns the colour in the 0..255 range.
float4 ps_color(PS_INPUT input)
{
#if PS_FST == 0 && PS_INVALID_TEX0 == 1
    // Re-normalize coordinate from invalid GS to corrected texture size
    float2 st = (input.t.xy * WH.xy) / (input.t.w * WH.zw);
    // no st_int yet
    // NOTE(review): st_int is therefore undeclared here, so this configuration
    // cannot be combined with the PS_DEPTH_FMT > 0 path below.
#elif PS_FST == 0
    float2 st = input.t.xy / input.t.w;
    float2 st_int = input.ti.zw / input.t.w;
#else
    float2 st = input.ti.xy;
    float2 st_int = input.ti.zw;
#endif

    // Texel source. Channel fetches address the source by pixel position.
#if PS_CHANNEL_FETCH == 1
    float4 T = fetch_red(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 2
    float4 T = fetch_green(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 3
    float4 T = fetch_blue(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 4
    float4 T = fetch_alpha(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 5
    float4 T = fetch_rgb(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 6
    float4 T = fetch_gXbY(int2(input.p.xy));
#elif PS_DEPTH_FMT > 0
    float4 T = sample_depth(st_int, input.p.xy);
#else
    float4 T = sample_color(st, input.t.w);
#endif

    float4 C = tfx(T, input.c);

    atst(C);

    C = fog(C, input.t.z);

    return C;
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
// Frame-buffer write mask done in the shader (only when PS_FBMASK is set).
//   C      - colour in 0..255; updated in place.
//   pos_xy - pixel position used to read the current target colour from RtTexture.
// Bits set in FbMask are taken from the existing target colour, the remaining
// bits from C.
void ps_fbmask(inout float4 C, float2 pos_xy)
{
    if (PS_FBMASK)
    {
        // Current target colour, denormalized to 0..255.
        float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
        C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask));
    }
}
|
|
|
|
|
2019-09-15 18:49:34 +00:00
|
|
|
// Adds the dither offset for this pixel to C (only when PS_DITHER is set).
//   C      - rgb colour; updated in place.
//   pos_xy - pixel position.
// PS_DITHER == 2 indexes the 4x4 matrix with the raw pixel position; any other
// non-zero value first divides the position by PS_SCALE_FACTOR.
void ps_dither(inout float3 C, float2 pos_xy)
{
    if (PS_DITHER)
    {
        int2 fpos;

        if (PS_DITHER == 2)
            fpos = int2(pos_xy);
        else
            fpos = int2(pos_xy / (float)PS_SCALE_FACTOR);

        // The same scalar offset is added to all three channels.
        C += DitherMatrix[fpos.x & 3][fpos.y & 3];
    }
}
|
|
|
|
|
2021-11-30 11:57:51 +00:00
|
|
|
// Brings the rgb colour back into range after shader-side blending, dithering or
// fbmask; does nothing when none of those is active.
//   - Without PS_COLCLIP and PS_HDR the colour is clamped to 0..255.
//   - For a 16-bit target (and no blend mix) only the top 5 bits are kept (& 0xF8).
//   - Otherwise, with PS_COLCLIP and no PS_HDR, the colour wraps to 8 bits (& 0xFF).
void ps_color_clamp_wrap(inout float3 C)
{
    // When dithering the bottom 3 bits become meaningless and cause lines in the picture
    // so we need to limit the color depth on dithered items
    if (SW_BLEND || PS_DITHER || PS_FBMASK)
    {
        // Standard Clamp
        if (PS_COLCLIP == 0 && PS_HDR == 0)
            C = clamp(C, (float3)0.0f, (float3)255.0f);

        // In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania
        if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
            C = (float3)((int3)C & (int3)0xF8);
        else if (PS_COLCLIP == 1 && PS_HDR == 0)
            C = (float3)((int3)C & (int3)0xFF);
    }
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
void ps_blend(inout float4 Color, float As, float2 pos_xy)
|
|
|
|
{
|
|
|
|
if (SW_BLEND)
|
|
|
|
{
|
2022-01-08 17:43:28 +00:00
|
|
|
// PABE
|
|
|
|
if (PS_PABE)
|
|
|
|
{
|
|
|
|
// No blending so early exit
|
|
|
|
if (As < 1.0f)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-01-30 07:10:10 +00:00
|
|
|
float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
|
2019-08-25 18:01:29 +00:00
|
|
|
|
2019-08-25 18:14:50 +00:00
|
|
|
float Ad = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;
|
2019-08-25 18:01:29 +00:00
|
|
|
|
2019-08-25 18:14:50 +00:00
|
|
|
float3 Cd = RT.rgb;
|
|
|
|
float3 Cs = Color.rgb;
|
2019-08-25 18:01:29 +00:00
|
|
|
|
|
|
|
float3 A = (PS_BLEND_A == 0) ? Cs : ((PS_BLEND_A == 1) ? Cd : (float3)0.0f);
|
|
|
|
float3 B = (PS_BLEND_B == 0) ? Cs : ((PS_BLEND_B == 1) ? Cd : (float3)0.0f);
|
2022-01-23 11:39:01 +00:00
|
|
|
float C = (PS_BLEND_C == 0) ? As : ((PS_BLEND_C == 1) ? Ad : Af);
|
2019-08-25 18:01:29 +00:00
|
|
|
float3 D = (PS_BLEND_D == 0) ? Cs : ((PS_BLEND_D == 1) ? Cd : (float3)0.0f);
|
|
|
|
|
2021-12-26 17:12:09 +00:00
|
|
|
// As/Af clamp alpha for Blend mix
|
2022-02-18 11:24:32 +00:00
|
|
|
if (PS_BLEND_MIX)
|
2021-12-26 17:12:09 +00:00
|
|
|
C = min(C, (float)1.0f);
|
|
|
|
|
2021-11-22 05:48:16 +00:00
|
|
|
Color.rgb = (PS_BLEND_A == PS_BLEND_B) ? D : trunc(((A - B) * C) + D);
|
2019-08-25 18:01:29 +00:00
|
|
|
}
|
2022-01-23 11:39:01 +00:00
|
|
|
else
|
|
|
|
{
|
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
|
|
|
if (PS_CLR_HW == 1 || PS_CLR_HW == 5)
|
2022-01-23 11:39:01 +00:00
|
|
|
{
|
2022-01-26 02:05:06 +00:00
|
|
|
// Needed for Cd * (As/Ad/F + 1) blending modes
|
|
|
|
|
2022-01-23 11:39:01 +00:00
|
|
|
Color.rgb = (float3)255.0f;
|
|
|
|
}
|
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
|
|
|
else if (PS_CLR_HW == 2 || PS_CLR_HW == 4)
|
2022-01-23 11:39:01 +00:00
|
|
|
{
|
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
|
|
|
// Cd*As,Cd*Ad or Cd*F
|
2022-01-23 11:39:01 +00:00
|
|
|
|
2022-02-01 18:19:20 +00:00
|
|
|
float Alpha = PS_BLEND_C == 2 ? Af : As;
|
2022-01-23 11:39:01 +00:00
|
|
|
|
|
|
|
Color.rgb = max((float3)0.0f, (Alpha - (float3)1.0f));
|
|
|
|
Color.rgb *= (float3)255.0f;
|
|
|
|
}
|
2022-02-01 18:19:20 +00:00
|
|
|
else if (PS_CLR_HW == 3)
|
2022-01-26 02:05:06 +00:00
|
|
|
{
|
|
|
|
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
|
|
|
|
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
|
|
|
|
|
|
|
|
Color.rgb *= (255.0f / 128.0f);
|
|
|
|
}
|
2022-01-23 11:39:01 +00:00
|
|
|
}
|
2019-08-25 18:01:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Pixel shader entry point.
//
// Starts from the colour returned by ps_color(), optionally discards pixels on
// masked scanlines, applies the channel-shuffle rewrite, captures the alpha that
// will drive blending, corrects the output alpha for the destination format and
// finally runs blend -> dither -> clamp/wrap -> fbmask before writing the outputs.
//
// Outputs:
//   c0    : final colour divided by 255
//   c1    : blend alpha replicated to all four channels
//   depth : only written when PS_ZCLAMP is set, min(input.p.z, MaxDepthPS)
PS_OUTPUT ps_main(PS_INPUT input)
{
	float4 C = ps_color(input);

	PS_OUTPUT output;

	// Scanline mask: bit 1 of PS_SCANMSK enables it, bit 0 is the line parity that
	// gets dropped (fail depth test on prohibited lines).
	if ((PS_SCANMSK & 2) != 0 && (int(input.p.y) & 1) == (PS_SCANMSK & 1))
		discard;

	if (PS_SHUFFLE)
	{
		// Integer snapshots taken before C is rewritten below.
		uint4 denorm_c = uint4(C);
		uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);

		// Mask will take care of the correct destination
		uint src;
		if (PS_READ_BA)
		{
			C.rb = C.bb;
			src = denorm_c.a;
		}
		else
		{
			C.rb = C.rr;
			src = denorm_c.g;
		}

		// Keep the low 7 bits of the source channel; bit 7 comes from TA.y when the
		// source has bit 7 set, otherwise from TA.x.
		uint ta = ((src & 0x80u) != 0u) ? denorm_TA.y : denorm_TA.x;
		C.ga = (float2)(float((src & 0x7Fu) | (ta & 0x80u)));
	}

	// Must be done before alpha correction
	float alpha_blend = C.a / 128.0f;
	if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
	{
		// Blend with the alpha read back from the render target instead of the
		// source alpha; a 24-bit destination uses a fixed 1.0.
		float4 RT = trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
		if (PS_DFMT == FMT_24)
			alpha_blend = 1.0f;
		else
			alpha_blend = RT.a / 128.0f;
	}

	// Alpha correction
	float A_one = 128.0f; // alpha output will be 0x80
	if (PS_DFMT == FMT_16)
	{
		// 16-bit destination: alpha is either 0 or 0x80, forced to 0x80 with FBA.
		if (PS_FBA)
			C.a = A_one;
		else
			C.a = step(A_one, C.a) * A_one;
	}
	else if ((PS_DFMT == FMT_32) && PS_FBA)
	{
		// 32-bit destination with FBA: add 0x80 to alphas below 0x80.
		if (C.a < A_one)
			C.a += A_one;
	}

	ps_blend(C, alpha_blend, input.p.xy);

	ps_dither(C.rgb, input.p.xy);

	// Color clamp/wrap needs to be done after sw blending and dithering
	ps_color_clamp_wrap(C.rgb);

	ps_fbmask(C, input.p.xy);

	output.c0 = C / 255.0f;
	output.c1 = alpha_blend.xxxx;

#if PS_ZCLAMP
	output.depth = min(input.p.z, MaxDepthPS);
#endif

	return output;
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Vertex Shader
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
|
2011-02-19 10:57:28 +00:00
|
|
|
// Vertex shader entry point.
//
// Clamps the incoming depth, converts the position with VertexScale/VertexOffset
// (Y is flipped), scales depth by 2^-32 and fills in the texture coordinate
// outputs (t = float coords, ti = integer coords) according to VS_TME / VS_FST.
// Colour is passed through and input.f.r is forwarded in t.z.
VS_OUTPUT vs_main(VS_INPUT input)
{
	VS_OUTPUT output;

	// Clamp to max depth, gs doesn't wrap
	input.z = min(input.z, MaxDepth);

	// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
	// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
	// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
	// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
	float4 pos = float4(input.p, input.z, 1.0f);
	pos.xy -= float2(0.05f, 0.05f);

	// Scale and offset with the Y axis negated.
	float2 scale = float2(VertexScale.x, -VertexScale.y);
	float2 offset = float2(VertexOffset.x, -VertexOffset.y);
	pos.xy = pos.xy * scale - offset;

	pos.z *= exp2(-32.0f); // integer->float depth

	output.p = pos;

	if (!VS_TME)
	{
		// Texturing disabled: neutral coordinates.
		output.t.xy = 0;
		output.t.w = 1.0f;
		output.ti = 0;
	}
	else
	{
		float2 tex_uv = input.uv - TextureOffset;
		float2 tex_st = input.st - TextureOffset;

		// Integer normalized
		output.ti.xy = tex_uv * TextureScale;

		if (VS_FST)
		{
			// Integer integral
			output.ti.zw = tex_uv;
		}
		else
		{
			// float for post-processing in some games
			output.ti.zw = tex_st / TextureScale;
		}

		// Float coords
		output.t.xy = tex_st;
		output.t.w = input.q;
	}

	output.c = input.c;
	output.t.z = input.f.r;

	return output;
}
|
|
|
|
|
2019-08-25 18:01:29 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Geometry Shader
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
|
2021-12-23 11:35:05 +00:00
|
|
|
#if GS_PRIM == 0
|
2017-03-03 21:18:49 +00:00
|
|
|
|
|
|
|
// Geometry shader, point variant (GS_PRIM == 0).
//
// Expands one point into a screen-aligned quad of PointSize.x by PointSize.y whose
// top-left corner is the incoming position. Every emitted vertex is a copy of the
// input vertex with only the position replaced.
//
// The quad is emitted as a single 4-vertex triangle strip (lt, lb, rt, rb), which
// yields the triangles (lt, lb, rt) and (lb, rt, rb). Appending six vertices to one
// strip without RestartStrip() yields the same two triangles plus two zero-area
// ones in between, so four vertices are sufficient and maxvertexcount can be 4,
// matching how the sprite variant emits its quad.
[maxvertexcount(4)]
void gs_main(point VS_OUTPUT input[1], inout TriangleStream<VS_OUTPUT> stream)
{
	// Transform a point to a NxN sprite
	VS_OUTPUT Point = input[0];

	// Get new position: the corners share z/w with the bottom-right corner
	float4 lt_p = input[0].p;
	float4 rb_p = input[0].p + float4(PointSize.x, PointSize.y, 0.0f, 0.0f);
	float4 lb_p = rb_p;
	float4 rt_p = rb_p;
	lb_p.x = lt_p.x;
	rt_p.y = lt_p.y;

	// Triangle 1: lt, lb, rt
	Point.p = lt_p;
	stream.Append(Point);

	Point.p = lb_p;
	stream.Append(Point);

	Point.p = rt_p;
	stream.Append(Point);

	// Triangle 2: lb, rt (reused from the strip) + rb
	Point.p = rb_p;
	stream.Append(Point);
}
|
|
|
|
|
2021-12-23 11:35:05 +00:00
|
|
|
#elif GS_PRIM == 1
|
2017-03-03 21:18:49 +00:00
|
|
|
|
|
|
|
// Geometry shader, line variant (GS_PRIM == 1).
//
// Expands a line segment into a thick quad: both end points are pushed along the
// segment normal by half of (normal * PointSize) in each direction. The quad is
// emitted as two separate triangles. When GS_IIP == 0 both ends use the colour of
// the second vertex.
[maxvertexcount(6)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
{
	// Transform a line to a thick line-sprite
	VS_OUTPUT first = input[0];
	VS_OUTPUT second = input[1];

	// Potentially there is faster math
	float2 direction = normalize(input[1].p.xy - input[0].p.xy);
	float2 normal = float2(direction.y, -direction.x);
	float2 half_extent = (normal * PointSize) / 2;

	// Corners of the quad around the segment
	float2 first_minus = input[0].p.xy - half_extent;
	float2 second_minus = input[1].p.xy - half_extent;
	float2 first_plus = input[0].p.xy + half_extent;
	float2 second_plus = input[1].p.xy + half_extent;

#if GS_IIP == 0
	first.c = second.c;
#endif

	// Triangle 1
	first.p.xy = first_minus;
	stream.Append(first);

	first.p.xy = first_plus;
	stream.Append(first);

	second.p.xy = second_minus;
	stream.Append(second);
	stream.RestartStrip();

	// Triangle 2
	first.p.xy = first_plus;
	stream.Append(first);

	second.p.xy = second_minus;
	stream.Append(second);

	second.p.xy = second_plus;
	stream.Append(second);
	stream.RestartStrip();
}
|
|
|
|
|
2011-02-19 10:57:28 +00:00
|
|
|
#elif GS_PRIM == 3
|
|
|
|
|
|
|
|
// Geometry shader, sprite variant (GS_PRIM == 3).
//
// Expands the two corner vertices of a sprite (input[0] = left/top,
// input[1] = right/bottom) into a 4-vertex triangle strip. Depth, t.zw and colour
// are taken from the second vertex for all four corners; the two synthesized
// corners mix the x and y components of position and texture coordinates.
[maxvertexcount(4)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
{
	VS_OUTPUT top_left = input[0];
	VS_OUTPUT bottom_right = input[1];

	// flat depth
	top_left.p.z = bottom_right.p.z;
	// flat fog and texture perspective
	top_left.t.zw = bottom_right.t.zw;
	// flat color
	top_left.c = bottom_right.c;

	// Swap texture and position coordinate
	// Bottom-left: x components from the first vertex, everything else from the second
	VS_OUTPUT bottom_left = bottom_right;
	bottom_left.p.x = top_left.p.x;
	bottom_left.t.x = top_left.t.x;
	bottom_left.ti.xz = top_left.ti.xz;

	// Top-right: y components from the first vertex, everything else from the second
	VS_OUTPUT top_right = bottom_right;
	top_right.p.y = top_left.p.y;
	top_right.t.y = top_left.t.y;
	top_right.ti.yw = top_left.ti.yw;

	stream.Append(top_left);
	stream.Append(bottom_left);
	stream.Append(top_right);
	stream.Append(bottom_right);
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
#endif
|