2011-02-19 10:57:28 +00:00
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
2018-12-20 17:41:30 +00:00
2011-02-19 10:57:28 +00:00
// Format selectors. The visible code compares PS_AEM_FMT against FMT_24 / FMT_16
// and PS_DFMT against FMT_16.
#define FMT_32  0
#define FMT_24  1
#define FMT_16  2
2020-05-22 22:58:53 +00:00
// Fallback values for the vertex-stage switches. They are only applied when
// VS_TME has not already been defined before this point.
#if !defined(VS_TME)
#define VS_IIP 0
#define VS_TME 1
#define VS_FST 1
#endif
// Fallback values for the geometry-stage switches. They are only applied when
// GS_IIP has not already been defined before this point.
#if !defined(GS_IIP)
#define GS_IIP 0
#define GS_PRIM 3
#define GS_FORWARD_PRIMID 0
#endif
// Fallback values for every pixel-stage switch. They are only applied when
// PS_FST has not already been defined before this point.
// Each macro is defined exactly once (PS_IIP used to be listed twice).
#ifndef PS_FST
#define PS_IIP 0
#define PS_FST 0
#define PS_WMS 0
#define PS_WMT 0
#define PS_ADJS 0
#define PS_ADJT 0
#define PS_AEM_FMT FMT_32
#define PS_AEM 0
#define PS_TFX 0
#define PS_TCC 1
#define PS_ATST 1
#define PS_FOG 0
#define PS_CLR_HW 0
#define PS_FBA 0
#define PS_FBMASK 0
#define PS_LTF 1
#define PS_TCOFFSETHACK 0
#define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0
#define PS_READ_BA 0
#define PS_DFMT 0
#define PS_DEPTH_FMT 0
#define PS_PAL_FMT 0
#define PS_CHANNEL_FETCH 0
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_SCALE_FACTOR 1.0
#define PS_HDR 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
#define PS_BLEND_B 0
#define PS_BLEND_C 0
#define PS_BLEND_D 0
#define PS_BLEND_MIX 0
#define PS_FIXED_ONE_A 0
#define PS_PABE 0
#define PS_DITHER 0
#define PS_ZCLAMP 0
#define PS_SCANMSK 0
#define PS_AUTOMATIC_LOD 0
#define PS_MANUAL_LOD 0
#define PS_TEX_IS_FB 0
#define PS_NO_COLOR 0
#define PS_NO_COLOR1 0
#define PS_NO_ABLEND 0
#define PS_ONLY_ALPHA 0
#define PS_DATE 0
#endif
2019-06-06 15:56:22 +00:00
// SW_BLEND: non-zero when any of the A, B or D blend selectors is non-zero.
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
// SW_BLEND_NEEDS_RT: SW_BLEND is on and at least one of the four selectors equals 1
// (selector value 1 picks the destination colour/alpha in ps_blend).
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
2019-06-06 15:56:22 +00:00
2011-02-19 10:57:28 +00:00
// Vertex attribute record. Every member is tied to its input element through
// the semantic that follows the colon.
struct VS_INPUT
{
	float2 st : TEXCOORD0;
	uint4  c  : COLOR0;
	float  q  : TEXCOORD1;
	uint2  p  : POSITION0;
	uint   z  : POSITION1;
	uint2  uv : TEXCOORD2;
	float4 f  : COLOR1;
};
// Interpolant record carrying position, two texture-coordinate sets and a colour.
// The colour is declared `nointerpolation` only when all three *_IIP switches are 0.
struct VS_OUTPUT
{
	float4 p  : SV_Position;
	float4 t  : TEXCOORD0;
	float4 ti : TEXCOORD2;
#if VS_IIP == 0 && GS_IIP == 0 && PS_IIP == 0
	nointerpolation float4 c : COLOR0;
#else
	float4 c  : COLOR0;
#endif
};
// Pixel-stage input record (read by ps_color). Same layout as VS_OUTPUT, plus an
// optional primitive id.
struct PS_INPUT
{
	float4 p  : SV_Position;
	float4 t  : TEXCOORD0;
	float4 ti : TEXCOORD2;
#if VS_IIP == 0 && GS_IIP == 0 && PS_IIP == 0
	// No *_IIP switch is set: the colour is not interpolated.
	nointerpolation float4 c : COLOR0;
#else
	float4 c  : COLOR0;
#endif
#if (PS_DATE >= 1 && PS_DATE <= 3) || GS_FORWARD_PRIMID
	// Present only for PS_DATE modes 1..3 or when GS_FORWARD_PRIMID is set.
	uint primid : SV_PrimitiveID;
#endif
};
// Pixel-stage output record. Which render-target members exist depends on
// PS_NO_COLOR, PS_DATE and PS_NO_COLOR1; the depth member exists only with PS_ZCLAMP.
struct PS_OUTPUT
{
#if !PS_NO_COLOR && (PS_DATE == 1 || PS_DATE == 2)
	// PS_DATE modes 1 and 2 write a single scalar target.
	float c : SV_Target;
#elif !PS_NO_COLOR
	float4 c0 : SV_Target0;
#if !PS_NO_COLOR1
	float4 c1 : SV_Target1;
#endif
#endif
#if PS_ZCLAMP
	float depth : SV_Depth;
#endif
};
// Shader resources and their fixed register slots.
Texture2D<float4> Texture        : register(t0); // sampled / loaded by sample_c, fetch_c, fetch_raw_*
Texture2D<float4> Palette        : register(t1); // loaded by sample_p and sample_depth
Texture2D<float4> RtTexture      : register(t2); // loaded when PS_TEX_IS_FB == 1 and by ps_fbmask
Texture2D<float>  PrimMinTexture : register(t3); // not referenced in the visible part of the file
SamplerState      TextureSampler : register(s0); // sampler used together with Texture
2022-03-19 12:19:16 +00:00
// Constant buffer cb0. It is pinned to register b0 only when DX12 is defined;
// otherwise no explicit register is given.
// Member order determines the buffer layout and must not be rearranged.
#if defined(DX12)
cbuffer cb0 : register(b0)
#else
cbuffer cb0
#endif
{
	float2 VertexScale;
	float2 VertexOffset;
	float2 TextureScale;
	float2 TextureOffset;
	float2 PointSize;
	uint   MaxDepth;
	uint   pad_cb0;
};
2022-03-19 12:19:16 +00:00
// Constant buffer cb1, read by the pixel-stage helpers in this file. It is pinned
// to register b1 only when DX12 is defined.
// Member order determines the buffer layout and must not be rearranged.
#if defined(DX12)
cbuffer cb1 : register(b1)
#else
cbuffer cb1
#endif
{
	float3   FogColor;
	float    AREF;
	float4   WH;
	float2   TA;
	float    MaxDepthPS;
	float    Af;
	uint4    FbMask;
	float4   HalfTexel;
	float4   MinMax;
	float4   STRange;
	int4     ChannelShuffle;
	float2   TC_OffsetHack;
	float2   STScale;
	float4x4 DitherMatrix;
};
2022-01-09 06:46:40 +00:00
// Returns one colour for texture coordinate `uv`.
//   uv   - texture coordinate; rescaled by STScale / STRange before sampling.
//   uv_w - only read in the PS_MANUAL_LOD path, where it feeds the LOD computation.
// With PS_TEX_IS_FB == 1 the colour is loaded directly from RtTexture instead.
float4 sample_c(float2 uv, float uv_w)
{
#if PS_TEX_IS_FB == 1
	// uv * WH.zw is truncated to an integer texel position for the direct load.
	int2 texel = int2(uv * WH.zw);
	return RtTexture.Load(int3(texel, 0));
#else
	if (PS_POINT_SAMPLER)
	{
		// Workaround for ATI/AMD hardware, which appears to add 127/128 of a texel
		// to the sampling coordinate and so occasionally rounds point sampling up.
		// Snap the coordinate to the texel centre (the top-left corner would also
		// work; the centre is just extra caution). Still reproducible as of 2018.
		uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw;
	}

	// Horizontal coordinate: range-adjusted with PS_ADJS, plain scale otherwise.
#if PS_ADJS
	uv.x = (uv.x - STRange.x) * STRange.z;
#else
	uv.x = uv.x * STScale.x;
#endif

	// Vertical coordinate: range-adjusted with PS_ADJT, plain scale otherwise.
#if PS_ADJT
	uv.y = (uv.y - STRange.y) * STRange.w;
#else
	uv.y = uv.y * STScale.y;
#endif

#if PS_AUTOMATIC_LOD == 1
	return Texture.Sample(TextureSampler, uv);
#elif PS_MANUAL_LOD == 1
	// FIXME add LOD: K - ( LOG2(Q) * (1 << L))
	float lod_k    = MinMax.x;
	float lod_l    = MinMax.y;
	float lod_bias = MinMax.z;
	float lod_cap  = MinMax.w;

	float wanted = lod_k - log2(abs(uv_w)) * lod_l;
	// FIXME: is clamping the final value to >= 0 useful here?
	//float level = max(min(wanted, lod_cap) - lod_bias, 0.0f);
	float level = min(wanted, lod_cap) - lod_bias;
	return Texture.SampleLevel(TextureSampler, uv, level);
#else
	// No LOD selection: always the top mip level.
	return Texture.SampleLevel(TextureSampler, uv, 0);
#endif
#endif
}
2023-01-03 11:13:50 +00:00
// Loads palette entry `u` (row 0, mip 0 of Palette).
float4 sample_p(uint u)
{
	int3 entry = int3(int(u), 0, 0);
	return Palette.Load(entry);
}
// Palette lookup for a float index: `u` is scaled by 255.5, truncated to an
// unsigned integer and passed to sample_p.
float4 sample_p_norm(float u)
{
	uint index = uint(u * 255.5f);
	return sample_p(index);
}
2019-08-25 18:01:29 +00:00
// Applies the region clamp (mode 2) or region repeat (mode 3) selected by
// PS_WMS (x/z components) and PS_WMT (y/w components) to two packed coordinate
// pairs. Other mode values leave the corresponding components untouched.
//   uv - two coordinate pairs packed as (x0, y0, x1, y1).
// Returns the adjusted coordinates.
float4 clamp_wrap_uv(float4 uv)
{
	float4 size = WH.xyxy;

	if (PS_WMS != PS_WMT)
	{
		// The two axes use different modes: treat them separately.
		if (PS_WMS == 2)
		{
			uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
		}
		else if (PS_WMS == 3)
		{
#if PS_FST == 0
			uv.xz = frac(uv.xz);
#endif
			uint2 texel_s = (uint2)(uv.xz * size.xx);
			uint2 wrapped_s = (texel_s & asuint(MinMax.xx)) | asuint(MinMax.zz);
			uv.xz = (float2)wrapped_s / size.xx;
		}

		if (PS_WMT == 2)
		{
			uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
		}
		else if (PS_WMT == 3)
		{
#if PS_FST == 0
			uv.yw = frac(uv.yw);
#endif
			uint2 texel_t = (uint2)(uv.yw * size.yy);
			uint2 wrapped_t = (texel_t & asuint(MinMax.yy)) | asuint(MinMax.ww);
			uv.yw = (float2)wrapped_t / size.yy;
		}
	}
	else if (PS_WMS == 2)
	{
		// Both axes clamp: one vector operation covers all four components.
		uv = clamp(uv, MinMax.xyxy, MinMax.zwzw);
	}
	else if (PS_WMS == 3)
	{
#if PS_FST == 0
		// Taking the fractional part first wraps negative coordinates and avoids
		// an off-by-one shift of the texture (fixes Xenosaga's hair).
		uv = frac(uv);
#endif
		uint4 texel = (uint4)(uv * size);
		uint4 wrapped = (texel & asuint(MinMax.xyxy)) | asuint(MinMax.zwzw);
		uv = (float4)wrapped / size;
	}

	return uv;
}
2022-01-09 06:46:40 +00:00
// Samples the four colours addressed by the packed coordinates (x0, y0, x1, y1).
// Rows of the result: (x0,y0), (x1,y0), (x0,y1), (x1,y1).
//   uv_w - forwarded unchanged to sample_c.
float4x4 sample_4c(float4 uv, float uv_w)
{
	float4x4 taps;

	taps[0] = sample_c(uv.xy, uv_w);
	taps[1] = sample_c(uv.zy, uv_w);
	taps[2] = sample_c(uv.xw, uv_w);
	taps[3] = sample_c(uv.zw, uv_w);

	return taps;
}
2023-01-03 11:13:50 +00:00
// Reads the alpha channel at the four positions addressed by the packed
// coordinates (x0, y0, x1, y1) and converts each value to an integer index.
// PS_PAL_FMT == 1 keeps the low nibble, PS_PAL_FMT == 2 keeps the high nibble,
// any other value returns the full index.
//   uv_w - forwarded unchanged to sample_c.
uint4 sample_4_index(float4 uv, float uv_w)
{
	float4 alpha = float4(
		sample_c(uv.xy, uv_w).a,
		sample_c(uv.zy, uv_w).a,
		sample_c(uv.xw, uv_w).a,
		sample_c(uv.zw, uv_w).a);

	// Scale the sampled value back to an integer.
	uint4 index = uint4(alpha * 255.5f);

	if (PS_PAL_FMT == 1)
	{
		// 4HL
		return index & 0xFu;
	}

	if (PS_PAL_FMT == 2)
	{
		// 4HH
		return index >> 4u;
	}

	// 8
	return index;
}
2023-01-03 11:13:50 +00:00
// Looks up four palette entries; row i of the result is the colour for index u[i].
float4x4 sample_4p(uint4 u)
{
	float4x4 colours;

	colours[0] = sample_p(u.x);
	colours[1] = sample_p(u.y);
	colours[2] = sample_p(u.z);
	colours[3] = sample_p(u.w);

	return colours;
}
2019-08-25 18:01:29 +00:00
// Loads the red channel at integer position `xy` (from RtTexture when
// PS_TEX_IS_FB == 1, from Texture otherwise) and scales it by 2^32 to obtain
// an integer depth value.
//   xy - integer texel position, mip level 0.
// Returns the depth bits reinterpreted as a signed int; callers only use
// shifts followed by masks on the result.
int fetch_raw_depth(int2 xy)
{
#if PS_TEX_IS_FB == 1
	float4 col = RtTexture.Load(int3(xy, 0));
#else
	float4 col = Texture.Load(int3(xy, 0));
#endif
	// Convert through uint first: a direct float -> int conversion saturates at
	// INT_MAX, so every depth >= 0.5 would collapse to 0x7FFFFFFF and lose the
	// bits the callers extract. sample_depth converts the same way.
	uint bits = uint(col.r * exp2(32.0f));
	return int(bits);
}
2019-08-25 18:01:29 +00:00
// Loads the unmodified colour at integer position `xy`, mip level 0.
// Source is RtTexture when PS_TEX_IS_FB == 1 and Texture otherwise.
float4 fetch_raw_color(int2 xy)
{
	int3 location = int3(xy, 0);
#if PS_TEX_IS_FB == 1
	return RtTexture.Load(location);
#else
	return Texture.Load(location);
#endif
}
2019-08-25 18:01:29 +00:00
// Loads the colour at integer position `uv` from Texture, mip level 0.
float4 fetch_c(int2 uv)
{
	int3 location = int3(uv, 0);
	return Texture.Load(location);
}
2019-08-25 18:01:29 +00:00
//////////////////////////////////////////////////////////////////////
// Depth sampling
//////////////////////////////////////////////////////////////////////
2017-02-17 09:59:21 +00:00
2018-12-16 07:45:49 +00:00
// Integer counterpart of clamp_wrap_uv: applies region clamp (mode 2) or region
// repeat (mode 3) per axis according to PS_WMS (x) and PS_WMT (y).
// The bounds are the bit patterns of MinMax shifted left by 4.
//   uv - integer coordinate pair.
// Returns the adjusted pair; other mode values leave the axis untouched.
int2 clamp_wrap_uv_depth(int2 uv)
{
	int4 bounds = asint(MinMax) << 4;

	if (PS_WMS != PS_WMT)
	{
		// Different modes per axis.
		if (PS_WMS == 2)
		{
			uv.x = clamp(uv.x, bounds.x, bounds.z);
		}
		else if (PS_WMS == 3)
		{
			uv.x = (uv.x & bounds.x) | bounds.z;
		}

		if (PS_WMT == 2)
		{
			uv.y = clamp(uv.y, bounds.y, bounds.w);
		}
		else if (PS_WMT == 3)
		{
			uv.y = (uv.y & bounds.y) | bounds.w;
		}
	}
	else if (PS_WMS == 2)
	{
		uv = clamp(uv, bounds.xy, bounds.zw);
	}
	else if (PS_WMS == 3)
	{
		uv = (uv & bounds.xy) | bounds.zw;
	}

	return uv;
}
2018-12-18 16:58:35 +00:00
// Produces a colour in the 0..255 range from depth/colour data, selected by the
// HLE switches and PS_DEPTH_FMT, then applies the PS_AEM_FMT alpha rules.
//   st  - coordinate that is clamped/wrapped, scaled and truncated to a texel index.
//   pos - position handed to fetch_raw_depth in the two HLE paths.
// Returns zero colour when no path matches.
float4 sample_depth(float2 st, float2 pos)
{
	float2 scaled = (float2)clamp_wrap_uv_depth(int2(st)) * (float2)PS_SCALE_FACTOR * (float2)(1.0f / 16.0f);
	int2 texel = (int2)scaled;

	float4 colour = (float4)(0.0f);

	if (PS_TALES_OF_ABYSS_HLE == 1)
	{
		// Warning: UV can't be used in channel effect
		int raw = fetch_raw_depth(pos);

		// The second byte of the depth selects the palette entry.
		colour = Palette.Load(int3((raw >> 8) & 0xFF, 0, 0)) * 255.0f;
	}
	else if (PS_URBAN_CHAOS_HLE == 1)
	{
		// The depth buffer is read as an RGB5A1 texture and the game tries to
		// extract the green channel: a first channel trick extracts the lsb
		// (right-shifted), a second one the msb (left-shifted).
		// OpenGL stores depth as FLOAT32, hence the conversions.
		// Both steps (msb and lsb) are done in one pass for speed.
		// Warning: UV can't be used in channel effect
		int raw = fetch_raw_depth(pos);

		// Low byte goes through the palette.
		colour = Palette.Load(int3(raw & 0xFF, 0, 0)) * 255.0f;

		// The second byte is simply scaled and capped, then added to green.
		float high = (float)((raw >> 8) & 0xFF) * 36.0f;
		colour.g += min(high, 255.0f);
	}
	else if (PS_DEPTH_FMT == 1)
	{
		// Based on ps_convert_float32_rgba8 of convert:
		// FLOAT32 depth texture -> RGBA colour.
		uint bits = uint(fetch_c(texel).r * exp2(32.0f));
		colour = float4(uint4((bits & 0xFFu), ((bits >> 8) & 0xFFu), ((bits >> 16) & 0xFFu), (bits >> 24)));
	}
	else if (PS_DEPTH_FMT == 2)
	{
		// Based on ps_convert_float16_rgb5a1 of convert:
		// FLOAT32 depth (only the 16 lsb) -> RGB5A1 colour.
		uint bits = uint(fetch_c(texel).r * exp2(32.0f));
		colour = float4(uint4((bits & 0x1Fu), ((bits >> 5) & 0x1Fu), ((bits >> 10) & 0x1Fu), (bits >> 15) & 0x01u)) * float4(8.0f, 8.0f, 8.0f, 128.0f);
	}
	else if (PS_DEPTH_FMT == 3)
	{
		// RGBA/RGB5A1 colour texture -> RGBA/RGB5A1 colour.
		colour = fetch_c(texel) * 255.0f;
	}

	if (PS_AEM_FMT == FMT_24)
	{
		// Alpha is TA.x, or 0 when PS_AEM is set and rgb is all zero.
		colour.a = ((PS_AEM == 0) || any(bool3(colour.rgb))) ? 255.0f * TA.x : 0.0f;
	}
	else if (PS_AEM_FMT == FMT_16)
	{
		// Alpha >= 128 selects TA.y; otherwise the same rule as FMT_24.
		colour.a = colour.a >= 128.0f ? 255.0f * TA.y : ((PS_AEM == 0) || any(bool3(colour.rgb))) ? 255.0f * TA.x : 0.0f;
	}

	return colour;
}
2019-08-25 18:01:29 +00:00
//////////////////////////////////////////////////////////////////////
// Fetch a Single Channel
//////////////////////////////////////////////////////////////////////
2019-02-20 11:11:23 +00:00
2019-08-25 18:01:29 +00:00
// Palette lookup driven by the red channel at `xy`, returned scaled to 0..255.
// For PS_DEPTH_FMT 1 or 2 the index comes from the lowest byte of the raw depth.
float4 fetch_red(int2 xy)
{
	float red;

	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int low_byte = fetch_raw_depth(xy) & 0xFF;
		red = (float)low_byte / 255.0f;
	}
	else
	{
		red = fetch_raw_color(xy).r;
	}

	return sample_p_norm(red) * 255.0f;
}
2021-12-05 04:55:57 +00:00
// Palette lookup driven by the green channel at `xy`, returned scaled to 0..255.
// For PS_DEPTH_FMT 1 or 2 the index comes from bits 8..15 of the raw depth.
float4 fetch_green(int2 xy)
{
	float green;

	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int second_byte = (fetch_raw_depth(xy) >> 8) & 0xFF;
		green = (float)second_byte / 255.0f;
	}
	else
	{
		green = fetch_raw_color(xy).g;
	}

	return sample_p_norm(green) * 255.0f;
}
2021-12-05 04:55:57 +00:00
// Palette lookup driven by the blue channel at `xy`, returned scaled to 0..255.
// For PS_DEPTH_FMT 1 or 2 the index comes from bits 16..23 of the raw depth.
float4 fetch_blue(int2 xy)
{
	float blue;

	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int third_byte = (fetch_raw_depth(xy) >> 16) & 0xFF;
		blue = (float)third_byte / 255.0f;
	}
	else
	{
		blue = fetch_raw_color(xy).b;
	}

	return sample_p_norm(blue) * 255.0f;
}
2018-10-02 19:43:05 +00:00
2019-08-25 18:01:29 +00:00
// Palette lookup driven by the alpha channel at `xy`, returned scaled to 0..255.
float4 fetch_alpha(int2 xy)
{
	float alpha = fetch_raw_color(xy).a;
	return sample_p_norm(alpha) * 255.0f;
}
2011-02-19 10:57:28 +00:00
2019-08-25 18:01:29 +00:00
// Runs each of r, g, b at `xy` through the palette independently and keeps the
// matching channel of each lookup. Alpha is fixed at 1.0 before the 255 scale.
float4 fetch_rgb(int2 xy)
{
	float4 src = fetch_raw_color(xy);

	float red   = sample_p_norm(src.r).r;
	float green = sample_p_norm(src.g).g;
	float blue  = sample_p_norm(src.b).b;

	return float4(red, green, blue, 1.0) * 255.0f;
}
2019-08-25 18:01:29 +00:00
// Builds one value from green/blue bits at `xy` using the shifts and masks in
// ChannelShuffle, and replicates it across all four components.
// For PS_DEPTH_FMT 1 or 2 the bits are taken from the raw depth instead.
float4 fetch_gXbY(int2 xy)
{
	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int raw = fetch_raw_depth(xy);
		int shifted = (raw >> (8 + ChannelShuffle.w)) & 0xFF;
		return (float4)(shifted);
	}

	int4 src = (int4)(fetch_raw_color(xy) * 255.0);
	int from_green = (src.g >> ChannelShuffle.w) & ChannelShuffle.z;
	int from_blue  = (src.b << ChannelShuffle.y) & ChannelShuffle.x;
	return (float4)(from_green | from_blue);
}
2022-01-09 06:46:40 +00:00
// Samples the texture colour for coordinate `st` and returns it in the 0..255 range.
// Handles the optional coordinate offset (PS_TCOFFSETHACK), manual bilinear
// filtering (PS_LTF), region clamp/wrap, paletted textures (PS_PAL_FMT) and the
// PS_AEM_FMT alpha rules.
//   uv_w - forwarded unchanged to the sampling helpers.
float4 sample_color(float2 st, float uv_w)
{
#if PS_TCOFFSETHACK
	st += TC_OffsetHack.xy;
#endif

	float4x4 taps;   // up to four fetched colours, one per row
	float2 weight;   // bilinear weights, only written and read when PS_LTF is set

	if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
	{
		// Simple case: a single direct sample is enough.
		taps[0] = sample_c(st, uv_w);
	}
	else
	{
		float4 uv = st.xyxy;

		if (PS_LTF)
		{
			uv = st.xyxy + HalfTexel;
			weight = frac(uv.xy * WH.zw);

			if (PS_FST == 0)
			{
				weight = clamp(weight, (float2)0.0f, (float2)0.9999999f);
			}
		}

		uv = clamp_wrap_uv(uv);

#if PS_PAL_FMT == 0
		taps = sample_4c(uv, uv_w);
#else
		taps = sample_4p(sample_4_index(uv, uv_w));
#endif
	}

	[unroll]
	for (uint row = 0; row < 4; row++)
	{
		if (PS_AEM_FMT == FMT_24)
		{
			// Alpha is TA.x, or 0 when PS_AEM is set and rgb is all zero.
			taps[row].a = (!PS_AEM || any(taps[row].rgb)) ? TA.x : 0;
		}
		else if (PS_AEM_FMT == FMT_16)
		{
			// Alpha >= 0.5 selects TA.y; otherwise the same rule as FMT_24.
			taps[row].a = taps[row].a >= 0.5 ? TA.y : (!PS_AEM || any(taps[row].rgb)) ? TA.x : 0;
		}
	}

	float4 blended;
	if (PS_LTF)
	{
		float4 upper = lerp(taps[0], taps[1], weight.x);
		float4 lower = lerp(taps[2], taps[3], weight.x);
		blended = lerp(upper, lower, weight.y);
	}
	else
	{
		blended = taps[0];
	}

	return trunc(blended * 255.0f + 0.05f);
}
2019-08-25 18:14:50 +00:00
// Combines texture colour T with vertex colour C according to PS_TFX:
//   0: trunc(trunc(C) * T / 128)
//   1: T
//   2: rgb = modulated rgb + C.a, a = T.a + C.a
//   3: rgb = modulated rgb + C.a, a = T.a
//   other: C
// With PS_TCC == 0 alpha is then replaced by C.a. Modes 0, 2 and 3 are finally
// capped at 255.
float4 tfx(float4 T, float4 C)
{
	float4 modulated = trunc(trunc(C) * T / 128.0f);
	float4 result;

#if (PS_TFX == 0)
	result = modulated;
#elif (PS_TFX == 1)
	result = T;
#elif (PS_TFX == 2)
	result = float4(modulated.rgb + C.a, T.a + C.a);
#elif (PS_TFX == 3)
	result = float4(modulated.rgb + C.a, T.a);
#else
	result = C;
#endif

#if (PS_TCC == 0)
	result.a = C.a;
#endif

#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
	// Only these modes can exceed 255, so the cap is skipped elsewhere.
	result = min(result, 255.0f);
#endif

	return result;
}
2019-08-25 18:14:50 +00:00
// Alpha test: discards the pixel when C.a fails the comparison against AREF
// selected by PS_ATST.
//   0: never discard          1: discard when a > AREF
//   2: discard when a < AREF  3: discard when |a - AREF| > 0.5
//   4: discard when |a - AREF| < 0.5
// Any other PS_ATST value discards nothing.
void atst(float4 C)
{
	float a = C.a;
	bool reject = false;

	if (PS_ATST == 1)
	{
		reject = a > AREF;
	}
	else if (PS_ATST == 2)
	{
		reject = a < AREF;
	}
	else if (PS_ATST == 3)
	{
		reject = abs(a - AREF) > 0.5f;
	}
	else if (PS_ATST == 4)
	{
		reject = abs(a - AREF) < 0.5f;
	}

	if (reject)
		discard;
}
// Blends FogColor into c.rgb when PS_FOG is set: f == 0 gives FogColor,
// f == 1 leaves the colour as is. The result is truncated. Alpha is untouched.
float4 fog(float4 c, float f)
{
	if (!PS_FOG)
		return c;

	c.rgb = trunc(lerp(FogColor, c.rgb, f));
	return c;
}
// Computes the pixel colour before blending: fetches the texture colour through
// the path selected by PS_CHANNEL_FETCH / PS_DEPTH_FMT, combines it with the
// interpolated colour (tfx), runs the alpha test (atst may discard the pixel)
// and applies fog.
float4 ps_color(PS_INPUT input)
{
#if PS_FST != 0
	float2 st = input.ti.xy;
	float2 st_int = input.ti.zw;
#else
	// Coordinates are divided by input.t.w when PS_FST is 0.
	float2 st = input.t.xy / input.t.w;
	float2 st_int = input.ti.zw / input.t.w;
#endif

	float4 T;

#if PS_CHANNEL_FETCH == 1
	T = fetch_red(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 2
	T = fetch_green(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 3
	T = fetch_blue(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 4
	T = fetch_alpha(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 5
	T = fetch_rgb(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 6
	T = fetch_gXbY(int2(input.p.xy));
#elif PS_DEPTH_FMT > 0
	T = sample_depth(st_int, input.p.xy);
#else
	T = sample_color(st, input.t.w);
#endif

	float4 C = tfx(T, input.c);

	// May discard the pixel; must run before fog.
	atst(C);

	return fog(C, input.t.z);
}
2019-08-25 18:01:29 +00:00
// With PS_FBMASK set, merges C with the current render-target value bit by bit:
// bits set in FbMask are taken from the render target, the rest from C.
//   C      - colour in the 0..255 range, updated in place.
//   pos_xy - pixel position used to load RtTexture.
void ps_fbmask(inout float4 C, float2 pos_xy)
{
	if (!PS_FBMASK)
		return;

	float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);

	uint4 kept = (uint4)C & ~FbMask;
	uint4 from_rt = (uint4)RT & FbMask;
	C = (float4)(kept | from_rt);
}
2019-09-15 18:49:34 +00:00
// With PS_DITHER set, adds one DitherMatrix entry to all three channels of C.
// The entry is chosen by the low two bits of the pixel position; unless
// PS_DITHER == 2 the position is first divided by PS_SCALE_FACTOR.
void ps_dither(inout float3 C, float2 pos_xy)
{
	if (!PS_DITHER)
		return;

	int2 cell = (PS_DITHER == 2) ? int2(pos_xy) : int2(pos_xy / (float)PS_SCALE_FACTOR);

	C += DitherMatrix[cell.x & 3][cell.y & 3];
}
2021-11-30 11:57:51 +00:00
// Final clamp / wrap of the colour, applied only when software blending,
// dithering or the frame-buffer mask is active.
//   C - rgb in the 0..255 range, updated in place.
void ps_color_clamp_wrap(inout float3 C)
{
	// Dithering makes the bottom 3 bits meaningless and causes lines in the
	// picture, so the colour depth has to be limited on such draws.
	if (!(SW_BLEND || PS_DITHER || PS_FBMASK))
		return;

	// Standard clamp
	if (PS_COLCLIP == 0 && PS_HDR == 0)
	{
		C = clamp(C, (float3)0.0f, (float3)255.0f);
	}

	if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
	{
		// 16-bit formats only use 5 bits of colour. This affects the shadow
		// computation in Castlevania.
		C = (float3)((int3)C & (int3)0xF8);
	}
	else if (PS_COLCLIP == 1 || PS_HDR == 1)
	{
		// Wrap to 8 bits.
		C = (float3)((int3)C & (int3)0xFF);
	}
}
2022-08-05 18:54:25 +00:00
void ps_blend ( inout float4 Color , inout float As , float2 pos_xy )
2019-08-25 18:01:29 +00:00
{
if ( SW_BLEND )
{
2022-01-08 17:43:28 +00:00
// PABE
if ( PS_PABE )
{
// No blending so early exit
if ( As < 1.0f )
return ;
}
2022-08-26 15:07:21 +00:00
float4 RT = SW_BLEND_NEEDS_RT ? trunc ( RtTexture . Load ( int3 ( pos_xy , 0 ) ) * 255.0f + 0.1f ) : ( float4 ) 0.0f ;
2019-08-25 18:01:29 +00:00
2022-08-26 15:07:21 +00:00
float Ad = RT . a / 128.0f ;
2019-08-25 18:01:29 +00:00
2019-08-25 18:14:50 +00:00
float3 Cd = RT . rgb ;
float3 Cs = Color . rgb ;
2019-08-25 18:01:29 +00:00
float3 A = ( PS_BLEND_A == 0 ) ? Cs : ( ( PS_BLEND_A == 1 ) ? Cd : ( float3 ) 0.0f ) ;
float3 B = ( PS_BLEND_B == 0 ) ? Cs : ( ( PS_BLEND_B == 1 ) ? Cd : ( float3 ) 0.0f ) ;
2022-01-23 11:39:01 +00:00
float C = ( PS_BLEND_C == 0 ) ? As : ( ( PS_BLEND_C == 1 ) ? Ad : Af ) ;
2019-08-25 18:01:29 +00:00
float3 D = ( PS_BLEND_D == 0 ) ? Cs : ( ( PS_BLEND_D == 1 ) ? Cd : ( float3 ) 0.0f ) ;
2021-12-26 17:12:09 +00:00
// As/Af clamp alpha for Blend mix
2022-08-05 18:54:25 +00:00
// We shouldn't clamp blend mix with clr1 as we want alpha higher
2022-08-09 07:56:00 +00:00
float C_clamped = C ;
2022-08-16 09:11:17 +00:00
if ( PS_BLEND_MIX > 0 && PS_CLR_HW != 1 )
2022-08-09 07:56:00 +00:00
C_clamped = min ( C_clamped , 1.0f ) ;
2021-12-26 17:12:09 +00:00
2022-08-08 22:19:20 +00:00
if ( PS_BLEND_A == PS_BLEND_B )
Color . rgb = D ;
2022-08-16 09:11:17 +00:00
// In blend_mix, HW adds on some alpha factor * dst.
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
// Instead, apply an offset to convert HW's round to a floor.
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
else if ( PS_BLEND_MIX == 2 )
2022-08-09 07:56:00 +00:00
Color . rgb = ( ( A - B ) * C_clamped + D ) + ( 124.0f / 256.0f ) ;
2022-08-16 09:11:17 +00:00
else if ( PS_BLEND_MIX == 1 )
2022-08-09 07:56:00 +00:00
Color . rgb = ( ( A - B ) * C_clamped + D ) - ( 124.0f / 256.0f ) ;
2022-08-08 22:19:20 +00:00
else
Color . rgb = trunc ( ( ( A - B ) * C ) + D ) ;
2022-08-05 18:54:25 +00:00
if ( PS_CLR_HW == 1 )
{
// Replace Af with As so we can do proper compensation for Alpha.
if ( PS_BLEND_C == 2 )
As = Af ;
// Subtract 1 for alpha to compensate for the changed equation,
// if c.rgb > 255.0f then we further need to adjust alpha accordingly,
// we pick the lowest overflow from all colors because it's the safest,
// we divide by 255 the color because we don't know Cd value,
// changed alpha should only be done for hw blend.
float min_color = min ( min ( Color . r , Color . g ) , Color . b ) ;
float alpha_compensate = max ( 1.0f , min_color / 255.0f ) ;
As - = alpha_compensate ;
}
2022-08-09 07:56:00 +00:00
else if ( PS_CLR_HW == 2 )
{
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
// as that is the minimum color Cd can be,
// then we multiply by alpha to get the minimum
// blended value it can be.
float color_compensate = 1.0f * ( C + 1.0f ) ;
Color . rgb - = ( float3 ) color_compensate ;
}
2019-08-25 18:01:29 +00:00
}
2022-01-23 11:39:01 +00:00
else
{
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
if ( PS_CLR_HW == 1 || PS_CLR_HW == 5 )
2022-01-23 11:39:01 +00:00
{
2022-01-26 02:05:06 +00:00
// Needed for Cd * (As/Ad/F + 1) blending modes
2022-01-23 11:39:01 +00:00
Color . rgb = ( float3 ) 255.0f ;
}
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
else if ( PS_CLR_HW == 2 || PS_CLR_HW == 4 )
2022-01-23 11:39:01 +00:00
{
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
// Cd*As,Cd*Ad or Cd*F
2022-01-23 11:39:01 +00:00
2022-02-01 18:19:20 +00:00
float Alpha = PS_BLEND_C == 2 ? Af : As ;
2022-01-23 11:39:01 +00:00
Color . rgb = max ( ( float3 ) 0.0f , ( Alpha - ( float3 ) 1.0f ) ) ;
Color . rgb * = ( float3 ) 255.0f ;
}
2022-02-01 18:19:20 +00:00
else if ( PS_CLR_HW == 3 )
2022-01-26 02:05:06 +00:00
{
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value
Color . rgb * = ( 255.0f / 128.0f ) ;
}
2022-01-23 11:39:01 +00:00
}
2019-08-25 18:01:29 +00:00
}
// Pixel shader entry point.
//
// Fetches the fragment color through ps_color(), applies the optional
// scanline mask, channel shuffle and alpha correction, and then either
//  - emits the primitive-id value used by the PS_DATE 1/2 setup passes, or
//  - runs the blend / dither / clamp-wrap / fbmask chain and writes the
//    color output(s).
// Color is scaled by 1/255 (1/65535 for rgb when PS_HDR) on output, and
// 128 is treated as alpha "one". Optionally writes a clamped depth value
// when PS_ZCLAMP is set.
PS_OUTPUT ps_main(PS_INPUT input)
{
    float4 C = ps_color(input);

    PS_OUTPUT output;

    if (PS_SCANMSK & 2)
    {
        // fail depth test on prohibited lines
        if ((int(input.p.y) & 1) == (PS_SCANMSK & 1))
            discard;
    }

    if (PS_SHUFFLE)
    {
        uint4 denorm_c = uint4(C);
        uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);

        // Mask will take care of the correct destination
        if (PS_READ_BA)
            C.rb = C.bb;
        else
            C.rb = C.rr;

        // Rebuild green/alpha from the low 7 bits of the source channel,
        // taking bit 7 from TA.y when the source had bit 7 set and from
        // TA.x otherwise.
        if (PS_READ_BA)
        {
            if (denorm_c.a & 0x80u)
                C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
            else
                C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
        }
        else
        {
            if (denorm_c.g & 0x80u)
                C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
            else
                C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
        }
    }

    // Must be done before alpha correction

    // AA (Fixed one) will output a coverage of 1.0 as alpha
    if (PS_FIXED_ONE_A)
    {
        C.a = 128.0f;
    }

    // Blend factor normalized so that 128 maps to 1.0.
    // With PS_BLEND_C == 1 and PS_CLR_HW > 3 it is taken from the alpha
    // currently stored in the render target instead of the fragment alpha.
    float alpha_blend;
    if (PS_BLEND_C == 1 && PS_CLR_HW > 3)
    {
        // Convert the normalized render target value back to whole 0-255 steps
        float4 RT = trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
        alpha_blend = RT.a / 128.0f;
    }
    else
    {
        alpha_blend = C.a / 128.0f;
    }

    // Alpha correction
    if (PS_DFMT == FMT_16)
    {
        float A_one = 128.0f; // alpha output will be 0x80
        // Either force alpha to 0x80 (PS_FBA) or snap it to 0 / 0x80
        C.a = PS_FBA ? A_one : step(A_one, C.a) * A_one;
    }
    else if ((PS_DFMT == FMT_32) && PS_FBA)
    {
        float A_one = 128.0f;
        // Raise alpha by 0x80 when it is below 0x80
        if (C.a < A_one) C.a += A_one;
    }

#if PS_DATE == 3
    // Note: the primitive whose id equals stencil_ceil is the one that will
    // update the bad alpha value, so we must keep it.
    int stencil_ceil = int(PrimMinTexture.Load(int3(input.p.xy, 0)));
    if (int(input.primid) > stencil_ceil)
        discard;
#endif

    // Get first primitive that will write a failing alpha value
#if PS_DATE == 1
    // DATM == 0
    // Pixel with alpha equal to 1 will fail (128-255)
    output.c = (C.a > 127.5f) ? float(input.primid) : float(0x7FFFFFFF);
#elif PS_DATE == 2
    // DATM == 1
    // Pixel with alpha equal to 0 will fail (0-127)
    output.c = (C.a < 127.5f) ? float(input.primid) : float(0x7FFFFFFF);
#else
    // Not primid DATE setup
    ps_blend(C, alpha_blend, input.p.xy);

    ps_dither(C.rgb, input.p.xy);

    // Color clamp/wrap needs to be done after sw blending and dithering
    ps_color_clamp_wrap(C.rgb);

    ps_fbmask(C, input.p.xy);

#if !PS_NO_COLOR
    output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f;
#if !PS_NO_COLOR1
    output.c1 = (float4)(alpha_blend);
#endif
#if PS_NO_ABLEND
    // write alpha blend factor into col0
    output.c0.a = alpha_blend;
#endif
#if PS_ONLY_ALPHA
    // rgb isn't used
    output.c0.rgb = float3(0.0f, 0.0f, 0.0f);
#endif
#endif

#endif

#if PS_ZCLAMP
    output.depth = min(input.p.z, MaxDepthPS);
#endif

    return output;
}
//////////////////////////////////////////////////////////////////////
// Vertex Shader
//////////////////////////////////////////////////////////////////////
2011-02-19 10:57:28 +00:00
// Vertex shader entry point.
//
// Clamps the incoming depth to MaxDepth, builds the output position from
// input.p / input.z using VertexScale and VertexOffset (Y is flipped), and
// forwards texture coordinates, color and the fog value (input.f.r, stored
// in t.z). When VS_TME is 0 the texture outputs are set to neutral values.
VS_OUTPUT vs_main(VS_INPUT input)
{
    // Clamp to max depth, gs doesn't wrap
    input.z = min(input.z, MaxDepth);

    VS_OUTPUT output;

    // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
    // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
    // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
    // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133

    output.p = float4(input.p, input.z, 1.0f) - float4(0.05f, 0.05f, 0, 0);

    output.p.xy = output.p.xy * float2(VertexScale.x, -VertexScale.y) - float2(VertexOffset.x, -VertexOffset.y);
    output.p.z *= exp2(-32.0f); // integer->float depth

    if (VS_TME)
    {
        float2 uv = input.uv - TextureOffset;
        float2 st = input.st - TextureOffset;

        // Integer normalized
        output.ti.xy = uv * TextureScale;

        if (VS_FST)
        {
            // Integer integral
            output.ti.zw = uv;
        }
        else
        {
            // float for post-processing in some games
            output.ti.zw = st / TextureScale;
        }

        // Float coords
        output.t.xy = st;
        output.t.w = input.q;
    }
    else
    {
        // Texturing disabled: emit neutral coordinates
        output.t.xy = 0;
        output.t.w = 1.0f;
        output.ti = 0;
    }

    output.c = input.c;
    output.t.z = input.f.r;

    return output;
}
2019-08-25 18:01:29 +00:00
//////////////////////////////////////////////////////////////////////
// Geometry Shader
//////////////////////////////////////////////////////////////////////
2022-09-01 06:50:25 +00:00
// Helpers used by the geometry shaders to turn a vertex shader output into
// a pixel shader input.
//  PRIMID_IN - extra gs_main parameter (primitive id) or nothing
//  VS2PS(x)  - converts x, forwarding the primitive id when enabled
#if GS_FORWARD_PRIMID

#define PRIMID_IN , uint primid : SV_PrimitiveID
#define VS2PS(x) vs2ps_impl(x, primid)

// Copies every forwarded vertex attribute and tags the result with primid.
PS_INPUT vs2ps_impl(VS_OUTPUT vs, uint primid)
{
    PS_INPUT ps;
    ps.primid = primid;
    ps.c = vs.c;
    ps.ti = vs.ti;
    ps.t = vs.t;
    ps.p = vs.p;
    return ps;
}

#else

#define PRIMID_IN
#define VS2PS(x) vs2ps_impl(x)

// Copies every forwarded vertex attribute; no primitive id is attached.
PS_INPUT vs2ps_impl(VS_OUTPUT vs)
{
    PS_INPUT ps;
    ps.c = vs.c;
    ps.ti = vs.ti;
    ps.t = vs.t;
    ps.p = vs.p;
    return ps;
}

#endif
2021-12-23 11:35:05 +00:00
#if GS_PRIM == 0
2017-03-03 21:18:49 +00:00
// Geometry shader for point primitives (GS_PRIM == 0).
// Expands the input point into a PointSize.x by PointSize.y quad whose
// top-left corner is the input position; all emitted vertices share the
// attributes of the input vertex and differ only in position.
[maxvertexcount(6)]
void gs_main(point VS_OUTPUT input[1], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
{
    // Transform a point to a NxN sprite
    PS_INPUT Point = VS2PS(input[0]);

    // Get new position
    float4 lt_p = input[0].p;
    float4 rb_p = input[0].p + float4(PointSize.x, PointSize.y, 0.0f, 0.0f);
    float4 lb_p = rb_p;
    float4 rt_p = rb_p;
    lb_p.x = lt_p.x;
    rt_p.y = lt_p.y;

    // NOTE(review): there is no RestartStrip() between the two triangles, so
    // the six vertices presumably form a single strip whose middle triangles
    // are degenerate - confirm this is intended.

    // Triangle 1
    Point.p = lt_p;
    stream.Append(Point);

    Point.p = lb_p;
    stream.Append(Point);

    Point.p = rt_p;
    stream.Append(Point);

    // Triangle 2
    Point.p = lb_p;
    stream.Append(Point);

    Point.p = rt_p;
    stream.Append(Point);

    Point.p = rb_p;
    stream.Append(Point);
}
2021-12-23 11:35:05 +00:00
#elif GS_PRIM == 1
2017-03-03 21:18:49 +00:00
// Geometry shader for line primitives (GS_PRIM == 1).
// Expands the input segment into a quad (two triangles) by moving each
// endpoint half of PointSize along the line normal in both directions.
[maxvertexcount(6)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
{
    // Transform a line to a thick line-sprite
    PS_INPUT left = VS2PS(input[0]);
    PS_INPUT right = VS2PS(input[1]);
    float2 lt_p = input[0].p.xy;
    float2 rt_p = input[1].p.xy;

    // Potentially there is faster math
    float2 line_vector = normalize(rt_p.xy - lt_p.xy);
    float2 line_normal = float2(line_vector.y, -line_vector.x);
    float2 line_width = (line_normal * PointSize) / 2;

    lt_p -= line_width;
    rt_p -= line_width;
    float2 lb_p = input[0].p.xy + line_width;
    float2 rb_p = input[1].p.xy + line_width;

#if GS_IIP == 0
    // Flat color: both ends use the second vertex's color
    left.c = right.c;
#endif

    // Triangle 1
    left.p.xy = lt_p;
    stream.Append(left);

    left.p.xy = lb_p;
    stream.Append(left);

    right.p.xy = rt_p;
    stream.Append(right);
    stream.RestartStrip();

    // Triangle 2
    left.p.xy = lb_p;
    stream.Append(left);

    right.p.xy = rt_p;
    stream.Append(right);

    right.p.xy = rb_p;
    stream.Append(right);
    stream.RestartStrip();
}
2011-02-19 10:57:28 +00:00
#elif GS_PRIM == 3
// Geometry shader for sprites (GS_PRIM == 3).
// Takes the two corner vertices of a sprite (left-top, right-bottom) and
// emits the four corners of the rectangle. Depth, fog, texture perspective
// and color are taken from the second vertex for the whole sprite.
[maxvertexcount(4)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<PS_INPUT> stream PRIMID_IN)
{
    PS_INPUT lt = VS2PS(input[0]);
    PS_INPUT rb = VS2PS(input[1]);

    // flat depth
    lt.p.z = rb.p.z;
    // flat fog and texture perspective
    lt.t.zw = rb.t.zw;
    // flat color
    lt.c = rb.c;

    // Swap texture and position coordinate
    // Left-bottom: x components from lt, everything else from rb
    PS_INPUT lb = rb;
    lb.p.x = lt.p.x;
    lb.t.x = lt.t.x;
    lb.ti.x = lt.ti.x;
    lb.ti.z = lt.ti.z;

    // Right-top: y components from lt, everything else from rb
    PS_INPUT rt = rb;
    rt.p.y = lt.p.y;
    rt.t.y = lt.t.y;
    rt.ti.y = lt.ti.y;
    rt.ti.w = lt.ti.w;

    stream.Append(lt);
    stream.Append(lb);
    stream.Append(rt);
    stream.Append(rb);
}
#endif
#endif