2023-07-20 13:23:09 +00:00
/*  PCSX2 - PS2 Emulator for PCs
 *  Copyright (C) 2002-2023 PCSX2 Dev Team
 *
 *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU Lesser General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with PCSX2.
 *  If not, see <http://www.gnu.org/licenses/>.
 */
2011-02-19 10:57:28 +00:00
// Format selectors. PS_AEM_FMT and PS_DFMT are compared against these values
// further down (e.g. FMT_24 / FMT_16 choose the alpha-expansion path when sampling).
// NOTE(review): presumably 32/24/16 bits per pixel — confirm against the host-side code.
#define FMT_32 0
#define FMT_24 1
#define FMT_16 2
2020-05-22 22:58:53 +00:00
// Fallback values for the vertex-stage switches.
// They only apply when VS_TME has not already been defined before this point.
#ifndef VS_TME
#define VS_IIP 0
#define VS_TME 1
#define VS_FST 1
#endif
// Fallback values for the geometry-stage switches.
// They only apply when GS_IIP has not already been defined before this point.
#ifndef GS_IIP
#define GS_IIP 0
#define GS_PRIM 3
#define GS_FORWARD_PRIMID 0
#endif
// Fallback values for every pixel-stage switch.
// They only apply when PS_FST has not already been defined before this point.
// PS_IIP used to be listed twice in this group; it is now defined once.
#ifndef PS_FST
#define PS_IIP 0
#define PS_FST 0
#define PS_WMS 0
#define PS_WMT 0
#define PS_ADJS 0
#define PS_ADJT 0
#define PS_AEM_FMT FMT_32
#define PS_AEM 0
#define PS_TFX 0
#define PS_TCC 1
#define PS_ATST 1
#define PS_FOG 0
#define PS_BLEND_HW 0
#define PS_A_MASKED 0
#define PS_FBA 0
#define PS_FBMASK 0
#define PS_LTF 1
#define PS_TCOFFSETHACK 0
#define PS_POINT_SAMPLER 0
#define PS_REGION_RECT 0
#define PS_SHUFFLE 0
#define PS_SHUFFLE_SAME 0
#define PS_READ_BA 0
#define PS_READ16_SRC 0
#define PS_DFMT 0
#define PS_DEPTH_FMT 0
#define PS_PAL_FMT 0
#define PS_CHANNEL_FETCH 0
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_HDR 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
#define PS_BLEND_B 0
#define PS_BLEND_C 0
#define PS_BLEND_D 0
#define PS_BLEND_MIX 0
#define PS_ROUND_INV 0
#define PS_FIXED_ONE_A 0
#define PS_PABE 0
#define PS_DITHER 0
#define PS_ZCLAMP 0
#define PS_SCANMSK 0
#define PS_AUTOMATIC_LOD 0
#define PS_MANUAL_LOD 0
#define PS_TEX_IS_FB 0
#define PS_NO_COLOR 0
#define PS_NO_COLOR1 0
#define PS_NO_ABLEND 0
#define PS_ONLY_ALPHA 0
#define PS_DATE 0
#endif
2019-06-06 15:56:22 +00:00
// Derived blend switches, computed from the PS_BLEND_* selectors.
// SW_BLEND:          any of the A/B/D selectors is non-zero.
// SW_BLEND_NEEDS_RT: SW_BLEND is on and at least one selector equals 1
//                    (selector value 1 picks Cd/Ad, which ps_blend reads from RtTexture).
// SW_AD_TO_HW:       the C selector equals 1 while PS_A_MASKED is set.
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
2019-06-06 15:56:22 +00:00
2011-02-19 10:57:28 +00:00
// Vertex attribute layout, bound by semantic.
// NOTE(review): the name suggests this is the vertex shader input; the entry point
// that consumes it is not visible in this part of the file.
struct VS_INPUT
{
	float2 st : TEXCOORD0;
	uint4 c : COLOR0;
	float q : TEXCOORD1;
	uint2 p : POSITION0;
	uint z : POSITION1;
	uint2 uv : TEXCOORD2;
	float4 f : COLOR1;
};
// Values handed from the vertex stage to later stages.
// The colour is interpolated only when one of VS_IIP / GS_IIP / PS_IIP is non-zero;
// otherwise it is declared `nointerpolation`.
struct VS_OUTPUT
{
	float4 p : SV_Position;
	float4 t : TEXCOORD0;
	float4 ti : TEXCOORD2;

#if VS_IIP != 0 || GS_IIP != 0 || PS_IIP != 0
	float4 c : COLOR0;
#else
	nointerpolation float4 c : COLOR0;
#endif
};
// Pixel shader input. Mirrors VS_OUTPUT and optionally adds the primitive id.
struct PS_INPUT
{
	float4 p : SV_Position;
	float4 t : TEXCOORD0;
	float4 ti : TEXCOORD2;

	// Same interpolation rule as VS_OUTPUT.
#if VS_IIP != 0 || GS_IIP != 0 || PS_IIP != 0
	float4 c : COLOR0;
#else
	nointerpolation float4 c : COLOR0;
#endif

	// Only declared for PS_DATE modes 1..3 or when GS_FORWARD_PRIMID is set.
#if (PS_DATE >= 1 && PS_DATE <= 3) || GS_FORWARD_PRIMID
	uint primid : SV_PrimitiveID;
#endif
};
2023-04-07 07:55:55 +00:00
#ifdef PIXEL_SHADER
2011-02-19 10:57:28 +00:00
// Pixel shader outputs. The set of render targets depends on the compile-time switches:
//  - PS_NO_COLOR set:        no colour target at all.
//  - PS_DATE == 1 or 2:      a single scalar target.
//  - otherwise:              c0, plus c1 unless PS_NO_COLOR1 is set.
//  - PS_ZCLAMP set:          the shader additionally writes depth.
struct PS_OUTPUT
{
#if !PS_NO_COLOR
#if PS_DATE == 1 || PS_DATE == 2
	float c : SV_Target;
#else
	float4 c0 : SV_Target0;
#if !PS_NO_COLOR1
	float4 c1 : SV_Target1;
#endif
#endif
#endif
#if PS_ZCLAMP
	float depth : SV_Depth;
#endif
};
// Shader resources and their register slots.
Texture2D<float4> Texture : register(t0);       // source texture (sample_c / fetch_c)
Texture2D<float4> Palette : register(t1);       // lookup table indexed by sample_p
Texture2D<float4> RtTexture : register(t2);     // read when PS_TEX_IS_FB, fbmask or sw blend is active
Texture2D<float> PrimMinTexture : register(t3); // not referenced in the visible part of the file
SamplerState TextureSampler : register(s0);
2022-03-19 12:19:16 +00:00
// Pixel-stage constants. Field order defines the buffer layout — do not reorder.
// Under DX12 the buffer is explicitly bound to register b1.
#ifdef DX12
cbuffer cb1 : register(b1)
#else
cbuffer cb1
#endif
{
	float3 FogColor;          // colour blended in by fog()
	float AREF;               // reference value compared against alpha in atst()
	float4 WH;                // .xy used as texture size in clamp_wrap_uv, .zw scales uv to texels
	float2 TA;                // .x / .y: replacement alpha values for the FMT_24 / FMT_16 paths
	float MaxDepthPS;
	float Af;                 // fixed alpha factor selectable in ps_blend
	uint4 FbMask;             // per-channel bit mask applied in ps_fbmask
	float4 HalfTexel;         // offset added to st.xyxy when PS_LTF is on
	float4 MinMax;            // clamp bounds / bit masks for wrap modes 2 and 3; LOD params with PS_MANUAL_LOD
	float4 STRange;           // .xy offset and .zw scale/limit for PS_ADJS / PS_ADJT / PS_REGION_RECT
	int4 ChannelShuffle;      // shifts and masks used by fetch_gXbY
	float2 TC_OffsetHack;     // added to st when PS_TCOFFSETHACK is on
	float2 STScale;           // multiplied into uv before sampling
	float4x4 DitherMatrix;    // 4x4 table indexed by pixel position in ps_dither
	float ScaledScaleFactor;  // multiplies the integer uv in sample_depth
	float RcpScaleFactor;     // multiplies the pixel position in ps_dither (PS_DITHER != 2)
};
2022-01-09 06:46:40 +00:00
// Fetches one colour from the source texture.
//   uv   : texture coordinate (normalized, except with PS_REGION_RECT where it is
//          already an integer texel coordinate and is passed straight to Load).
//   uv_w : only used by the PS_MANUAL_LOD path to derive the mip level.
// With PS_TEX_IS_FB the render target is read instead of the texture.
float4 sample_c(float2 uv, float uv_w)
{
#if PS_TEX_IS_FB == 1
	return RtTexture.Load(int3(int2(uv * WH.zw), 0));
#elif PS_REGION_RECT == 1
	return Texture.Load(int3(int2(uv), 0));
#else
	if (PS_POINT_SAMPLER)
	{
		// Weird issue with ATI/AMD cards,
		// it looks like they add 127/128 of a texel to sampling coordinates
		// occasionally causing point sampling to erroneously round up.
		// I'm manually adjusting coordinates to the centre of texels here,
		// though the centre is just paranoia, the top left corner works fine.
		// As of 2018 this issue is still present.
		uv = (trunc(uv * WH.zw) + float2(0.5, 0.5)) / WH.zw;
	}

	// Per-axis coordinate adjustment: either a plain scale, or offset + scale
	// from STRange when PS_ADJS / PS_ADJT is enabled for that axis.
#if !PS_ADJS && !PS_ADJT
	uv *= STScale;
#else
	#if PS_ADJS
		uv.x = (uv.x - STRange.x) * STRange.z;
	#else
		uv.x = uv.x * STScale.x;
	#endif
	#if PS_ADJT
		uv.y = (uv.y - STRange.y) * STRange.w;
	#else
		uv.y = uv.y * STScale.y;
	#endif
#endif

#if PS_AUTOMATIC_LOD == 1
	return Texture.Sample(TextureSampler, uv);
#elif PS_MANUAL_LOD == 1
	// FIXME add LOD: K - ( LOG2(Q) * (1 << L))
	float K = MinMax.x;
	float L = MinMax.y;
	float bias = MinMax.z;
	float max_lod = MinMax.w;

	float gs_lod = K - log2(abs(uv_w)) * L;
	// FIXME max useful ?
	//float lod = max(min(gs_lod, max_lod) - bias, 0.0f);
	float lod = min(gs_lod, max_lod) - bias;

	return Texture.SampleLevel(TextureSampler, uv, lod);
#else
	return Texture.SampleLevel(TextureSampler, uv, 0); // No lod
#endif
#endif
}
2023-01-03 11:13:50 +00:00
// Reads palette entry `u` (texel (u, 0), mip 0 of Palette).
float4 sample_p(uint u)
{
	return Palette.Load(int3(int(u), 0, 0));
}
// Palette lookup from a normalized value: `u` is scaled by 255.5 and truncated
// to obtain the integer index passed to sample_p.
float4 sample_p_norm(float u)
{
	return sample_p(uint(u * 255.5f));
}
2019-08-25 18:01:29 +00:00
// Applies the PS_WMS (x/z components) and PS_WMT (y/w components) wrap modes to a
// pair of coordinates packed as (x0, y0, x1, y1).
//   mode 0 : frac()      — only applied when PS_REGION_RECT is on
//   mode 1 : saturate()  — only applied when PS_REGION_RECT is on
//   mode 2 : clamp to the MinMax bounds
//   mode 3 : integer (texel & MinMax.lo) | MinMax.hi, with MinMax reinterpreted as bits
// With PS_REGION_RECT the result is finally converted from normalized to integer
// coordinates and clamped to STRange.
float4 clamp_wrap_uv(float4 uv)
{
	float4 tex_size = WH.xyxy;

	if (PS_WMS == PS_WMT)
	{
		// Both axes share a mode: handle all four components at once.
		if (PS_REGION_RECT != 0 && PS_WMS == 0)
		{
			uv = frac(uv);
		}
		else if (PS_REGION_RECT != 0 && PS_WMS == 1)
		{
			uv = saturate(uv);
		}
		else if (PS_WMS == 2)
		{
			uv = clamp(uv, MinMax.xyxy, MinMax.zwzw);
		}
		else if (PS_WMS == 3)
		{
			#if PS_FST == 0
			// wrap negative uv coords to avoid an off by one error that shifted
			// textures. Fixes Xenosaga's hair issue.
			uv = frac(uv);
			#endif
			uv = (float4)(((uint4)(uv * tex_size) & asuint(MinMax.xyxy)) | asuint(MinMax.zwzw)) / tex_size;
		}
	}
	else
	{
		// Horizontal axis (x and z components).
		if (PS_REGION_RECT != 0 && PS_WMS == 0)
		{
			uv.xz = frac(uv.xz);
		}
		else if (PS_REGION_RECT != 0 && PS_WMS == 1)
		{
			uv.xz = saturate(uv.xz);
		}
		else if (PS_WMS == 2)
		{
			uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
		}
		else if (PS_WMS == 3)
		{
			#if PS_FST == 0
			uv.xz = frac(uv.xz);
			#endif
			uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & asuint(MinMax.xx)) | asuint(MinMax.zz)) / tex_size.xx;
		}

		// Vertical axis (y and w components).
		if (PS_REGION_RECT != 0 && PS_WMT == 0)
		{
			uv.yw = frac(uv.yw);
		}
		else if (PS_REGION_RECT != 0 && PS_WMT == 1)
		{
			uv.yw = saturate(uv.yw);
		}
		else if (PS_WMT == 2)
		{
			uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
		}
		else if (PS_WMT == 3)
		{
			#if PS_FST == 0
			uv.yw = frac(uv.yw);
			#endif
			uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & asuint(MinMax.yy)) | asuint(MinMax.ww)) / tex_size.yy;
		}
	}

	if (PS_REGION_RECT != 0)
	{
		// Normalized -> Integer Coordinates.
		uv = clamp(uv * WH.zwzw + STRange.xyxy, STRange.xyxy, STRange.zwzw);
	}

	return uv;
}
2022-01-09 06:46:40 +00:00
// Samples the four texels addressed by the packed coordinates (x0, y0, x1, y1).
// Row order of the result: (x0,y0), (x1,y0), (x0,y1), (x1,y1).
float4x4 sample_4c(float4 uv, float uv_w)
{
	float4x4 c;

	c[0] = sample_c(uv.xy, uv_w);
	c[1] = sample_c(uv.zy, uv_w);
	c[2] = sample_c(uv.xw, uv_w);
	c[3] = sample_c(uv.zw, uv_w);

	return c;
}
2023-01-03 11:13:50 +00:00
// Reads the alpha channel of the same four texels as sample_4c and converts each
// to an integer palette index. PS_PAL_FMT selects which bits form the index:
//   1 -> low nibble (4HL), 2 -> high nibble (4HH), anything else -> all 8 bits.
uint4 sample_4_index(float4 uv, float uv_w)
{
	float4 c;

	c.x = sample_c(uv.xy, uv_w).a;
	c.y = sample_c(uv.zy, uv_w).a;
	c.z = sample_c(uv.xw, uv_w).a;
	c.w = sample_c(uv.zw, uv_w).a;

	// Denormalize value
	uint4 i = uint4(c * 255.5f);

	if (PS_PAL_FMT == 1)
	{
		// 4HL
		return i & 0xFu;
	}
	else if (PS_PAL_FMT == 2)
	{
		// 4HH
		return i >> 4u;
	}
	else
	{
		// 8
		return i;
	}
}
2023-01-03 11:13:50 +00:00
// Looks up four palette entries; row k of the result is the entry for index u[k].
float4x4 sample_4p(uint4 u)
{
	float4x4 c;

	c[0] = sample_p(u.x);
	c[1] = sample_p(u.y);
	c[2] = sample_p(u.z);
	c[3] = sample_p(u.w);

	return c;
}
2019-08-25 18:01:29 +00:00
// Loads the red channel at integer position `xy` (from RtTexture when PS_TEX_IS_FB,
// else from Texture) and rescales it by 2^32 into an integer.
// NOTE(review): for col.r >= 0.5 the scaled value exceeds the signed 32-bit range;
// sample_depth converts through uint instead — confirm the conversion wanted here.
int fetch_raw_depth(int2 xy)
{
#if PS_TEX_IS_FB == 1
	float4 col = RtTexture.Load(int3(xy, 0));
#else
	float4 col = Texture.Load(int3(xy, 0));
#endif
	return (int)(col.r * exp2(32.0f));
}
2019-08-25 18:01:29 +00:00
// Loads the unmodified colour at integer position `xy`, from RtTexture when
// PS_TEX_IS_FB is set and from Texture otherwise.
float4 fetch_raw_color(int2 xy)
{
#if PS_TEX_IS_FB == 1
	return RtTexture.Load(int3(xy, 0));
#else
	return Texture.Load(int3(xy, 0));
#endif
}
2019-08-25 18:01:29 +00:00
// Loads mip 0 of Texture at integer texel coordinate `uv` (always Texture,
// regardless of PS_TEX_IS_FB).
float4 fetch_c(int2 uv)
{
	return Texture.Load(int3(uv, 0));
}
2019-08-25 18:01:29 +00:00
//////////////////////////////////////////////////////////////////////
// Depth sampling
//////////////////////////////////////////////////////////////////////
2017-02-17 09:59:21 +00:00
2018-12-16 07:45:49 +00:00
// Integer counterpart of clamp_wrap_uv used by the depth path.
// MinMax is reinterpreted as integers and shifted left by 4 to form the bounds/masks.
// Only modes 2 (clamp) and 3 (and/or mask) modify the coordinate.
int2 clamp_wrap_uv_depth(int2 uv)
{
	int4 mask = asint(MinMax) << 4;

	if (PS_WMS == PS_WMT)
	{
		// Same mode on both axes: process x and y together.
		if (PS_WMS == 2)
		{
			uv = clamp(uv, mask.xy, mask.zw);
		}
		else if (PS_WMS == 3)
		{
			uv = (uv & mask.xy) | mask.zw;
		}
	}
	else
	{
		if (PS_WMS == 2)
		{
			uv.x = clamp(uv.x, mask.x, mask.z);
		}
		else if (PS_WMS == 3)
		{
			uv.x = (uv.x & mask.x) | mask.z;
		}

		if (PS_WMT == 2)
		{
			uv.y = clamp(uv.y, mask.y, mask.w);
		}
		else if (PS_WMT == 3)
		{
			uv.y = (uv.y & mask.y) | mask.w;
		}
	}

	return uv;
}
2018-12-18 16:58:35 +00:00
// Produces a colour in the 0..255 range from a depth or colour source.
//   st  : integer-style texture coordinate, wrapped with clamp_wrap_uv_depth
//   pos : pixel position, used by the two HLE paths instead of `st`
// Path selection (first match wins): PS_TALES_OF_ABYSS_HLE, PS_URBAN_CHAOS_HLE,
// then PS_DEPTH_FMT 1 / 2 / 3. Alpha is afterwards rewritten for FMT_24 / FMT_16.
float4 sample_depth(float2 st, float2 pos)
{
	float2 uv_f = (float2)clamp_wrap_uv_depth(int2(st)) * (float2)ScaledScaleFactor;

#if PS_REGION_RECT == 1
	uv_f = clamp(uv_f + STRange.xy, STRange.xy, STRange.zw);
#endif

	int2 uv = (int2)uv_f;
	float4 t = (float4)(0.0f);

	if (PS_TALES_OF_ABYSS_HLE == 1)
	{
		// Warning: UV can't be used in channel effect
		// (float2 -> int2 conversion written out; it was implicit before)
		int depth = fetch_raw_depth(int2(pos));

		// Convert msb based on the palette
		t = Palette.Load(int3((depth >> 8) & 0xFF, 0, 0)) * 255.0f;
	}
	else if (PS_URBAN_CHAOS_HLE == 1)
	{
		// Depth buffer is read as a RGB5A1 texture. The game try to extract the green channel.
		// So it will do a first channel trick to extract lsb, value is right-shifted.
		// Then a new channel trick to extract msb which will shifted to the left.
		// OpenGL uses a FLOAT32 format for the depth so it requires a couple of conversion.
		// To be faster both steps (msb&lsb) are done in a single pass.

		// Warning: UV can't be used in channel effect
		int depth = fetch_raw_depth(int2(pos));

		// Convert lsb based on the palette
		t = Palette.Load(int3(depth & 0xFF, 0, 0)) * 255.0f;

		// Msb is easier
		float green = (float)((depth >> 8) & 0xFF) * 36.0f;
		green = min(green, 255.0f);
		t.g += green;
	}
	else if (PS_DEPTH_FMT == 1)
	{
		// Based on ps_convert_float32_rgba8 of convert

		// Convert a FLOAT32 depth texture into a RGBA color texture
		uint d = uint(fetch_c(uv).r * exp2(32.0f));
		t = float4(uint4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));
	}
	else if (PS_DEPTH_FMT == 2)
	{
		// Based on ps_convert_float16_rgb5a1 of convert

		// Convert a FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
		uint d = uint(fetch_c(uv).r * exp2(32.0f));
		t = float4(uint4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) * float4(8.0f, 8.0f, 8.0f, 128.0f);
	}
	else if (PS_DEPTH_FMT == 3)
	{
		// Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture
		t = fetch_c(uv) * 255.0f;
	}

	// Alpha expansion: with PS_AEM set, an all-zero rgb yields alpha 0.
	if (PS_AEM_FMT == FMT_24)
	{
		t.a = ((PS_AEM == 0) || any(bool3(t.rgb))) ? 255.0f * TA.x : 0.0f;
	}
	else if (PS_AEM_FMT == FMT_16)
	{
		t.a = t.a >= 128.0f ? 255.0f * TA.y : ((PS_AEM == 0) || any(bool3(t.rgb))) ? 255.0f * TA.x : 0.0f;
	}

	return t;
}
2019-08-25 18:01:29 +00:00
//////////////////////////////////////////////////////////////////////
// Fetch a Single Channel
//////////////////////////////////////////////////////////////////////
2019-02-20 11:11:23 +00:00
2019-08-25 18:01:29 +00:00
// Channel fetch: takes the red channel at `xy` (or bits 0..7 of the raw depth when
// PS_DEPTH_FMT is 1 or 2), uses it as a palette index and returns the palette
// colour scaled to 0..255.
float4 fetch_red(int2 xy)
{
	float4 rt;

	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int depth = (fetch_raw_depth(xy)) & 0xFF;
		rt = (float4)(depth) / 255.0f;
	}
	else
	{
		rt = fetch_raw_color(xy);
	}

	return sample_p_norm(rt.r) * 255.0f;
}
2021-12-05 04:55:57 +00:00
// Channel fetch: takes the green channel at `xy` (or bits 8..15 of the raw depth when
// PS_DEPTH_FMT is 1 or 2), uses it as a palette index and returns the palette
// colour scaled to 0..255.
float4 fetch_green(int2 xy)
{
	float4 rt;

	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int depth = (fetch_raw_depth(xy) >> 8) & 0xFF;
		rt = (float4)(depth) / 255.0f;
	}
	else
	{
		rt = fetch_raw_color(xy);
	}

	return sample_p_norm(rt.g) * 255.0f;
}
2021-12-05 04:55:57 +00:00
// Channel fetch: takes the blue channel at `xy` (or bits 16..23 of the raw depth when
// PS_DEPTH_FMT is 1 or 2), uses it as a palette index and returns the palette
// colour scaled to 0..255.
float4 fetch_blue(int2 xy)
{
	float4 rt;

	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int depth = (fetch_raw_depth(xy) >> 16) & 0xFF;
		rt = (float4)(depth) / 255.0f;
	}
	else
	{
		rt = fetch_raw_color(xy);
	}

	return sample_p_norm(rt.b) * 255.0f;
}
2018-10-02 19:43:05 +00:00
2019-08-25 18:01:29 +00:00
// Channel fetch: uses the alpha channel at `xy` as a palette index and returns the
// palette colour scaled to 0..255. There is no depth variant for this channel.
float4 fetch_alpha(int2 xy)
{
	float4 rt = fetch_raw_color(xy);
	return sample_p_norm(rt.a) * 255.0f;
}
2011-02-19 10:57:28 +00:00
2019-08-25 18:01:29 +00:00
// Channel fetch: runs r, g and b at `xy` through the palette independently, keeping
// only the matching channel of each lookup. Alpha is fixed to 1.0 before the
// result is scaled to 0..255.
float4 fetch_rgb(int2 xy)
{
	float4 rt = fetch_raw_color(xy);
	float4 c = float4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0);
	return c * 255.0f;
}
2019-08-25 18:01:29 +00:00
// Channel fetch that combines bits of green and blue into one value, broadcast to
// all four components. Shifts and masks come from ChannelShuffle:
//   green = (g >> .w) & .z,  blue = (b << .y) & .x
// For depth sources (PS_DEPTH_FMT 1 or 2) the byte starting at bit 8 + .w is used.
float4 fetch_gXbY(int2 xy)
{
	if ((PS_DEPTH_FMT == 1) || (PS_DEPTH_FMT == 2))
	{
		int depth = fetch_raw_depth(xy);
		int bg = (depth >> (8 + ChannelShuffle.w)) & 0xFF;
		return (float4)(bg);
	}
	else
	{
		int4 rt = (int4)(fetch_raw_color(xy) * 255.0);
		int green = (rt.g >> ChannelShuffle.w) & ChannelShuffle.z;
		int blue = (rt.b << ChannelShuffle.y) & ChannelShuffle.x;
		return (float4)(green | blue);
	}
}
2022-01-09 06:46:40 +00:00
// Samples the texture colour for coordinate `st` and returns it in the 0..255 range.
// Handles wrap modes, paletted formats, alpha expansion (FMT_24 / FMT_16) and,
// when PS_LTF is on, a manual bilinear blend of four texels.
//   uv_w : forwarded to sample_c for the manual LOD computation.
float4 sample_color(float2 st, float uv_w)
{
	#if PS_TCOFFSETHACK
	st += TC_OffsetHack.xy;
	#endif

	float4 t;
	float4x4 c;
	float2 dd;

	// Fast path: a single plain fetch is enough when nothing needs per-texel processing.
	// Only c[0] is written here; dd and c[1..3] are never read on this path.
	if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_REGION_RECT == 0 && PS_WMS < 2 && PS_WMT < 2)
	{
		c[0] = sample_c(st, uv_w);
	}
	else
	{
		float4 uv;

		if (PS_LTF)
		{
			uv = st.xyxy + HalfTexel;
			// Fractional texel position = bilinear weights.
			dd = frac(uv.xy * WH.zw);

			if (PS_FST == 0)
			{
				dd = clamp(dd, (float2)0.0f, (float2)0.9999999f);
			}
		}
		else
		{
			uv = st.xyxy;
		}

		uv = clamp_wrap_uv(uv);

#if PS_PAL_FMT != 0
		c = sample_4p(sample_4_index(uv, uv_w));
#else
		c = sample_4c(uv, uv_w);
#endif
	}

	// Alpha expansion per texel: with PS_AEM set, an all-zero rgb yields alpha 0.
	[unroll]
	for (uint i = 0; i < 4; i++)
	{
		if (PS_AEM_FMT == FMT_24)
		{
			c[i].a = !PS_AEM || any(c[i].rgb) ? TA.x : 0;
		}
		else if (PS_AEM_FMT == FMT_16)
		{
			c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(c[i].rgb) ? TA.x : 0;
		}
	}

	if (PS_LTF)
	{
		t = lerp(lerp(c[0], c[1], dd.x), lerp(c[2], c[3], dd.x), dd.y);
	}
	else
	{
		t = c[0];
	}

	// Scale to 0..255 and drop the fraction (small bias added before truncating).
	return trunc(t * 255.0f + 0.05f);
}
2019-08-25 18:14:50 +00:00
// Combines the texture colour T with the vertex colour C according to PS_TFX.
// Both inputs and the result are in the 0..255 range.
//   0 : C*T/128 (truncated)          1 : T unchanged
//   2 : rgb = C*T/128 + C.a, a = T.a + C.a
//   3 : rgb = C*T/128 + C.a, a = T.a
//   other : C unchanged
// With PS_TCC == 0 the output alpha is always taken from C.
float4 tfx(float4 T, float4 C)
{
	float4 C_out;
	float4 FxT = trunc((C * T) / 128.0f);

#if (PS_TFX == 0)
	C_out = FxT;
#elif (PS_TFX == 1)
	C_out = T;
#elif (PS_TFX == 2)
	C_out.rgb = FxT.rgb + C.a;
	C_out.a = T.a + C.a;
#elif (PS_TFX == 3)
	C_out.rgb = FxT.rgb + C.a;
	C_out.a = T.a;
#else
	C_out = C;
#endif

#if (PS_TCC == 0)
	C_out.a = C.a;
#endif

#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
	// Clamp only when it is useful
	C_out = min(C_out, 255.0f);
#endif

	return C_out;
}
2019-08-25 18:14:50 +00:00
// Alpha test: discards the pixel depending on PS_ATST and the alpha of C vs AREF.
//   0 : never discards
//   1 : discard when a > AREF
//   2 : discard when a < AREF
//   3 : discard when |a - AREF| > 0.5
//   4 : discard when |a - AREF| < 0.5
// Any other PS_ATST value leaves the pixel untouched.
void atst(float4 C)
{
	float a = C.a;

	if (PS_ATST == 0)
	{
		// nothing to do
	}
	else if (PS_ATST == 1)
	{
		if (a > AREF) discard;
	}
	else if (PS_ATST == 2)
	{
		if (a < AREF) discard;
	}
	else if (PS_ATST == 3)
	{
		if (abs(a - AREF) > 0.5f) discard;
	}
	else if (PS_ATST == 4)
	{
		if (abs(a - AREF) < 0.5f) discard;
	}
}
// Blends FogColor into c.rgb when PS_FOG is on: f == 1 keeps the colour,
// f == 0 gives pure FogColor. The result is truncated; alpha is untouched.
float4 fog(float4 c, float f)
{
	if (PS_FOG)
	{
		c.rgb = trunc(lerp(FogColor, c.rgb, f));
	}

	return c;
}
// Computes the pixel colour before blending: picks a texel source according to
// PS_CHANNEL_FETCH / PS_DEPTH_FMT, then applies tfx(), the alpha test and fog.
// The alpha test may discard the pixel.
float4 ps_color(PS_INPUT input)
{
	// Without PS_FST the coordinates are divided by t.w; with it, ti is used directly.
#if PS_FST == 0
	float2 st = input.t.xy / input.t.w;
	float2 st_int = input.ti.zw / input.t.w;
#else
	float2 st = input.ti.xy;
	float2 st_int = input.ti.zw;
#endif

#if PS_CHANNEL_FETCH == 1
	float4 T = fetch_red(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 2
	float4 T = fetch_green(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 3
	float4 T = fetch_blue(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 4
	float4 T = fetch_alpha(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 5
	float4 T = fetch_rgb(int2(input.p.xy));
#elif PS_CHANNEL_FETCH == 6
	float4 T = fetch_gXbY(int2(input.p.xy));
#elif PS_DEPTH_FMT > 0
	float4 T = sample_depth(st_int, input.p.xy);
#else
	float4 T = sample_color(st, input.t.w);
#endif

	float4 C = tfx(T, input.c);

	atst(C);

	// The fog factor travels in t.z.
	C = fog(C, input.t.z);

	return C;
}
2019-08-25 18:01:29 +00:00
// Software frame-buffer mask: when PS_FBMASK is on, bits set in FbMask are taken
// from the current render target value and the remaining bits from C.
//   C      : colour in 0..255, modified in place
//   pos_xy : pixel position used to read RtTexture
void ps_fbmask(inout float4 C, float2 pos_xy)
{
	if (PS_FBMASK)
	{
		// The +0.1 bias guards the truncation against values landing just below an integer.
		float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
		C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask));
	}
}
2019-09-15 18:49:34 +00:00
// Adds (or, with PS_ROUND_INV, subtracts) a dither offset taken from DitherMatrix.
// The matrix cell is chosen from the low two bits of the pixel position; unless
// PS_DITHER == 2 the position is first multiplied by RcpScaleFactor.
//   C      : rgb colour, modified in place
//   pos_xy : pixel position
void ps_dither(inout float3 C, float2 pos_xy)
{
	if (PS_DITHER)
	{
		int2 fpos;

		if (PS_DITHER == 2)
			fpos = int2(pos_xy);
		else
			fpos = int2(pos_xy * RcpScaleFactor);

		float value = DitherMatrix[fpos.x & 3][fpos.y & 3];

		if (PS_ROUND_INV)
			C -= value;
		else
			C += value;
	}
}
2021-11-30 11:57:51 +00:00
// Brings the rgb colour back into its storable range after software blending,
// dithering or fbmask processing. Does nothing when none of those is active.
//   C : rgb colour in 0..255 (may be out of range on entry), modified in place
void ps_color_clamp_wrap(inout float3 C)
{
	// When dithering the bottom 3 bits become meaningless and cause lines in the picture
	// so we need to limit the color depth on dithered items
	if (SW_BLEND || PS_DITHER || PS_FBMASK)
	{
		if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0 && PS_ROUND_INV)
		{
			C += 7.0f; // Need to round up, not down since the shader will invert
		}

		// Standard Clamp
		if (PS_COLCLIP == 0 && PS_HDR == 0)
		{
			C = clamp(C, (float3)0.0f, (float3)255.0f);
		}

		// In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania
		if (PS_DFMT == FMT_16 && PS_BLEND_MIX == 0)
		{
			C = (float3)((int3)C & (int3)0xF8);
		}
		else if (PS_COLCLIP == 1 || PS_HDR == 1)
		{
			// Wrap to 8 bits instead of clamping.
			C = (float3)((int3)C & (int3)0xFF);
		}
	}
}
2023-02-21 16:49:46 +00:00
void ps_blend ( inout float4 Color , inout float4 As_rgba , float2 pos_xy )
2019-08-25 18:01:29 +00:00
{
2023-02-21 16:49:46 +00:00
float As = As_rgba . a ;
2019-08-25 18:01:29 +00:00
if ( SW_BLEND )
{
2022-01-08 17:43:28 +00:00
// PABE
if ( PS_PABE )
{
// No blending so early exit
if ( As < 1.0f )
return ;
}
2022-08-26 15:07:21 +00:00
float4 RT = SW_BLEND_NEEDS_RT ? trunc ( RtTexture . Load ( int3 ( pos_xy , 0 ) ) * 255.0f + 0.1f ) : ( float4 ) 0.0f ;
2019-08-25 18:01:29 +00:00
2022-08-26 15:07:21 +00:00
float Ad = RT . a / 128.0f ;
2019-08-25 18:01:29 +00:00
2019-08-25 18:14:50 +00:00
float3 Cd = RT . rgb ;
float3 Cs = Color . rgb ;
2019-08-25 18:01:29 +00:00
float3 A = ( PS_BLEND_A == 0 ) ? Cs : ( ( PS_BLEND_A == 1 ) ? Cd : ( float3 ) 0.0f ) ;
float3 B = ( PS_BLEND_B == 0 ) ? Cs : ( ( PS_BLEND_B == 1 ) ? Cd : ( float3 ) 0.0f ) ;
2022-01-23 11:39:01 +00:00
float C = ( PS_BLEND_C == 0 ) ? As : ( ( PS_BLEND_C == 1 ) ? Ad : Af ) ;
2019-08-25 18:01:29 +00:00
float3 D = ( PS_BLEND_D == 0 ) ? Cs : ( ( PS_BLEND_D == 1 ) ? Cd : ( float3 ) 0.0f ) ;
2021-12-26 17:12:09 +00:00
// As/Af clamp alpha for Blend mix
2023-03-10 09:41:09 +00:00
// We shouldn't clamp blend mix with blend hw 1 as we want alpha higher
2022-08-09 07:56:00 +00:00
float C_clamped = C ;
2023-03-10 09:41:09 +00:00
if ( PS_BLEND_MIX > 0 && PS_BLEND_HW != 1 )
2022-08-09 07:56:00 +00:00
C_clamped = min ( C_clamped , 1.0f ) ;
2021-12-26 17:12:09 +00:00
2022-08-08 22:19:20 +00:00
if ( PS_BLEND_A == PS_BLEND_B )
Color . rgb = D ;
2022-08-16 09:11:17 +00:00
// In blend_mix, HW adds on some alpha factor * dst.
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
// Instead, apply an offset to convert HW's round to a floor.
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
else if ( PS_BLEND_MIX == 2 )
2022-08-09 07:56:00 +00:00
Color . rgb = ( ( A - B ) * C_clamped + D ) + ( 124.0f / 256.0f ) ;
2022-08-16 09:11:17 +00:00
else if ( PS_BLEND_MIX == 1 )
2022-08-09 07:56:00 +00:00
Color . rgb = ( ( A - B ) * C_clamped + D ) - ( 124.0f / 256.0f ) ;
2022-08-08 22:19:20 +00:00
else
Color . rgb = trunc ( ( ( A - B ) * C ) + D ) ;
2022-08-05 18:54:25 +00:00
2023-03-10 09:41:09 +00:00
if ( PS_BLEND_HW == 1 )
2022-08-05 18:54:25 +00:00
{
2023-02-26 20:58:34 +00:00
// As or Af
As_rgba . rgb = ( float3 ) C ;
2022-08-05 18:54:25 +00:00
// Subtract 1 for alpha to compensate for the changed equation,
// if c.rgb > 255.0f then we further need to adjust alpha accordingly,
// we pick the lowest overflow from all colors because it's the safest,
// we divide by 255 the color because we don't know Cd value,
// changed alpha should only be done for hw blend.
2023-02-21 16:49:46 +00:00
float3 alpha_compensate = max ( ( float3 ) 1.0f , Color . rgb / ( float3 ) 255.0f ) ;
As_rgba . rgb - = alpha_compensate ;
2022-08-05 18:54:25 +00:00
}
2023-03-10 12:02:18 +00:00
else if ( PS_BLEND_HW == 2 )
2022-08-09 07:56:00 +00:00
{
// Compensate slightly for Cd*(As + 1) - Cs*As.
// The initial factor we chose is 1 (0.00392)
// as that is the minimum color Cd can be,
// then we multiply by alpha to get the minimum
// blended value it can be.
float color_compensate = 1.0f * ( C + 1.0f ) ;
Color . rgb - = ( float3 ) color_compensate ;
}
2023-03-10 12:02:18 +00:00
else if ( PS_BLEND_HW == 3 )
2023-02-26 20:58:34 +00:00
{
// As, Ad or Af clamped.
As_rgba . rgb = ( float3 ) C_clamped ;
// Cs*(Alpha + 1) might overflow, if it does then adjust alpha value
// that is sent on second output to compensate.
float3 overflow_check = ( Color . rgb - ( float3 ) 255.0f ) / 255.0f ;
float3 alpha_compensate = max ( ( float3 ) 0.0f , overflow_check ) ;
As_rgba . rgb - = alpha_compensate ;
}
2019-08-25 18:01:29 +00:00
}
2022-01-23 11:39:01 +00:00
else
{
2023-03-10 12:02:18 +00:00
if ( PS_BLEND_HW == 1 )
2022-01-23 11:39:01 +00:00
{
2022-01-26 02:05:06 +00:00
// Needed for Cd * (As/Ad/F + 1) blending modes
2022-01-23 11:39:01 +00:00
Color . rgb = ( float3 ) 255.0f ;
}
2023-03-10 12:02:18 +00:00
else if ( PS_BLEND_HW == 2 )
2022-01-23 11:39:01 +00:00
{
GS-hw: Implement hw, hw/sw, sw blending on Ad when alpha write is masked.
Idea is to replace Ad with As when alpha write is masked,
then expand/let blend mix, accumulation blend non recursive blend or hw clr blend to
do the blending with Ad swapped as As.
We are doing this to try to bring some originally higher blending modes to lower levels
where we can do the draws with less texture barriers instead (gl/vk),
as for d3d11 this allows to run blending on the draws since previously the cases weren't handled properly,
it will be slower on d3d11 since we will be reading the frame buffer but it's better than nothing.
D3D11: It is enabled on Medium blending or higher, if draw is fbmask then it will enable
it on basic blending too.
OpenGL/Vulkan:
It is enabled based on the previous blending modes:
accumulation blend -> either minimum or basic level, depending on colclamp.
non recursive blend -> either minimum or basic level, depending on colclamp.
blend mix -> basic and higher level.
hw clr blend -> minimum and higher level.
All:
Prefer full sw blend when primitives don't overlap, sw fbmask or full barrier is used, it is more accurate.
2022-02-02 11:36:56 +00:00
// Cd*As,Cd*Ad or Cd*F
2022-01-23 11:39:01 +00:00
2022-02-01 18:19:20 +00:00
float Alpha = PS_BLEND_C == 2 ? Af : As ;
2022-01-23 11:39:01 +00:00
Color . rgb = max ( ( float3 ) 0.0f , ( Alpha - ( float3 ) 1.0f ) ) ;
Color . rgb * = ( float3 ) 255.0f ;
}
2023-03-10 09:41:09 +00:00
else if ( PS_BLEND_HW == 3 )
2022-01-26 02:05:06 +00:00
{
// Needed for Cs*Ad, Cs*Ad + Cd, Cd - Cs*Ad
2023-03-06 01:40:00 +00:00
// Multiply Color.rgb by (255/128) to compensate for wrong Ad/255 value when rgb are below 128.
// When any color channel is higher than 128 then adjust the compensation automatically
// to give us more accurate colors, otherwise they will be wrong.
// The higher the value (>128) the lower the compensation will be.
float max_color = max ( max ( Color . r , Color . g ) , Color . b ) ;
float color_compensate = 255.0f / max ( 128.0f , max_color ) ;
Color . rgb * = ( float3 ) color_compensate ;
2022-01-26 02:05:06 +00:00
}
2022-01-23 11:39:01 +00:00
}
2019-08-25 18:01:29 +00:00
}
// Pixel shader entry point.
// Fetches the fragment colour with ps_color, applies the scanline mask, the optional
// channel shuffle and the alpha correction, then either emits a primitive id
// (PS_DATE 1/2) or runs the blend / dither / clamp / fbmask chain and writes the
// colour outputs.
PS_OUTPUT ps_main(PS_INPUT input)
{
	float4 C = ps_color(input);
	PS_OUTPUT output;

	// Scanline mask: drop the pixel when masking is enabled (bit 1) and the row
	// parity equals bit 0 of PS_SCANMSK (prohibited lines).
	if ((PS_SCANMSK & 2) != 0 && (int(input.p.y) & 1) == (PS_SCANMSK & 1))
		discard;

	if (PS_SHUFFLE)
	{
		// Integer views of the colour and of TA (TA is scaled to 0..255 and rounded).
		uint4 c_int = uint4(C);
		uint2 ta_int = uint2(float2(TA.xy) * 255.0f + 0.5f);

		if (PS_SHUFFLE_SAME)
		{
			// Special case for 32bit input and 16bit output, shuffle used by The Godfather
			if (PS_READ_BA)
				C = (float4)(float((c_int.b & 0x7Fu) | (c_int.a & 0x80u)));
			else
				C.ga = C.rg;
		}
		else if (PS_READ16_SRC)
		{
			// Copy of a 16bit source in to this target.
			// Bit 7 of alpha chooses which TA component supplies the output high bit.
			uint ta_pick = ((c_int.a & 0x80u) != 0u) ? ta_int.y : ta_int.x;
			uint low_byte = (c_int.r >> 3) | (((c_int.g >> 3) & 0x7u) << 5);
			uint high_byte = (c_int.g >> 6) | ((c_int.b >> 3) << 2) | (ta_pick & 0x80u);

			C.rb = (float2)float(low_byte);
			C.ga = (float2)float(high_byte);
		}
		else if (PS_READ_BA)
		{
			// Write RB part. Mask will take care of the correct destination
			uint ta_pick = ((c_int.a & 0x80u) != 0u) ? ta_int.y : ta_int.x;

			C.rb = C.bb;
			C.ga = (float2)(float((c_int.a & 0x7Fu) | (ta_pick & 0x80u)));
		}
		else
		{
			// Same as the branch above, but sourced from the red/green channels.
			uint ta_pick = ((c_int.g & 0x80u) != 0u) ? ta_int.y : ta_int.x;

			C.rb = C.rr;
			C.ga = (float2)(float((c_int.g & 0x7Fu) | (ta_pick & 0x80u)));
		}
	}

	// Must be done before alpha correction
	// AA (Fixed one) will output a coverage of 1.0 as alpha
	if (PS_FIXED_ONE_A)
		C.a = 128.0f;

	// Alpha handed to the blend stage, normalised so that 128 maps to 1.0.
	// With SW_AD_TO_HW it is taken from the current render target alpha (Ad)
	// instead of the shader's own alpha, so the blend runs with Ad in its place.
	float blend_source_alpha = C.a;
	if (SW_AD_TO_HW)
	{
		float4 rt_color = trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
		blend_source_alpha = rt_color.a;
	}
	float4 alpha_blend = (float4)(blend_source_alpha / 128.0f);

	// Alpha correction
	float full_alpha = 128.0f; // alpha output will be 0x80
	if (PS_DFMT == FMT_16)
	{
		if (PS_FBA)
			C.a = full_alpha;
		else
			C.a = step(full_alpha, C.a) * full_alpha;
	}
	else if ((PS_DFMT == FMT_32) && PS_FBA)
	{
		if (C.a < full_alpha)
			C.a += full_alpha;
	}

#if PS_DATE == 3
	// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
	// the bad alpha value so we must keep it.
	int first_bad_prim = int(PrimMinTexture.Load(int3(input.p.xy, 0)));
	if (int(input.primid) > first_bad_prim)
		discard;
#endif

	// Get first primitive that will write a failing alpha value
#if PS_DATE == 1
	// DATM == 0
	// Pixels with alpha equal to 1 will fail (128-255)
	output.c = (C.a > 127.5f) ? float(input.primid) : float(0x7FFFFFFF);
#elif PS_DATE == 2
	// DATM == 1
	// Pixels with alpha equal to 0 will fail (0-127)
	output.c = (C.a < 127.5f) ? float(input.primid) : float(0x7FFFFFFF);
#else
	// Not primid DATE setup
	ps_blend(C, alpha_blend, input.p.xy);

	ps_dither(C.rgb, input.p.xy);

	// Color clamp/wrap needs to be done after sw blending and dithering
	ps_color_clamp_wrap(C.rgb);

	ps_fbmask(C, input.p.xy);

#if !PS_NO_COLOR
	output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f;
#if !PS_NO_COLOR1
	output.c1 = alpha_blend;
#endif
#if PS_NO_ABLEND
	// write alpha blend factor into col0
	output.c0.a = alpha_blend.a;
#endif
#if PS_ONLY_ALPHA
	// rgb isn't used
	output.c0.rgb = float3(0.0f, 0.0f, 0.0f);
#endif
#endif

#endif

#if PS_ZCLAMP
	output.depth = min(input.p.z, MaxDepthPS);
#endif

	return output;
}
2023-04-07 07:55:55 +00:00
#endif // PIXEL_SHADER
2019-08-25 18:01:29 +00:00
//////////////////////////////////////////////////////////////////////
// Vertex Shader
//////////////////////////////////////////////////////////////////////
2023-04-07 07:55:55 +00:00
#ifdef VERTEX_SHADER
// Vertex shader constants. Under DX12 the buffer is bound explicitly to register b0;
// otherwise it is declared without an explicit register.
#ifdef DX12
cbuffer cb0 : register ( b0 )
#else
cbuffer cb0
#endif
{
float2 VertexScale ; // multiplied into the vertex xy position in vs_main (y is negated there)
float2 VertexOffset ; // subtracted from the scaled xy position in vs_main (y is negated there)
float2 TextureScale ; // scales uv for ti.xy; st is divided by it for ti.zw when VS_FST is off
float2 TextureOffset ; // subtracted from both input.uv and input.st in vs_main
float2 PointSize ; // corner offset for expanded points and width factor for expanded lines (vs_main_expand)
uint MaxDepth ; // upper bound applied to the input z in vs_main
uint BaseVertex ; // Only used in DX11: added to the vertex index in load_vertex when DX12 is not defined.
} ;
2011-02-19 10:57:28 +00:00
// Base vertex shader: clamps depth, transforms the vertex position with
// VertexScale/VertexOffset and fills the texture coordinate, colour and fog outputs.
VS_OUTPUT vs_main(VS_INPUT input)
{
	VS_OUTPUT output;

	// Values that are copied through regardless of texturing.
	output.c = input.c;
	output.t.z = input.f.r;

	// Clamp to max depth, gs doesn't wrap
	input.z = min(input.z, MaxDepth);

	// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
	// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
	// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
	// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
	float4 position = float4(input.p, input.z, 1.0f) - float4(0.05f, 0.05f, 0, 0);

	position.xy = position.xy * float2(VertexScale.x, -VertexScale.y) - float2(VertexOffset.x, -VertexOffset.y);
	position.z *= exp2(-32.0f); // integer->float depth
	output.p = position;

	if (VS_TME)
	{
		float2 int_coord = input.uv - TextureOffset;
		float2 flt_coord = input.st - TextureOffset;

		// Integer normalized
		output.ti.xy = int_coord * TextureScale;

		if (VS_FST)
		{
			// Integer integral
			output.ti.zw = int_coord;
		}
		else
		{
			// float for post-processing in some games
			output.ti.zw = flt_coord / TextureScale;
		}

		// Float coords
		output.t.xy = flt_coord;
		output.t.w = input.q;
	}
	else
	{
		// No texturing: zero coordinates with q fixed to 1.
		output.t.xyw = float3(0.0f, 0.0f, 1.0f);
		output.ti = 0;
	}

	return output;
}
2023-04-07 07:55:55 +00:00
#if VS_EXPAND != 0
2019-08-25 18:01:29 +00:00
2023-04-07 07:55:55 +00:00
// Packed vertex record as stored in the `vertices` structured buffer.
// load_vertex unpacks one of these into a VS_INPUT.
struct VS_RAW_INPUT
2022-09-01 06:50:25 +00:00
{
2023-04-07 07:55:55 +00:00
float2 ST ; // copied unchanged to VS_INPUT.st
uint RGBA ; // four 8-bit values, lowest byte first; unpacked to VS_INPUT.c
float Q ; // copied unchanged to VS_INPUT.q
uint XY ; // low 16 bits -> p.x, high 16 bits -> p.y
uint Z ; // copied unchanged to VS_INPUT.z
uint UV ; // low 16 bits -> uv.x, high 16 bits -> uv.y
uint FOG ; // four 8-bit values, lowest byte first; each divided by 255 into VS_INPUT.f
} ;
2022-09-01 06:50:25 +00:00
2023-04-07 07:55:55 +00:00
// Packed vertex data read by load_vertex for the expanding vertex shader; bound to register t0.
StructuredBuffer < VS_RAW_INPUT > vertices : register ( t0 ) ;
2017-03-03 21:18:49 +00:00
2023-04-07 07:55:55 +00:00
// Reads the packed vertex at `index` from the `vertices` buffer and unpacks its
// bit-packed fields into a VS_INPUT.
VS_INPUT load_vertex(uint index)
{
	// When DX12 is not defined the fetch index is offset by BaseVertex.
#ifdef DX12
	uint fetch_index = index;
#else
	uint fetch_index = BaseVertex + index;
#endif
	VS_RAW_INPUT src = vertices.Load(fetch_index);

	// Per-component shifts that bring each byte of a 32-bit word down to bits 0..7.
	uint4 byte_shift = uint4(0u, 8u, 16u, 24u);

	VS_INPUT unpacked;

	// Fields stored as-is.
	unpacked.st = src.ST;
	unpacked.q = src.Q;
	unpacked.z = src.Z;

	// Two 16-bit halves per word: low half first, high half second.
	unpacked.p = uint2(src.XY & 0xFFFFu, src.XY >> 16);
	unpacked.uv = uint2(src.UV & 0xFFFFu, src.UV >> 16);

	// Four bytes per word, lowest byte first; fog is additionally scaled to 0..1.
	unpacked.c = ((uint4)src.RGBA >> byte_shift) & 0xFFu;
	unpacked.f = float4(((uint4)src.FOG >> byte_shift) & 0xFFu) / 255.0f;

	return unpacked;
}
2017-03-03 21:18:49 +00:00
2023-04-07 07:55:55 +00:00
// Expanding vertex shader: derives the source vertex and the corner to emit from
// SV_VertexID, runs vs_main on the source vertex (and on its partner for lines and
// sprites) and adjusts the result for the selected corner.
// VS_EXPAND selects the primitive kind: 1 = point, 2 = line, 3 = sprite.
VS_OUTPUT vs_main_expand(uint vid : SV_VertexID)
{
#if VS_EXPAND == 1 // Point

	// vid >> 2 is the source vertex; bit 0 / bit 1 choose the x / y side of the point.
	VS_OUTPUT corner = vs_main(load_vertex(vid >> 2));

	float2 grow;
	grow.x = ((vid & 1u) != 0u) ? PointSize.x : 0.0f;
	grow.y = ((vid & 2u) != 0u) ? PointSize.y : 0.0f;
	corner.p.xy += grow;

	return corner;

#elif VS_EXPAND == 2 // Line

	uint self_index = vid >> 2;
	bool is_bottom = (vid & 2u) != 0u;
	bool is_right = (vid & 1u) != 0u;

	// All lines will be a pair of vertices next to each other
	// Since DirectX uses provoking vertex first, the bottom point will be the lower of the two
	uint partner_index = is_bottom ? (self_index + 1u) : (self_index - 1u);

	VS_OUTPUT vtx = vs_main(load_vertex(self_index));
	VS_OUTPUT partner = vs_main(load_vertex(partner_index));

	// Half-width offset along the normal of the line direction.
	float2 direction = normalize(vtx.p.xy - partner.p.xy);
	float2 half_width = (float2(direction.y, -direction.x) * PointSize) / 2;

	// line_normal is inverted for bottom point
	if (is_bottom != is_right)
		vtx.p.xy += half_width;
	else
		vtx.p.xy -= half_width;

	// Lines will be run as (0 1 2) (1 2 3)
	// This means that both triangles will have a point based off the top line point as their first point
	// So we don't have to do anything for !IIP

	return vtx;

#elif VS_EXPAND == 3 // Sprite

	// Sprite points are always in pairs: even index and the odd index right after it.
	uint lt_index = (vid >> 1) & ~1u;
	uint rb_index = lt_index | 1u;

	VS_OUTPUT lt = vs_main(load_vertex(lt_index));
	VS_OUTPUT vtx = vs_main(load_vertex(rb_index));

	// Start from the second vertex of the pair and pull individual axes from the first one.
	if ((vid & 1u) != 0u)
	{
		vtx.p.x = lt.p.x;
		vtx.t.x = lt.t.x;
		vtx.ti.xz = lt.ti.xz;
	}

	if ((vid & 2u) != 0u)
	{
		vtx.p.y = lt.p.y;
		vtx.t.y = lt.t.y;
		vtx.ti.yw = lt.ti.yw;
	}

	return vtx;

#endif
}
2023-04-07 07:55:55 +00:00
#endif // VS_EXPAND
#endif // VERTEX_SHADER