GS/HW: Adjust dither on Blend Mix when the blend is (Cs - Cd) * As.

This commit is contained in:
refractionpcsx2 2024-03-05 19:53:23 +00:00
parent d28ba0e53c
commit 65649b3cbb
13 changed files with 53 additions and 16 deletions

View File

@ -60,6 +60,7 @@
#define PS_FIXED_ONE_A 0 #define PS_FIXED_ONE_A 0
#define PS_PABE 0 #define PS_PABE 0
#define PS_DITHER 0 #define PS_DITHER 0
#define PS_DITHER_ADJUST 0
#define PS_ZCLAMP 0 #define PS_ZCLAMP 0
#define PS_SCANMSK 0 #define PS_SCANMSK 0
#define PS_AUTOMATIC_LOD 0 #define PS_AUTOMATIC_LOD 0
@ -783,7 +784,7 @@ void ps_fbmask(inout float4 C, float2 pos_xy)
} }
} }
void ps_dither(inout float3 C, float2 pos_xy) void ps_dither(inout float3 C, float2 pos_xy, float alpha_blend)
{ {
if (PS_DITHER) if (PS_DITHER)
{ {
@ -795,6 +796,12 @@ void ps_dither(inout float3 C, float2 pos_xy)
fpos = int2(pos_xy * RcpScaleFactor); fpos = int2(pos_xy * RcpScaleFactor);
float value = DitherMatrix[fpos.x & 3][fpos.y & 3]; float value = DitherMatrix[fpos.x & 3][fpos.y & 3];
// The idea here is to add the dither amount, scaled by the inverse of the alpha (capped at 1), before the colour goes to the hw blend,
// so that after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither.
if (PS_DITHER_ADJUST)
value *= alpha_blend > 0.0f ? min(1.0f / alpha_blend, 1.0f) : 1.0f;
if (PS_ROUND_INV) if (PS_ROUND_INV)
C -= value; C -= value;
else else
@ -816,7 +823,7 @@ void ps_color_clamp_wrap(inout float3 C)
C = clamp(C, (float3)0.0f, (float3)255.0f); C = clamp(C, (float3)0.0f, (float3)255.0f);
// In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania // In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania
if (PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0) if (PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER))
C = (float3)((int3)C & (int3)0xF8); C = (float3)((int3)C & (int3)0xF8);
else if (PS_COLCLIP == 1 || PS_HDR == 1) else if (PS_COLCLIP == 1 || PS_HDR == 1)
C = (float3)((int3)C & (int3)0xFF); C = (float3)((int3)C & (int3)0xFF);
@ -1061,7 +1068,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
} }
} }
ps_dither(C.rgb, input.p.xy); ps_dither(C.rgb, input.p.xy, alpha_blend.a);
// Color clamp/wrap needs to be done after sw blending and dithering // Color clamp/wrap needs to be done after sw blending and dithering
ps_color_clamp_wrap(C.rgb); ps_color_clamp_wrap(C.rgb);

View File

@ -720,7 +720,7 @@ void ps_fbmask(inout vec4 C)
#endif #endif
} }
void ps_dither(inout vec3 C) void ps_dither(inout vec3 C, float alpha_blend)
{ {
#if PS_DITHER #if PS_DITHER
#if PS_DITHER == 2 #if PS_DITHER == 2
@ -729,6 +729,13 @@ void ps_dither(inout vec3 C)
ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor); ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
#endif #endif
float value = DitherMatrix[fpos.y&3][fpos.x&3]; float value = DitherMatrix[fpos.y&3][fpos.x&3];
// The idea here is to add the dither amount, scaled by the inverse of the alpha (capped at 1), before the colour goes to the hw blend,
// so that after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither.
#if PS_DITHER_ADJUST
value *= alpha_blend > 0.0f ? min(1.0f / alpha_blend, 1.0f) : 1.0f;
#endif
#if PS_ROUND_INV #if PS_ROUND_INV
C -= value; C -= value;
#else #else
@ -759,7 +766,7 @@ void ps_color_clamp_wrap(inout vec3 C)
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
// GS: Color = 1, Alpha = 255 => output 1 // GS: Color = 1, Alpha = 255 => output 1
// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 #if PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER)
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
C = vec3(ivec3(C) & ivec3(0xF8)); C = vec3(ivec3(C) & ivec3(0xF8));
#elif PS_COLCLIP == 1 || PS_HDR == 1 #elif PS_COLCLIP == 1 || PS_HDR == 1
@ -1055,7 +1062,7 @@ void ps_main()
#endif // PS_SHUFFLE_SAME #endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE #endif // PS_SHUFFLE
ps_dither(C.rgb); ps_dither(C.rgb, alpha_blend.a);
// Color clamp/wrap needs to be done after sw blending and dithering // Color clamp/wrap needs to be done after sw blending and dithering
ps_color_clamp_wrap(C.rgb); ps_color_clamp_wrap(C.rgb);

View File

@ -284,6 +284,7 @@ void main()
#define PS_FIXED_ONE_A 0 #define PS_FIXED_ONE_A 0
#define PS_PABE 0 #define PS_PABE 0
#define PS_DITHER 0 #define PS_DITHER 0
#define PS_DITHER_ADJUST 0
#define PS_ZCLAMP 0 #define PS_ZCLAMP 0
#define PS_FEEDBACK_LOOP 0 #define PS_FEEDBACK_LOOP 0
#define PS_TEX_IS_FB 0 #define PS_TEX_IS_FB 0
@ -969,7 +970,7 @@ void ps_fbmask(inout vec4 C)
#endif #endif
} }
void ps_dither(inout vec3 C) void ps_dither(inout vec3 C, float alpha_blend)
{ {
#if PS_DITHER #if PS_DITHER
ivec2 fpos; ivec2 fpos;
@ -981,6 +982,13 @@ void ps_dither(inout vec3 C)
#endif #endif
float value = DitherMatrix[fpos.y & 3][fpos.x & 3]; float value = DitherMatrix[fpos.y & 3][fpos.x & 3];
// The idea here is to add the dither amount, scaled by the inverse of the alpha (capped at 1), before the colour goes to the hw blend,
// so that after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither.
#if PS_DITHER_ADJUST
value *= alpha_blend > 0.0f ? min(1.0f / alpha_blend, 1.0f) : 1.0f;
#endif
#if PS_ROUND_INV #if PS_ROUND_INV
C -= value; C -= value;
#else #else
@ -1011,7 +1019,7 @@ void ps_color_clamp_wrap(inout vec3 C)
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
// GS: Color = 1, Alpha = 255 => output 1 // GS: Color = 1, Alpha = 255 => output 1
// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 #if PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER)
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
C = vec3(ivec3(C) & ivec3(0xF8)); C = vec3(ivec3(C) & ivec3(0xF8));
#elif PS_COLCLIP == 1 || PS_HDR == 1 #elif PS_COLCLIP == 1 || PS_HDR == 1
@ -1286,7 +1294,7 @@ void main()
#endif // PS_SHUFFLE_SAME #endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE #endif // PS_SHUFFLE
ps_dither(C.rgb); ps_dither(C.rgb, alpha_blend.a);
// Color clamp/wrap needs to be done after sw blending and dithering // Color clamp/wrap needs to be done after sw blending and dithering
ps_color_clamp_wrap(C.rgb); ps_color_clamp_wrap(C.rgb);

View File

@ -329,6 +329,7 @@ struct alignas(16) GSHWDrawConfig
// Dithering // Dithering
u32 dither : 2; u32 dither : 2;
u32 dither_adjust : 1;
// Depth clamp // Depth clamp
u32 zclamp : 1; u32 zclamp : 1;

View File

@ -1698,6 +1698,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a); sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a);
sm.AddMacro("PS_PABE", sel.pabe); sm.AddMacro("PS_PABE", sel.pabe);
sm.AddMacro("PS_DITHER", sel.dither); sm.AddMacro("PS_DITHER", sel.dither);
sm.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust);
sm.AddMacro("PS_ZCLAMP", sel.zclamp); sm.AddMacro("PS_ZCLAMP", sel.zclamp);
sm.AddMacro("PS_SCANMSK", sel.scanmsk); sm.AddMacro("PS_SCANMSK", sel.scanmsk);
sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod); sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod);

View File

@ -2824,6 +2824,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a); sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a);
sm.AddMacro("PS_PABE", sel.pabe); sm.AddMacro("PS_PABE", sel.pabe);
sm.AddMacro("PS_DITHER", sel.dither); sm.AddMacro("PS_DITHER", sel.dither);
sm.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust);
sm.AddMacro("PS_ZCLAMP", sel.zclamp); sm.AddMacro("PS_ZCLAMP", sel.zclamp);
sm.AddMacro("PS_SCANMSK", sel.scanmsk); sm.AddMacro("PS_SCANMSK", sel.scanmsk);
sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod); sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod);

View File

@ -3931,7 +3931,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
blend_mix &= !sw_blending; blend_mix &= !sw_blending;
sw_blending |= blend_mix; sw_blending |= blend_mix;
// Disable dithering on blend mix. // Disable dithering on blend mix, except for the (Cs - Cd) * As blend with max alpha <= 128.
m_conf.ps.dither &= !blend_mix; m_conf.ps.dither &= !blend_mix || (m_conf.ps.blend_a == 0 && m_conf.ps.blend_b == 1 && m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128);
[[fallthrough]]; [[fallthrough]];
case AccBlendLevel::Minimum: case AccBlendLevel::Minimum:
break; break;
@ -3985,7 +3985,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
blend_mix &= !sw_blending; blend_mix &= !sw_blending;
sw_blending |= blend_mix; sw_blending |= blend_mix;
// Disable dithering on blend mix. // Disable dithering on blend mix, except for the (Cs - Cd) * As blend with max alpha <= 128.
m_conf.ps.dither &= !blend_mix; m_conf.ps.dither &= !blend_mix || (m_conf.ps.blend_a == 0 && m_conf.ps.blend_b == 1 && m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128);
[[fallthrough]]; [[fallthrough]];
case AccBlendLevel::Minimum: case AccBlendLevel::Minimum:
break; break;
@ -5381,6 +5381,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.cb_ps.DitherMatrix[1] = GSVector4(DIMX.DM10, DIMX.DM11, DIMX.DM12, DIMX.DM13); m_conf.cb_ps.DitherMatrix[1] = GSVector4(DIMX.DM10, DIMX.DM11, DIMX.DM12, DIMX.DM13);
m_conf.cb_ps.DitherMatrix[2] = GSVector4(DIMX.DM20, DIMX.DM21, DIMX.DM22, DIMX.DM23); m_conf.cb_ps.DitherMatrix[2] = GSVector4(DIMX.DM20, DIMX.DM21, DIMX.DM22, DIMX.DM23);
m_conf.cb_ps.DitherMatrix[3] = GSVector4(DIMX.DM30, DIMX.DM31, DIMX.DM32, DIMX.DM33); m_conf.cb_ps.DitherMatrix[3] = GSVector4(DIMX.DM30, DIMX.DM31, DIMX.DM32, DIMX.DM33);
m_conf.ps.dither_adjust = m_conf.ps.blend_a == 0 && m_conf.ps.blend_b == 1 && m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128;
} }
if (PRIM->FGE) if (PRIM->FGE)

View File

@ -1821,6 +1821,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA); setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA);
setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL); setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL);
setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER); setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER);
setFnConstantI(m_fn_constants, pssel.dither_adjust, GSMTLConstantIndex_PS_DITHER_ADJUST);
setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP); setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP);
setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK); setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK);
setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE); setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE);

View File

@ -192,6 +192,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_ONLY_ALPHA, GSMTLConstantIndex_PS_ONLY_ALPHA,
GSMTLConstantIndex_PS_CHANNEL, GSMTLConstantIndex_PS_CHANNEL,
GSMTLConstantIndex_PS_DITHER, GSMTLConstantIndex_PS_DITHER,
GSMTLConstantIndex_PS_DITHER_ADJUST,
GSMTLConstantIndex_PS_ZCLAMP, GSMTLConstantIndex_PS_ZCLAMP,
GSMTLConstantIndex_PS_TCOFFSETHACK, GSMTLConstantIndex_PS_TCOFFSETHACK,
GSMTLConstantIndex_PS_URBAN_CHAOS_HLE, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE,

View File

@ -51,6 +51,7 @@ constant bool PS_NO_COLOR1 [[function_constant(GSMTLConstantIndex_PS_NO
constant bool PS_ONLY_ALPHA [[function_constant(GSMTLConstantIndex_PS_ONLY_ALPHA)]]; constant bool PS_ONLY_ALPHA [[function_constant(GSMTLConstantIndex_PS_ONLY_ALPHA)]];
constant uint PS_CHANNEL [[function_constant(GSMTLConstantIndex_PS_CHANNEL)]]; constant uint PS_CHANNEL [[function_constant(GSMTLConstantIndex_PS_CHANNEL)]];
constant uint PS_DITHER [[function_constant(GSMTLConstantIndex_PS_DITHER)]]; constant uint PS_DITHER [[function_constant(GSMTLConstantIndex_PS_DITHER)]];
constant uint PS_DITHER_ADJUST [[function_constant(GSMTLConstantIndex_PS_DITHER_ADJUST)]];
constant bool PS_ZCLAMP [[function_constant(GSMTLConstantIndex_PS_ZCLAMP)]]; constant bool PS_ZCLAMP [[function_constant(GSMTLConstantIndex_PS_ZCLAMP)]];
constant bool PS_TCOFFSETHACK [[function_constant(GSMTLConstantIndex_PS_TCOFFSETHACK)]]; constant bool PS_TCOFFSETHACK [[function_constant(GSMTLConstantIndex_PS_TCOFFSETHACK)]];
constant bool PS_URBAN_CHAOS_HLE [[function_constant(GSMTLConstantIndex_PS_URBAN_CHAOS_HLE)]]; constant bool PS_URBAN_CHAOS_HLE [[function_constant(GSMTLConstantIndex_PS_URBAN_CHAOS_HLE)]];
@ -842,7 +843,7 @@ struct PSMain
C = float4((uint4(int4(C)) & (cb.fbmask ^ 0xff)) | (uint4(current_color * 255.5) & cb.fbmask)); C = float4((uint4(int4(C)) & (cb.fbmask ^ 0xff)) | (uint4(current_color * 255.5) & cb.fbmask));
} }
void ps_dither(thread float4& C) void ps_dither(thread float4& C, float alpha_blend)
{ {
if (PS_DITHER == 0) if (PS_DITHER == 0)
return; return;
@ -851,7 +852,13 @@ struct PSMain
fpos = ushort2(in.p.xy); fpos = ushort2(in.p.xy);
else else
fpos = ushort2(in.p.xy * float2(cb.scale_factor.y)); fpos = ushort2(in.p.xy * float2(cb.scale_factor.y));
float value = cb.dither_matrix[fpos.y & 3][fpos.x & 3];; float value = cb.dither_matrix[fpos.y & 3][fpos.x & 3];
// The idea here is to add the dither amount, scaled by the inverse of the alpha (capped at 1), before the colour goes to the hw blend,
// so that after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither.
if (PS_DITHER_ADJUST)
value *= alpha_blend > 0.f ? min(1.f / alpha_blend, 1.f) : 1.f;
if (PS_ROUND_INV) if (PS_ROUND_INV)
C.rgb -= value; C.rgb -= value;
else else
@ -877,7 +884,7 @@ struct PSMain
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
// GS: Color = 1, Alpha = 255 => output 1 // GS: Color = 1, Alpha = 255 => output 1
// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
if (PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0) if (PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER))
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
C.rgb = float3(short3(C.rgb) & 0xF8); C.rgb = float3(short3(C.rgb) & 0xF8);
else if (PS_COLCLIP || PS_HDR) else if (PS_COLCLIP || PS_HDR)
@ -1113,7 +1120,7 @@ struct PSMain
} }
} }
ps_dither(C); ps_dither(C, alpha_blend.a);
// Color clamp/wrap needs to be done after sw blending and dithering // Color clamp/wrap needs to be done after sw blending and dithering
ps_color_clamp_wrap(C); ps_color_clamp_wrap(C);

View File

@ -1373,6 +1373,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_FBMASK {}\n", sel.fbmask) + fmt::format("#define PS_FBMASK {}\n", sel.fbmask)
+ fmt::format("#define PS_HDR {}\n", sel.hdr) + fmt::format("#define PS_HDR {}\n", sel.hdr)
+ fmt::format("#define PS_DITHER {}\n", sel.dither) + fmt::format("#define PS_DITHER {}\n", sel.dither)
+ fmt::format("#define PS_DITHER_ADJUST {}\n", sel.dither_adjust)
+ fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp) + fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp)
+ fmt::format("#define PS_BLEND_MIX {}\n", sel.blend_mix) + fmt::format("#define PS_BLEND_MIX {}\n", sel.blend_mix)
+ fmt::format("#define PS_ROUND_INV {}\n", sel.round_inv) + fmt::format("#define PS_ROUND_INV {}\n", sel.round_inv)

View File

@ -4788,6 +4788,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_FBMASK", sel.fbmask); AddMacro(ss, "PS_FBMASK", sel.fbmask);
AddMacro(ss, "PS_HDR", sel.hdr); AddMacro(ss, "PS_HDR", sel.hdr);
AddMacro(ss, "PS_DITHER", sel.dither); AddMacro(ss, "PS_DITHER", sel.dither);
AddMacro(ss, "PS_DITHER_ADJUST", sel.dither_adjust);
AddMacro(ss, "PS_ZCLAMP", sel.zclamp); AddMacro(ss, "PS_ZCLAMP", sel.zclamp);
AddMacro(ss, "PS_PABE", sel.pabe); AddMacro(ss, "PS_PABE", sel.pabe);
AddMacro(ss, "PS_SCANMSK", sel.scanmsk); AddMacro(ss, "PS_SCANMSK", sel.scanmsk);

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the /// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache. /// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 39; static constexpr u32 SHADER_CACHE_VERSION = 40;