diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index d4580c3cf1..3ecdff056a 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -60,6 +60,7 @@ #define PS_FIXED_ONE_A 0 #define PS_PABE 0 #define PS_DITHER 0 +#define PS_DITHER_ADJUST 0 #define PS_ZCLAMP 0 #define PS_SCANMSK 0 #define PS_AUTOMATIC_LOD 0 @@ -783,7 +784,7 @@ void ps_fbmask(inout float4 C, float2 pos_xy) } } -void ps_dither(inout float3 C, float2 pos_xy) +void ps_dither(inout float3 C, float2 pos_xy, float alpha_blend) { if (PS_DITHER) { @@ -795,6 +796,12 @@ void ps_dither(inout float3 C, float2 pos_xy) fpos = int2(pos_xy * RcpScaleFactor); float value = DitherMatrix[fpos.x & 3][fpos.y & 3]; + + // The idea here is we add on the dither amount adjusted by the alpha before it goes to the hw blend + // so after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither. + if (PS_DITHER_ADJUST) + value *= alpha_blend > 0.0f ? min(1.0f / alpha_blend, 1.0f) : 1.0f; + if (PS_ROUND_INV) C -= value; else @@ -816,7 +823,7 @@ void ps_color_clamp_wrap(inout float3 C) C = clamp(C, (float3)0.0f, (float3)255.0f); // In 16 bits format, only 5 bits of color are used. It impacts shadows computation of Castlevania - if (PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0) + if (PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER)) C = (float3)((int3)C & (int3)0xF8); else if (PS_COLCLIP == 1 || PS_HDR == 1) C = (float3)((int3)C & (int3)0xFF); @@ -1061,7 +1068,7 @@ PS_OUTPUT ps_main(PS_INPUT input) } } - ps_dither(C.rgb, input.p.xy); + ps_dither(C.rgb, input.p.xy, alpha_blend.a); // Color clamp/wrap needs to be done after sw blending and dithering ps_color_clamp_wrap(C.rgb); diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index d49c1cf628..1b0655241f 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -720,7 +720,7 @@ void ps_fbmask(inout vec4 C) #endif } -void ps_dither(inout vec3 C) +void ps_dither(inout vec3 C, float alpha_blend) { #if PS_DITHER #if PS_DITHER == 2 @@ -729,6 +729,13 @@ void ps_dither(inout vec3 C) ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor); #endif float value = DitherMatrix[fpos.y&3][fpos.x&3]; + + // The idea here is we add on the dither amount adjusted by the alpha before it goes to the hw blend + // so after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither. + #if PS_DITHER_ADJUST + value *= alpha_blend > 0.0f ? min(1.0f / alpha_blend, 1.0f) : 1.0f; + #endif + #if PS_ROUND_INV C -= value; #else @@ -759,7 +766,7 @@ void ps_color_clamp_wrap(inout vec3 C) // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // GS: Color = 1, Alpha = 255 => output 1 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 -#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 +#if PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER) // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania C = vec3(ivec3(C) & ivec3(0xF8)); #elif PS_COLCLIP == 1 || PS_HDR == 1 @@ -1055,7 +1062,7 @@ void ps_main() #endif // PS_SHUFFLE_SAME #endif // PS_SHUFFLE - ps_dither(C.rgb); + ps_dither(C.rgb, alpha_blend.a); // Color clamp/wrap needs to be done after sw blending and dithering ps_color_clamp_wrap(C.rgb); diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 1850ddc191..9b0a90fd57 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -284,6 +284,7 @@ void main() #define PS_FIXED_ONE_A 0 #define PS_PABE 0 #define PS_DITHER 0 +#define PS_DITHER_ADJUST 0 #define PS_ZCLAMP 0 #define PS_FEEDBACK_LOOP 0 #define PS_TEX_IS_FB 0 @@ -969,7 +970,7 @@ void ps_fbmask(inout vec4 C) #endif } -void ps_dither(inout vec3 C) +void ps_dither(inout vec3 C, float alpha_blend) { #if PS_DITHER ivec2 fpos; @@ -981,6 +982,13 @@ void ps_dither(inout vec3 C) #endif float value = DitherMatrix[fpos.y & 3][fpos.x & 3]; + + // The idea here is we add on the dither amount adjusted by the alpha before it goes to the hw blend + // so after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither. + #if PS_DITHER_ADJUST + value *= alpha_blend > 0.0f ? min(1.0f / alpha_blend, 1.0f) : 1.0f; + #endif + #if PS_ROUND_INV C -= value; #else @@ -1011,7 +1019,7 @@ void ps_color_clamp_wrap(inout vec3 C) // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // GS: Color = 1, Alpha = 255 => output 1 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 -#if PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0 +#if PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER) // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania C = vec3(ivec3(C) & ivec3(0xF8)); #elif PS_COLCLIP == 1 || PS_HDR == 1 @@ -1286,7 +1294,7 @@ void main() #endif // PS_SHUFFLE_SAME #endif // PS_SHUFFLE - ps_dither(C.rgb); + ps_dither(C.rgb, alpha_blend.a); // Color clamp/wrap needs to be done after sw blending and dithering ps_color_clamp_wrap(C.rgb); diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index bc6b626588..576a17a28d 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -329,6 +329,7 @@ struct alignas(16) GSHWDrawConfig // Dithering u32 dither : 2; + u32 dither_adjust : 1; // Depth clamp u32 zclamp : 1; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 22f024bea6..8c8d62045c 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1698,6 +1698,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a); sm.AddMacro("PS_PABE", sel.pabe); sm.AddMacro("PS_DITHER", sel.dither); + sm.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust); sm.AddMacro("PS_ZCLAMP", sel.zclamp); sm.AddMacro("PS_SCANMSK", sel.scanmsk); sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 0091578bf0..75cc3883f9 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -2824,6 +2824,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_FIXED_ONE_A", sel.fixed_one_a); sm.AddMacro("PS_PABE", sel.pabe); sm.AddMacro("PS_DITHER", sel.dither); + sm.AddMacro("PS_DITHER_ADJUST", sel.dither_adjust); sm.AddMacro("PS_ZCLAMP", sel.zclamp); sm.AddMacro("PS_SCANMSK", sel.scanmsk); sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 0206e90cea..d196f6a893 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -3931,7 +3931,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT blend_mix &= !sw_blending; sw_blending |= blend_mix; // Disable dithering on blend mix. - m_conf.ps.dither &= !blend_mix; + m_conf.ps.dither &= !blend_mix || (m_conf.ps.blend_a == 0 && m_conf.ps.blend_b == 1 && m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128); [[fallthrough]]; case AccBlendLevel::Minimum: break; @@ -3985,7 +3985,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT blend_mix &= !sw_blending; sw_blending |= blend_mix; // Disable dithering on blend mix. - m_conf.ps.dither &= !blend_mix; + m_conf.ps.dither &= !blend_mix || (m_conf.ps.blend_a == 0 && m_conf.ps.blend_b == 1 && m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128); [[fallthrough]]; case AccBlendLevel::Minimum: break; @@ -5381,6 +5381,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.cb_ps.DitherMatrix[1] = GSVector4(DIMX.DM10, DIMX.DM11, DIMX.DM12, DIMX.DM13); m_conf.cb_ps.DitherMatrix[2] = GSVector4(DIMX.DM20, DIMX.DM21, DIMX.DM22, DIMX.DM23); m_conf.cb_ps.DitherMatrix[3] = GSVector4(DIMX.DM30, DIMX.DM31, DIMX.DM32, DIMX.DM33); + m_conf.ps.dither_adjust = m_conf.ps.blend_a == 0 && m_conf.ps.blend_b == 1 && m_conf.ps.blend_c == 0 && GetAlphaMinMax().max <= 128; } if (PRIM->FGE) diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 30d529a021..5d8c33ba94 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1821,6 +1821,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA); setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL); setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER); + setFnConstantI(m_fn_constants, pssel.dither_adjust, GSMTLConstantIndex_PS_DITHER_ADJUST); setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP); setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK); setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index c471fff4f8..f763c0e4b7 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -192,6 +192,7 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_ONLY_ALPHA, GSMTLConstantIndex_PS_CHANNEL, GSMTLConstantIndex_PS_DITHER, + GSMTLConstantIndex_PS_DITHER_ADJUST, GSMTLConstantIndex_PS_ZCLAMP, GSMTLConstantIndex_PS_TCOFFSETHACK, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE, diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index d0758d44ce..102802f826 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -51,6 +51,7 @@ constant bool PS_NO_COLOR1 [[function_constant(GSMTLConstantIndex_PS_NO constant bool PS_ONLY_ALPHA [[function_constant(GSMTLConstantIndex_PS_ONLY_ALPHA)]]; constant uint PS_CHANNEL [[function_constant(GSMTLConstantIndex_PS_CHANNEL)]]; constant uint PS_DITHER [[function_constant(GSMTLConstantIndex_PS_DITHER)]]; +constant uint PS_DITHER_ADJUST [[function_constant(GSMTLConstantIndex_PS_DITHER_ADJUST)]]; constant bool PS_ZCLAMP [[function_constant(GSMTLConstantIndex_PS_ZCLAMP)]]; constant bool PS_TCOFFSETHACK [[function_constant(GSMTLConstantIndex_PS_TCOFFSETHACK)]]; constant bool PS_URBAN_CHAOS_HLE [[function_constant(GSMTLConstantIndex_PS_URBAN_CHAOS_HLE)]]; @@ -842,7 +843,7 @@ struct PSMain C = float4((uint4(int4(C)) & (cb.fbmask ^ 0xff)) | (uint4(current_color * 255.5) & cb.fbmask)); } - void ps_dither(thread float4& C) + void ps_dither(thread float4& C, float alpha_blend) { if (PS_DITHER == 0) return; @@ -851,7 +852,13 @@ struct PSMain fpos = ushort2(in.p.xy); else fpos = ushort2(in.p.xy * float2(cb.scale_factor.y)); - float value = cb.dither_matrix[fpos.y & 3][fpos.x & 3];; + float value = cb.dither_matrix[fpos.y & 3][fpos.x & 3]; + + // The idea here is we add on the dither amount adjusted by the alpha before it goes to the hw blend + // so after the alpha blend the resulting value should be the same as (Cs - Cd) * As + Cd + Dither. + if (PS_DITHER_ADJUST) + value *= alpha_blend > 0.f ? min(1.f / alpha_blend, 1.f) : 1.f; + if (PS_ROUND_INV) C.rgb -= value; else @@ -877,7 +884,7 @@ struct PSMain // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // GS: Color = 1, Alpha = 255 => output 1 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 - if (PS_DST_FMT == FMT_16 && PS_BLEND_MIX == 0) + if (PS_DST_FMT == FMT_16 && (PS_BLEND_MIX == 0 || PS_DITHER)) // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania C.rgb = float3(short3(C.rgb) & 0xF8); else if (PS_COLCLIP || PS_HDR) @@ -1113,7 +1120,7 @@ struct PSMain } } - ps_dither(C); + ps_dither(C, alpha_blend.a); // Color clamp/wrap needs to be done after sw blending and dithering ps_color_clamp_wrap(C); diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index e85b3999a8..1bb4fd2575 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1373,6 +1373,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) + fmt::format("#define PS_FBMASK {}\n", sel.fbmask) + fmt::format("#define PS_HDR {}\n", sel.hdr) + fmt::format("#define PS_DITHER {}\n", sel.dither) + + fmt::format("#define PS_DITHER_ADJUST {}\n", sel.dither_adjust) + fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp) + fmt::format("#define PS_BLEND_MIX {}\n", sel.blend_mix) + fmt::format("#define PS_ROUND_INV {}\n", sel.round_inv) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 589c634d81..a01607be92 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -4788,6 +4788,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_FBMASK", sel.fbmask); AddMacro(ss, "PS_HDR", sel.hdr); AddMacro(ss, "PS_DITHER", sel.dither); + AddMacro(ss, "PS_DITHER_ADJUST", sel.dither_adjust); AddMacro(ss, "PS_ZCLAMP", sel.zclamp); AddMacro(ss, "PS_PABE", sel.pabe); AddMacro(ss, "PS_SCANMSK", sel.scanmsk); diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index b3c320e991..86c99baff6 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 39; +static constexpr u32 SHADER_CACHE_VERSION = 40;