mirror of https://github.com/PCSX2/pcsx2.git
GS: Add constant adjustment in blend mix when reverse subtracting.
This commit is contained in:
parent
2e63a4c037
commit
1b18e02fe0
|
@ -776,20 +776,22 @@ void ps_blend(inout float4 Color, inout float As, float2 pos_xy)
|
|||
|
||||
// As/Af clamp alpha for Blend mix
|
||||
// We shouldn't clamp blend mix with clr1 as we want alpha higher
|
||||
if (PS_BLEND_MIX && PS_CLR_HW != 1)
|
||||
if (PS_BLEND_MIX > 0 && PS_CLR_HW != 1)
|
||||
C = min(C, 1.0f);
|
||||
|
||||
if (PS_BLEND_A == PS_BLEND_B)
|
||||
Color.rgb = D;
|
||||
else if (PS_BLEND_MIX)
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
else if (PS_BLEND_MIX == 2)
|
||||
Color.rgb = ((A - B) * C + D) + (124.0f/256.0f);
|
||||
else if (PS_BLEND_MIX == 1)
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
else
|
||||
Color.rgb = trunc(((A - B) * C) + D);
|
||||
|
||||
|
|
|
@ -755,20 +755,22 @@ void ps_blend(inout vec4 Color, inout float As)
|
|||
|
||||
// As/Af clamp alpha for Blend mix
|
||||
// We shouldn't clamp blend mix with clr1 as we want alpha higher
|
||||
#if PS_BLEND_MIX && PS_CLR_HW != 1
|
||||
#if PS_BLEND_MIX > 0 && PS_CLR_HW != 1
|
||||
C = min(C, 1.0f);
|
||||
#endif
|
||||
|
||||
#if PS_BLEND_A == PS_BLEND_B
|
||||
Color.rgb = D;
|
||||
#elif PS_BLEND_MIX
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
#elif PS_BLEND_MIX == 2
|
||||
Color.rgb = ((A - B) * C + D) + (124.0f/256.0f);
|
||||
#elif PS_BLEND_MIX == 1
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
#else
|
||||
Color.rgb = trunc((A - B) * C + D);
|
||||
|
|
|
@ -1054,21 +1054,23 @@ void ps_blend(inout vec4 Color, inout float As)
|
|||
|
||||
// As/Af clamp alpha for Blend mix
|
||||
// We shouldn't clamp blend mix with clr1 as we want alpha higher
|
||||
#if PS_BLEND_MIX && PS_CLR_HW != 1
|
||||
#if PS_BLEND_MIX > 0 && PS_CLR_HW != 1
|
||||
C = min(C, 1.0f);
|
||||
#endif
|
||||
|
||||
#if PS_BLEND_A == PS_BLEND_B
|
||||
Color.rgb = D;
|
||||
#elif PS_BLEND_MIX
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
#elif PS_BLEND_MIX == 2
|
||||
Color.rgb = ((A - B) * C + D) + (124.0f/256.0f);
|
||||
#elif PS_BLEND_MIX == 1
|
||||
Color.rgb = ((A - B) * C + D) - (124.0f/256.0f);
|
||||
#else
|
||||
Color.rgb = trunc((A - B) * C + D);
|
||||
#endif
|
||||
|
|
|
@ -268,7 +268,7 @@ struct alignas(16) GSHWDrawConfig
|
|||
u32 clr_hw : 3;
|
||||
u32 hdr : 1;
|
||||
u32 colclip : 1;
|
||||
u32 blend_mix : 1;
|
||||
u32 blend_mix : 2;
|
||||
u32 pabe : 1;
|
||||
u32 no_color : 1; // disables color output entirely (depth only)
|
||||
u32 no_color1 : 1; // disables second color output (when unnecessary)
|
||||
|
|
|
@ -2712,7 +2712,7 @@ void GSRendererHW::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER, bool&
|
|||
{
|
||||
// For mixed blend, the source blend is done in the shader (so we use CONST_ONE as a factor).
|
||||
m_conf.blend = {true, GSDevice::CONST_ONE, blend.dst, blend.op, m_conf.ps.blend_c == 2, ALPHA.FIX};
|
||||
m_conf.ps.blend_mix = 1;
|
||||
m_conf.ps.blend_mix = (blend.op == GSDevice::OP_REV_SUBTRACT) ? 2 : 1;
|
||||
|
||||
// Elide DSB colour output if not used by dest.
|
||||
m_conf.ps.no_color1 |= !GSDevice::IsDualSourceBlendFactor(blend.dst);
|
||||
|
|
|
@ -1210,7 +1210,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
|
|||
setFnConstantI(m_fn_constants, pssel.clr_hw, GSMTLConstantIndex_PS_CLR_HW);
|
||||
setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR);
|
||||
setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP);
|
||||
setFnConstantB(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);
|
||||
setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);
|
||||
setFnConstantB(m_fn_constants, pssel.fixed_one_a, GSMTLConstantIndex_PS_FIXED_ONE_A);
|
||||
setFnConstantB(m_fn_constants, pssel.pabe, GSMTLConstantIndex_PS_PABE);
|
||||
setFnConstantB(m_fn_constants, pssel.no_color, GSMTLConstantIndex_PS_NO_COLOR);
|
||||
|
|
|
@ -48,7 +48,7 @@ constant uint PS_BLEND_D [[function_constant(GSMTLConstantIndex_PS_BL
|
|||
constant uint PS_CLR_HW [[function_constant(GSMTLConstantIndex_PS_CLR_HW)]];
|
||||
constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]];
|
||||
constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]];
|
||||
constant bool PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]];
|
||||
constant uint PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]];
|
||||
constant bool PS_FIXED_ONE_A [[function_constant(GSMTLConstantIndex_PS_FIXED_ONE_A)]];
|
||||
constant bool PS_PABE [[function_constant(GSMTLConstantIndex_PS_PABE)]];
|
||||
constant bool PS_NO_COLOR [[function_constant(GSMTLConstantIndex_PS_NO_COLOR)]];
|
||||
|
@ -708,7 +708,7 @@ struct PSMain
|
|||
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
|
||||
// GS: Color = 1, Alpha = 255 => output 1
|
||||
// GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875
|
||||
if (PS_DFMT == FMT_16 && (PS_HDR || !PS_BLEND_MIX))
|
||||
if (PS_DFMT == FMT_16 && (PS_HDR || PS_BLEND_MIX == 0))
|
||||
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
||||
C.rgb = float3(short3(C.rgb) & 0xF8);
|
||||
else if (PS_COLCLIP && !PS_HDR)
|
||||
|
@ -745,19 +745,21 @@ struct PSMain
|
|||
|
||||
// As/Af clamp alpha for Blend mix
|
||||
// We shouldn't clamp blend mix with clr1 as we want alpha higher
|
||||
if (PS_BLEND_MIX && PS_CLR_HW != 1)
|
||||
if (PS_BLEND_MIX > 0 && PS_CLR_HW != 1)
|
||||
C = min(C, 1.f);
|
||||
|
||||
if (PS_BLEND_A == PS_BLEND_B)
|
||||
Color.rgb = D;
|
||||
else if (PS_BLEND_MIX)
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
// In blend_mix, HW adds on some alpha factor * dst.
|
||||
// Truncating here wouldn't quite get the right result because it prevents the <1 bit here from combining with a <1 bit in dst to form a ≥1 amount that pushes over the truncation.
|
||||
// Instead, apply an offset to convert HW's round to a floor.
|
||||
// Since alpha is in 1/128 increments, subtracting (0.5 - 0.5/128 == 127/256) would get us what we want if GPUs blended in full precision.
|
||||
// But they don't. Details here: https://github.com/PCSX2/pcsx2/pull/6809#issuecomment-1211473399
|
||||
// Based on the scripts at the above link, the ideal choice for Intel GPUs is 126/256, AMD 120/256. Nvidia is a lost cause.
|
||||
// 124/256 seems like a reasonable compromise, providing the correct answer 99.3% of the time on Intel (vs 99.6% for 126/256), and 97% of the time on AMD (vs 97.4% for 120/256).
|
||||
else if (PS_BLEND_MIX == 2)
|
||||
Color.rgb = ((A - B) * C + D) + (124.f/256.f);
|
||||
else if (PS_BLEND_MIX == 1)
|
||||
Color.rgb = ((A - B) * C + D) - (124.f/256.f);
|
||||
else
|
||||
Color.rgb = trunc((A - B) * C + D);
|
||||
|
|
Loading…
Reference in New Issue