GS/HW: Allow blending on normal shuffles

This commit is contained in:
refractionpcsx2 2024-01-07 10:39:13 +00:00
parent 4cd385dbff
commit 01842a3c6b
6 changed files with 271 additions and 151 deletions

View File

@ -742,6 +742,25 @@ float4 ps_color(PS_INPUT input)
float4 T = sample_color(st, input.t.w);
#endif
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_before = uint4(T);
if (PS_READ_BA)
{
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
T.b = float((denorm_c_before.a << 1) & 0xF8);
T.a = float(denorm_c_before.a & 0x80);
}
else
{
T.r = float((denorm_c_before.r << 3) & 0xF8);
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
}
}
float4 C = tfx(T, input.c);
atst(C);
@ -925,48 +944,6 @@ PS_OUTPUT ps_main(PS_INPUT input)
discard;
}
if (PS_SHUFFLE)
{
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
else
C.ga = C.rg;
}
// Copy of a 16bit source in to this target
else if (PS_READ16_SRC)
{
C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5));
if (denorm_c.a & 0x80u)
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u));
else
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
}
// Write RB part. Mask will take care of the correct destination
else if (PS_READ_BA)
{
C.rb = C.bb;
if (denorm_c.a & 0x80u)
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
else
{
C.rb = C.rr;
if (denorm_c.g & 0x80u)
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
}
// Must be done before alpha correction
// AA (Fixed one) will output a coverage of 1.0 as alpha
@ -1023,6 +1000,63 @@ PS_OUTPUT ps_main(PS_INPUT input)
ps_blend(C, alpha_blend, input.p.xy);
if (PS_SHUFFLE)
{
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_after = uint4(C);
if (PS_READ_BA)
{
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
}
else
{
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
}
}
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
else
C.ga = C.rg;
}
// Copy of a 16bit source in to this target
else if (PS_READ16_SRC)
{
C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5));
if (denorm_c.a & 0x80u)
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u));
else
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
}
// Write RB part. Mask will take care of the correct destination
else if (PS_READ_BA)
{
C.rb = C.bb;
if (denorm_c.a & 0x80u)
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
else
{
C.rb = C.rr;
if (denorm_c.g & 0x80u)
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
}
ps_dither(C.rgb, input.p.xy);
// Color clamp/wrap needs to be done after sw blending and dithering

View File

@ -687,6 +687,21 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
uvec4 denorm_c_before = uvec4(T);
#if PS_READ_BA
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
T.b = float((denorm_c_before.a << 1) & 0xF8);
T.a = float(denorm_c_before.a & 0x80);
#else
T.r = float((denorm_c_before.r << 3) & 0xF8);
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
#endif
#endif
vec4 C = tfx(T, PSin.c);
atst(C);
@ -937,7 +952,56 @@ void ps_main()
vec4 C = ps_color();
// Must be done before alpha correction
// AA (Fixed one) will output a coverage of 1.0 as alpha
#if PS_FIXED_ONE_A
C.a = 128.0f;
#endif
#if SW_AD_TO_HW
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
vec4 alpha_blend = vec4(RT.a / 128.0f);
#else
vec4 alpha_blend = vec4(C.a / 128.0f);
#endif
// Correct the ALPHA value based on the output format
#if (PS_DST_FMT == FMT_16)
float A_one = 128.0f; // alpha output will be 0x80
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0)
if(C.a < 128.0f) C.a += 128.0f;
#endif
// Get first primitive that will write a failling alpha value
#if PS_DATE == 1
// DATM == 0
// Pixel with alpha equal to 1 will failed (128-255)
SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
return;
#elif PS_DATE == 2
// DATM == 1
// Pixel with alpha equal to 0 will failed (0-127)
SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
return;
#endif
ps_blend(C, alpha_blend);
#if PS_SHUFFLE
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
uvec4 denorm_c_after = uvec4(C);
#if PS_READ_BA
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
#else
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
#endif
#endif
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
@ -991,43 +1055,6 @@ void ps_main()
#endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE
// Must be done before alpha correction
// AA (Fixed one) will output a coverage of 1.0 as alpha
#if PS_FIXED_ONE_A
C.a = 128.0f;
#endif
#if SW_AD_TO_HW
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
vec4 alpha_blend = vec4(RT.a / 128.0f);
#else
vec4 alpha_blend = vec4(C.a / 128.0f);
#endif
// Correct the ALPHA value based on the output format
#if (PS_DST_FMT == FMT_16)
float A_one = 128.0f; // alpha output will be 0x80
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0)
if(C.a < 128.0f) C.a += 128.0f;
#endif
// Get first primitive that will write a failling alpha value
#if PS_DATE == 1
// DATM == 0
// Pixel with alpha equal to 1 will failed (128-255)
SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
return;
#elif PS_DATE == 2
// DATM == 1
// Pixel with alpha equal to 0 will failed (0-127)
SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
return;
#endif
ps_blend(C, alpha_blend);
ps_dither(C.rgb);
// Color clamp/wrap needs to be done after sw blending and dithering

View File

@ -933,6 +933,21 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
uvec4 denorm_c_before = uvec4(T);
#if PS_READ_BA
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
T.b = float((denorm_c_before.a << 1) & 0xF8);
T.a = float(denorm_c_before.a & 0x80);
#else
T.r = float((denorm_c_before.r << 3) & 0xF8);
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
#endif
#endif
vec4 C = tfx(T, vsIn.c);
atst(C);
@ -1184,40 +1199,6 @@ void main()
vec4 C = ps_color();
#if PS_SHUFFLE
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_READ_BA)
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
#endif
// Copy of a 16bit source in to this target
#elif PS_READ16_SRC
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
// Write RB part. Mask will take care of the correct destination
#elif PS_READ_BA
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE
// Must be done before alpha correction
// AA (Fixed one) will output a coverage of 1.0 as alpha
@ -1254,9 +1235,53 @@ void main()
o_col0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF);
#else
ps_blend(C, alpha_blend);
#if PS_SHUFFLE
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
uvec4 denorm_c_after = uvec4(C);
#if PS_READ_BA
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
#else
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
#endif
#endif
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_READ_BA)
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
#endif
// Copy of a 16bit source in to this target
#elif PS_READ16_SRC
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
// Write RB part. Mask will take care of the correct destination
#elif PS_READ_BA
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif // PS_SHUFFLE_SAME
#endif // PS_SHUFFLE
ps_dither(C.rgb);
// Color clamp/wrap needs to be done after sw blending and dithering

View File

@ -5169,7 +5169,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
}
bool blending_alpha_pass = false;
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && (m_conf.colormask.wrgba & 0x7))
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
{
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass);
}

View File

@ -807,6 +807,25 @@ struct PSMain
else
T = sample_color(st);
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_before = uint4(T);
if (PS_READ_BA)
{
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
T.b = float((denorm_c_before.a << 1) & 0xF8);
T.a = float(denorm_c_before.a & 0x80);
}
else
{
T.r = float((denorm_c_before.r << 3) & 0xF8);
T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0));
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
}
}
float4 C = tfx(T, IIP ? in.c : in.fc);
if (!atst(C))
discard_fragment();
@ -1005,41 +1024,6 @@ struct PSMain
float4 C = ps_color();
if (PS_SHUFFLE)
{
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(cb.ta * 255.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80);
else
C.ga = C.rg;
}
// Copy of a 16bit source in to this target
else if (PS_READ16_SRC)
{
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
if (denorm_c.a & 0x80)
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
}
// Write RB part. Mask will take care of the correct destination
else if (PS_READ_BA)
{
C.rb = C.bb;
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
}
else
{
C.rb = C.rr;
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
}
}
// Must be done before alpha correction
// AA (Fixed one) will output a coverage of 1.0 as alpha
@ -1077,6 +1061,56 @@ struct PSMain
ps_blend(C, alpha_blend);
if (PS_SHUFFLE)
{
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_after = uint4(C);
if (PS_READ_BA)
{
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
}
else
{
C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
}
}
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(cb.ta * 255.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80);
else
C.ga = C.rg;
}
// Copy of a 16bit source in to this target
else if (PS_READ16_SRC)
{
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
if (denorm_c.a & 0x80)
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
}
// Write RB part. Mask will take care of the correct destination
else if (PS_READ_BA)
{
C.rb = C.bb;
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
}
else
{
C.rb = C.rr;
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
}
}
ps_dither(C);
// Color clamp/wrap needs to be done after sw blending and dithering

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 37;
static constexpr u32 SHADER_CACHE_VERSION = 38;