GS/HW: Correct TEXA behaviour on shuffles

This commit is contained in:
refractionpcsx2 2024-06-04 13:59:36 +01:00
parent 986a9773e6
commit d34f359621
6 changed files with 80 additions and 159 deletions

View File

@ -766,7 +766,7 @@ float4 ps_color(PS_INPUT input)
float4 T = sample_color(st, input.t.w);
#endif
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_before = uint4(T);
if (PS_PROCESS_BA & SHUFFLE_READ)
@ -783,6 +783,8 @@ float4 ps_color(PS_INPUT input)
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
}
T.a = (T.a >= 127.5f ? TA.y : !PS_AEM || any(int3(T.rgb) & 0xF8) ? TA.x : 0) * 255.0f;
}
float4 C = tfx(T, input.c);
@ -1057,7 +1059,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
if (PS_SHUFFLE)
{
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_after = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ)
@ -1072,12 +1074,12 @@ PS_OUTPUT ps_main(PS_INPUT input)
}
}
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
uint4 denorm_c = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ)
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
else
@ -1086,6 +1088,8 @@ PS_OUTPUT ps_main(PS_INPUT input)
// Copy of a 16-bit source into this target
else if (PS_READ16_SRC)
{
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5));
if (denorm_c.a & 0x80u)
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u));
@ -1097,44 +1101,22 @@ PS_OUTPUT ps_main(PS_INPUT input)
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{
C.rb = C.br;
if ((denorm_c.a & 0x80u) != 0u)
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
float g_temp = C.g;
if ((denorm_c.g & 0x80u) != 0u)
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
C.g = C.a;
C.a = g_temp;
}
else if(PS_PROCESS_BA & SHUFFLE_READ)
{
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
C.ga = C.aa;
}
else
{
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
C.ga = C.gg;
}
}
else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA
{
if ((denorm_c.g & 0x80u) != 0u)
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.a & 0x80u) != 0u)
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
}
}
ps_dither(C.rgb, alpha_blend.a, input.p.xy);

View File

@ -686,7 +686,7 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME
uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8);
@ -699,6 +699,8 @@ vec4 ps_color()
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
#endif
T.a = ((T.a >= 127.5f) ? TA.y : ((PS_AEM == 0 || any(bvec3(ivec3(T.rgb) & ivec3(0xF8)))) ? TA.x : 0.0f)) * 255.0f;
#endif
vec4 C = tfx(T, PSin.c);
@ -1042,7 +1044,7 @@ void ps_main()
#if PS_SHUFFLE
#if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME
uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
@ -1053,59 +1055,39 @@ void ps_main()
#endif
#endif
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_PROCESS_BA & SHUFFLE_READ)
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
#endif
// Copy of a 16-bit source into this target
#elif PS_READ16_SRC
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if (bool(denorm_c.a & 0x80u))
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br;
if ((denorm_c.a & 0x80u) != 0u)
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.g & 0x80u) != 0u)
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
uvec4 denorm_c = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ)
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
C.ga = C.rg;
#endif
// Copy of a 16-bit source into this target
#elif PS_READ16_SRC
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if (bool(denorm_c.a & 0x80u))
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif // PS_PROCESS_BA
#else // PS_SHUFFLE_ACROSS
if ((denorm_c.g & 0x80u) != 0u)
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.a & 0x80u) != 0u)
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
#endif // PS_SHUFFLE_ACROSS
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br;
float g_temp = C.g;
C.g = C.a;
C.a = g_temp;
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;
C.ga = C.aa;
#else
C.rb = C.rr;
C.ga = C.gg;
#endif // PS_PROCESS_BA
#endif // PS_SHUFFLE_ACROSS
#endif // PS_SHUFFLE
ps_dither(C.rgb, alpha_blend.a);

View File

@ -953,7 +953,7 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if PS_SHUFFLE && !PS_READ16_SRC && !PS_SHUFFLE_SAME
uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8);
@ -966,6 +966,8 @@ vec4 ps_color()
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
#endif
T.a = ((T.a >= 127.5f) ? TA.y : ((PS_AEM == 0 || any(bvec3(ivec3(T.rgb) & ivec3(0xF8)))) ? TA.x : 0.0f)) * 255.0f;
#endif
vec4 C = tfx(T, vsIn.c);
@ -1307,7 +1309,7 @@ void main()
ps_blend(C, alpha_blend);
#if PS_SHUFFLE
#if (SW_BLEND || PS_TFX != 1) && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME
uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
@ -1318,59 +1320,37 @@ void main()
#endif
#endif
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_PROCESS_BA & SHUFFLE_READ)
uvec4 denorm_c = uvec4(C);
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
#endif
// Copy of a 16-bit source into this target
#elif PS_READ16_SRC
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
// Write RB part. Mask will take care of the correct destination
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br;
if ((denorm_c.a & 0x80u) != 0u)
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.g & 0x80u) != 0u)
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
float g_temp = C.g;
C.g = C.a;
C.a = g_temp;
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
C.ga = C.aa;
#else
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
C.ga = C.gg;
#endif // PS_PROCESS_BA
#else // PS_SHUFFLE_ACROSS
if ((denorm_c.g & 0x80u) != 0u)
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.a & 0x80u) != 0u)
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
#endif // PS_SHUFFLE_ACROSS
#endif // PS_SHUFFLE

View File

@ -4743,6 +4743,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
// m_ps_sel.tex_fmt = 0; // removed as an optimization
//ASSERT(tex->m_target);
m_conf.ps.aem = TEXA.AEM;
// Require a float conversion if the texture is a depth format; otherwise use integral scaling
if (psm.depth)
@ -4753,17 +4754,11 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
// Shuffle is a 16-bit format, so AEM is always required
if (m_cached_ctx.TEX0.TCC)
{
m_conf.ps.aem = TEXA.AEM;
GSVector4 ta(TEXA & GSVector4i::x000000ff());
ta /= 255.0f;
m_conf.cb_ps.TA_MaxDepth_Af.x = ta.x;
m_conf.cb_ps.TA_MaxDepth_Af.y = ta.y;
}
else
{
m_conf.cb_ps.TA_MaxDepth_Af.x = 0;
m_conf.cb_ps.TA_MaxDepth_Af.y = 1.0f;
}
// The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea.
bilinear &= m_vt.IsLinear();

View File

@ -831,7 +831,7 @@ struct PSMain
else
T = sample_color(st);
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_before = uint4(T);
if (PS_PROCESS_BA & SHUFFLE_READ)
@ -848,6 +848,8 @@ struct PSMain
T.b = float((denorm_c_before.g << 1) & 0xF8);
T.a = float(denorm_c_before.g & 0x80);
}
T.a = (T.a >= 127.5 ? cb.ta.y : !PS_AEM || any((int3(T.rgb) & 0xF8) != 0) ? cb.ta.x : 0.f) * 255.f;
}
float4 C = tfx(T, IIP ? in.c : in.fc);
@ -1125,7 +1127,7 @@ struct PSMain
if (PS_SHUFFLE)
{
if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
{
uint4 denorm_c_after = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ)
@ -1140,12 +1142,11 @@ struct PSMain
}
}
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(cb.ta * 255.5f);
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
uint4 denorm_c = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ)
C = (denorm_c.b & 0x7F) | (denorm_c.a & 0x80);
else
@ -1154,6 +1155,9 @@ struct PSMain
// Copy of a 16-bit source into this target
else if (PS_READ16_SRC)
{
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(cb.ta * 255.5f);
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7) << 5);
if (denorm_c.a & 0x80)
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
@ -1165,44 +1169,22 @@ struct PSMain
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{
C.rb = C.br;
if ((denorm_c.a & 0x80) != 0)
C.g = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
else
C.g = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
float g_temp = C.g;
if ((denorm_c.g & 0x80) != 0)
C.a = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
else
C.a = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
C.g = C.a;
C.a = g_temp;
}
else if(PS_PROCESS_BA & SHUFFLE_READ)
{
C.rb = C.bb;
if ((denorm_c.a & 0x80) != 0)
C.ga = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
C.ga = C.aa;
}
else
{
C.rb = C.rr;
if ((denorm_c.g & 0x80) != 0)
C.ga = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
C.ga = C.gg;
}
}
else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA
{
if ((denorm_c.g & 0x80) != 0)
C.g = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
else
C.g = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
if ((denorm_c.a & 0x80) != 0)
C.a = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
else
C.a = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
}
}
ps_dither(C, alpha_blend.a);

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 49;
static constexpr u32 SHADER_CACHE_VERSION = 50;