GS/HW: Improve channel + texture shuffle detection and processing

This commit is contained in:
refractionpcsx2 2024-03-03 18:02:03 +00:00
parent 9e42bf7385
commit 4ba43b8496
14 changed files with 335 additions and 160 deletions

View File

@ -5,6 +5,10 @@
#define FMT_24 1
#define FMT_16 2
#define SHUFFLE_READ 1
#define SHUFFLE_WRITE 2
#define SHUFFLE_READWRITE 3
#ifndef VS_TME
#define VS_IIP 0
#define VS_TME 1
@ -41,7 +45,9 @@
#define PS_REGION_RECT 0
#define PS_SHUFFLE 0
#define PS_SHUFFLE_SAME 0
#define PS_READ_BA 0
#define PS_PROCESS_BA 0
#define PS_PROCESS_RG 0
#define PS_SHUFFLE_ACROSS 0
#define PS_READ16_SRC 0
#define PS_DST_FMT 0
#define PS_DEPTH_FMT 0
@ -761,10 +767,10 @@ float4 ps_color(PS_INPUT input)
float4 T = sample_color(st, input.t.w);
#endif
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_before = uint4(T);
if (PS_READ_BA)
if (PS_PROCESS_BA & SHUFFLE_READ)
{
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
@ -1028,10 +1034,10 @@ PS_OUTPUT ps_main(PS_INPUT input)
if (PS_SHUFFLE)
{
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_after = uint4(C);
if (PS_READ_BA)
if (PS_PROCESS_BA & SHUFFLE_READ)
{
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
@ -1049,7 +1055,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
if (PS_PROCESS_BA & SHUFFLE_READ)
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
else
C.ga = C.rg;
@ -1063,23 +1069,48 @@ PS_OUTPUT ps_main(PS_INPUT input)
else
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
}
// Write RB part. Mask will take care of the correct destination
else if (PS_READ_BA)
else if (PS_SHUFFLE_ACROSS)
{
C.rb = C.bb;
if (denorm_c.a & 0x80u)
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{
C.rb = C.br;
if ((denorm_c.a & 0x80u) != 0u)
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.g & 0x80u) != 0u)
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
}
else if(PS_PROCESS_BA & SHUFFLE_READ)
{
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
else
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
{
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
}
else
else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA
{
C.rb = C.rr;
if (denorm_c.g & 0x80u)
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
if ((denorm_c.g & 0x80u) != 0u)
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.a & 0x80u) != 0u)
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
}
}

View File

@ -7,6 +7,10 @@
#define FMT_24 1
#define FMT_16 2
#define SHUFFLE_READ 1
#define SHUFFLE_WRITE 2
#define SHUFFLE_READWRITE 3
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
// to detect bad sampling due to upscaling
//#define TEX_COORD_DEBUG
@ -695,9 +699,9 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T);
#if PS_READ_BA
#if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
T.b = float((denorm_c_before.a << 1) & 0xF8);
@ -1027,9 +1031,9 @@ void ps_main()
#if PS_SHUFFLE
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C);
#if PS_READ_BA
#if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
#else
@ -1043,7 +1047,7 @@ void ps_main()
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_READ_BA)
#if (PS_PROCESS_BA & SHUFFLE_READ)
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
@ -1055,40 +1059,42 @@ void ps_main()
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
// Write RB part. Mask will take care of the correct destination
#elif PS_READ_BA
C.rb = C.bb;
// FIXME precompute my_TA & 0x80
// Write GA part. Mask will take care of the correct destination
// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n"
// However Nvidia emulate it with an if (at least on kepler arch) ...\n"
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
// c.ga = vec2(float(denorm_c.a));
if (bool(denorm_c.a & 0x80u))
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br;
if ((denorm_c.a & 0x80u) != 0u)
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.g & 0x80u) != 0u)
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif // PS_PROCESS_BA
#else // PS_SHUFFLE_ACROSS
if ((denorm_c.g & 0x80u) != 0u)
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
C.rb = C.rr;
if (bool(denorm_c.g & 0x80u))
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.a & 0x80u) != 0u)
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
// Nice idea but step/mix requires 4 instructions
// set / trunc / I2F / Mad
//
// float sel = step(128.0f, c.g);
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
#endif // PS_SHUFFLE_SAME
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
#endif // PS_SHUFFLE_ACROSS
#endif // PS_SHUFFLE
ps_dither(C.rgb, alpha_blend.a);

View File

@ -233,6 +233,10 @@ void main()
#define FMT_24 1
#define FMT_16 2
#define SHUFFLE_READ 1
#define SHUFFLE_WRITE 2
#define SHUFFLE_READWRITE 3
#ifndef VS_TME
#define VS_TME 1
#define VS_FST 1
@ -266,7 +270,9 @@ void main()
#define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0
#define PS_SHUFFLE_SAME 0
#define PS_READ_BA 0
#define PS_PROCESS_BA 0
#define PS_PROCESS_RG 0
#define PS_SHUFFLE_ACROSS 0
#define PS_WRITE_RG 0
#define PS_READ16_SRC 0
#define PS_DST_FMT 0
@ -945,9 +951,9 @@ vec4 ps_color()
vec4 T = sample_color(st);
#endif
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T);
#if PS_READ_BA
#if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
T.b = float((denorm_c_before.a << 1) & 0xF8);
@ -1277,9 +1283,9 @@ void main()
ps_blend(C, alpha_blend);
#if PS_SHUFFLE
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
#if SW_BLEND && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C);
#if PS_READ_BA
#if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
#else
@ -1293,7 +1299,7 @@ void main()
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
#if PS_SHUFFLE_SAME
#if (PS_READ_BA)
#if (PS_PROCESS_BA & SHUFFLE_READ)
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
#else
C.ga = C.rg;
@ -1306,19 +1312,42 @@ void main()
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
// Write RB part. Mask will take care of the correct destination
#elif PS_READ_BA
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
C.rb = C.rr;
#elif PS_SHUFFLE_ACROSS
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
C.rb = C.br;
if ((denorm_c.a & 0x80u) != 0u)
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.g & 0x80u) != 0u)
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
#elif(PS_PROCESS_BA & SHUFFLE_READ)
C.rb = C.bb;
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
C.rb = C.rr;
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif // PS_PROCESS_BA
#else // PS_SHUFFLE_ACROSS
if ((denorm_c.g & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif // PS_SHUFFLE_SAME
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
if ((denorm_c.a & 0x80u) != 0u)
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
else
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
#endif // PS_SHUFFLE_ACROSS
#endif // PS_SHUFFLE
ps_dither(C.rgb, alpha_blend.a);

View File

@ -317,7 +317,9 @@ struct alignas(16) GSHWDrawConfig
u32 shuffle : 1;
u32 shuffle_same : 1;
u32 real16src: 1;
u32 read_ba : 1;
u32 process_ba : 2;
u32 process_rg : 2;
u32 shuffle_across : 1;
u32 write_rg : 1;
u32 fbmask : 1;

View File

@ -1679,7 +1679,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_REGION_RECT", sel.region_rect);
sm.AddMacro("PS_SHUFFLE", sel.shuffle);
sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
sm.AddMacro("PS_READ_BA", sel.read_ba);
sm.AddMacro("PS_PROCESS_BA", sel.process_ba);
sm.AddMacro("PS_PROCESS_RG", sel.process_rg);
sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
sm.AddMacro("PS_READ16_SRC", sel.real16src);
sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);

View File

@ -2833,7 +2833,9 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_REGION_RECT", sel.region_rect);
sm.AddMacro("PS_SHUFFLE", sel.shuffle);
sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
sm.AddMacro("PS_READ_BA", sel.read_ba);
sm.AddMacro("PS_PROCESS_BA", sel.process_ba);
sm.AddMacro("PS_PROCESS_RG", sel.process_rg);
sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
sm.AddMacro("PS_READ16_SRC", sel.real16src);
sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);

View File

@ -328,7 +328,7 @@ void GSRendererHW::ExpandLineIndices()
}
// Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color
void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex)
void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex)
{
const u32 count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0];
@ -336,16 +336,22 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
// Could be drawing upside down or just back to front on the actual verts.
const GSVertex* start_verts = (v[0].XYZ.X <= v[m_vertex.tail - 2].XYZ.X) ? &v[0] : &v[m_vertex.tail - 2];
const GSVertex first_vert = (start_verts[0].XYZ.X <= start_verts[1].XYZ.X) ? start_verts[0] : start_verts[1];
const GSVertex second_vert = (start_verts[0].XYZ.X <= start_verts[1].XYZ.X) ? start_verts[1] : start_verts[0];
// vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors
const int pos = (first_vert.XYZ.X - o.OFX) & 0xFF;
write_ba = (pos > 112 && pos < 136);
// Read texture is 8 to 16 pixels (same as above)
const float tw = static_cast<float>(1u << m_cached_ctx.TEX0.TW);
int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S);
int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S * 16.0f);
tex_pos &= 0xFF;
shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8;
const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8;
process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0;
process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0;
// "same group" means it can read blue and write alpha using C32 tricks
read_ba = (tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF0000);
process_ba |= ((tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF0000) || full_width) ? SHUFFLE_READ : 0;
process_rg |= (!(process_ba & SHUFFLE_READ) || full_width) ? SHUFFLE_READ : 0;
// Another way of selecting whether to read RG/BA is to use region repeat.
// Ace Combat 04 reads RG, writes to RGBA by setting a MINU of 1015.
@ -356,9 +362,29 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
m_cached_ctx.CLAMP.MAXV);
// offset coordinates swap around RG/BA.
const bool invert = read_ba; // (tex_pos > 112 && tex_pos < 144), i.e. 8 fixed point
const u32 minu = (m_cached_ctx.CLAMP.MINU & 8) ^ (invert ? 8 : 0);
read_ba = ((minu & 8) != 0);
const u32 maxu = (m_cached_ctx.CLAMP.MAXU & 8);
const u32 minu = (m_cached_ctx.CLAMP.MINU & 8);
if (maxu)
{
process_ba |= SHUFFLE_READ;
process_rg &= ~SHUFFLE_READ;
if (!PRIM->ABE && (process_rg & SHUFFLE_WRITE))
{
process_ba &= ~SHUFFLE_WRITE;
shuffle_across = true;
}
}
else if (minu == 0)
{
process_rg |= SHUFFLE_READ;
process_ba &= ~SHUFFLE_READ;
if (!PRIM->ABE && (process_ba & SHUFFLE_WRITE))
{
process_rg &= ~SHUFFLE_WRITE;
shuffle_across = true;
}
}
}
if (m_split_texture_shuffle_pages > 0)
@ -418,7 +444,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
// If a game does the texture and frame doubling differently, they can burn in hell.
if (!m_copy_16bit_to_target_shuffle && m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block())
{
unsigned int max_tex_draw_width = std::min(static_cast<int>(m_vt.m_max.t.x + (!read_ba ? 8 : 0)), 1 << m_cached_ctx.TEX0.TW);
unsigned int max_tex_draw_width = std::min(static_cast<int>(m_vt.m_max.t.x + (!process_ba ? 8 : 0)), 1 << m_cached_ctx.TEX0.TW);
const unsigned int clamp_minu = m_context->CLAMP.MINU;
const unsigned int clamp_maxu = m_context->CLAMP.MAXU;
@ -473,15 +499,19 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
const int reversed_U = (v[0].U > v[1].U) ? 1 : 0;
for (u32 i = 0; i < count; i += 2)
{
if (write_ba)
v[i + reversed_pos].XYZ.X -= 128u;
else
v[i + 1 - reversed_pos].XYZ.X += 128u;
if (read_ba)
v[i + reversed_U].U -= 128u;
else
v[i + 1 - reversed_U].U += 128u;
if (!full_width)
{
if (process_ba & SHUFFLE_WRITE)
v[i + reversed_pos].XYZ.X -= 128u;
else
v[i + 1 - reversed_pos].XYZ.X += 128u;
if (process_ba & SHUFFLE_READ)
v[i + reversed_U].U -= 128u;
else
v[i + 1 - reversed_U].U += 128u;
}
if (half_bottom_vert)
{
@ -530,15 +560,19 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
for (u32 i = 0; i < count; i += 2)
{
if (write_ba)
v[i + reversed_pos].XYZ.X -= 128u;
else
v[i + 1 - reversed_pos].XYZ.X += 128u;
if (read_ba)
v[i + reversed_S].ST.S -= offset_8pix;
else
v[i + 1 - reversed_S].ST.S += offset_8pix;
if (!full_width)
{
if (process_ba & SHUFFLE_WRITE)
v[i + reversed_pos].XYZ.X -= 128u;
else
v[i + 1 - reversed_pos].XYZ.X += 128u;
if (process_ba & SHUFFLE_READ)
v[i + reversed_S].ST.S -= offset_8pix;
else
v[i + 1 - reversed_S].ST.S += offset_8pix;
}
if (half_bottom_vert)
{
@ -579,18 +613,21 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
}
}
// Update vertex trace too. Avoid issue to compute bounding box
if (write_ba)
m_vt.m_min.p.x -= 8.0f;
else
m_vt.m_max.p.x += 8.0f;
if (!m_same_group_texture_shuffle)
if (!full_width)
{
if (read_ba)
m_vt.m_min.t.x -= 8.0f;
// Update vertex trace too. Avoid issue to compute bounding box
if (process_ba & SHUFFLE_WRITE)
m_vt.m_min.p.x -= 8.0f;
else
m_vt.m_max.t.x += 8.0f;
m_vt.m_max.p.x += 8.0f;
// Texture-coordinate bounds follow the READ side of the shuffle (the vertex loops above shift U/S
// based on SHUFFLE_READ), not the WRITE side that drives the position bounds.
if (!m_same_group_texture_shuffle)
{
	if (process_ba & SHUFFLE_READ)
		m_vt.m_min.t.x -= 8.0f;
	else
		m_vt.m_max.t.x += 8.0f;
}
}
if (half_right_vert)
@ -1858,7 +1895,8 @@ void GSRendererHW::Draw()
// Fortunately, it seems to change the FBMSK along the way, so this check alone is sufficient.
// Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore
// we need to split on those too.
m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK;
m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK &&
m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block();
#ifdef ENABLE_OGL_DEBUG
if (m_channel_shuffle)
@ -2507,6 +2545,12 @@ void GSRendererHW::Draw()
}
}
if (rt && m_channel_shuffle)
{
m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0;
m_last_channel_shuffle_end_block = rt->m_end_block;
}
GSTextureCache::Target* ds = nullptr;
GIFRegTEX0 ZBUF_TEX0;
if (!no_ds)
@ -2601,6 +2645,11 @@ void GSRendererHW::Draw()
GL_INS("Channel shuffle effect detected (2nd shot)");
m_channel_shuffle = true;
m_last_channel_shuffle_fbmsk = m_context->FRAME.FBMSK;
if (rt)
{
m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0;
m_last_channel_shuffle_end_block = rt->m_end_block;
}
}
else
{
@ -3378,17 +3427,15 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS
m_conf.ps.shuffle = 1;
m_conf.ps.dst_fmt = GSLocalMemory::PSM_FMT_32;
bool write_ba;
bool read_ba;
u32 process_rg = 0;
u32 process_ba = 0;
bool shuffle_across = true;
ConvertSpriteTextureShuffle(write_ba, read_ba, rt, tex);
ConvertSpriteTextureShuffle(process_rg, process_ba, shuffle_across, rt, tex);
// If date is enabled you need to test the green channel instead of the
// alpha channel. Only enable this code in DATE mode to reduce the number
// of shader.
m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_cached_ctx.TEST.DATE;
m_conf.ps.read_ba = read_ba;
// If date is enabled you need to test the green channel instead of the alpha channel.
// Only enable this code in DATE mode to reduce the number of shaders.
m_conf.ps.write_rg = (process_rg & SHUFFLE_WRITE) && features.texture_barrier && m_cached_ctx.TEST.DATE;
m_conf.ps.real16src = m_copy_16bit_to_target_shuffle;
m_conf.ps.shuffle_same = m_same_group_texture_shuffle;
// Please bang my head against the wall!
@ -3401,30 +3448,26 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS
// r = rb mask, g = ga mask
const GSVector2i rb_ga_mask = GSVector2i(fbmask & 0xFF, (fbmask >> 8) & 0xFF);
m_conf.ps.process_rg = process_rg;
m_conf.ps.process_ba = process_ba;
m_conf.ps.shuffle_across = shuffle_across;
// Ace Combat 04 sets FBMSK to 0 for the shuffle, duplicating RG across RGBA.
// Given how touchy texture shuffles are, I'm not ready to make it 100% dependent on the real FBMSK yet.
// TODO: Remove this if, and see what breaks.
if (fbmask != 0)
{
m_conf.colormask.wrgba = 0;
}
else
{
m_conf.colormask.wr = m_conf.colormask.wg = (rb_ga_mask.r != 0xFF);
m_conf.colormask.wb = m_conf.colormask.wa = (rb_ga_mask.g != 0xFF);
}
m_conf.colormask.wrgba = 0;
// 2 Select the new mask
if (rb_ga_mask.r != 0xFF)
{
if (write_ba)
if (process_ba & SHUFFLE_WRITE)
{
GL_INS("Color shuffle %s => B", read_ba ? "B" : "R");
GL_INS("Color shuffle %s => B", ((process_rg & SHUFFLE_READ) && shuffle_across) ? "R" : "B");
m_conf.colormask.wb = 1;
}
else
if (process_rg & SHUFFLE_WRITE)
{
GL_INS("Color shuffle %s => R", read_ba ? "B" : "R");
GL_INS("Color shuffle %s => R", ((process_ba & SHUFFLE_READ) && shuffle_across) ? "B" : "R");
m_conf.colormask.wr = 1;
}
if (rb_ga_mask.r)
@ -3433,14 +3476,15 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS
if (rb_ga_mask.g != 0xFF)
{
if (write_ba)
if (process_ba & SHUFFLE_WRITE)
{
GL_INS("Color shuffle %s => A", read_ba ? "A" : "G");
GL_INS("Color shuffle %s => A", ((process_rg & SHUFFLE_READ) && shuffle_across) ? "G" : "A");
m_conf.colormask.wa = 1;
}
else
if (process_rg & SHUFFLE_WRITE)
{
GL_INS("Color shuffle %s => G", read_ba ? "A" : "G");
GL_INS("Color shuffle %s => G", ((process_ba & SHUFFLE_READ) && shuffle_across) ? "A" : "G");
m_conf.colormask.wg = 1;
}
if (rb_ga_mask.g)
@ -3590,7 +3634,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
if (test_only)
return true;
ChannelFetch channel_select = (m_cached_ctx.CLAMP.WMT != 3 || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA;
ChannelFetch channel_select = ((m_cached_ctx.CLAMP.WMT != 3 && (m_vertex.buff[m_index.buff[0]].V & 0x20) == 0) || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA;
GL_INS("%s channel", (channel_select == ChannelFetch_BLUE) ? "blue" : "alpha");
@ -5316,7 +5360,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
if (m_texture_shuffle)
{
if (m_conf.ps.read_ba)
if (m_conf.ps.process_ba & SHUFFLE_READ)
{
m_can_correct_alpha = false;

View File

@ -66,6 +66,13 @@ private:
CLUTDrawOnGPU,
};
// Bit flags describing how one channel pair (RG or BA) takes part in a texture shuffle.
// The values are tested and combined bitwise (e.g. "process_ba & SHUFFLE_READ") and are the same
// numbers as the SHUFFLE_* constants defined in the shaders, so they are written out explicitly.
enum ShuffleProcessing
{
	SHUFFLE_READ = 1 << 0, // the pair is read from the source
	SHUFFLE_WRITE = 1 << 1, // the pair is written to the target
	SHUFFLE_READWRITE = SHUFFLE_READ | SHUFFLE_WRITE, // both of the above
};
bool HasEEUpload(GSVector4i r);
CLUTDrawTestResult PossibleCLUTDraw();
CLUTDrawTestResult PossibleCLUTDrawAggressive();
@ -157,6 +164,8 @@ private:
u32 m_split_texture_shuffle_fbw = 0;
u32 m_last_channel_shuffle_fbmsk = 0;
u32 m_last_channel_shuffle_fbp = 0;
u32 m_last_channel_shuffle_end_block = 0;
GIFRegFRAME m_split_clear_start = {};
GIFRegZBUF m_split_clear_start_Z = {};
@ -193,7 +202,7 @@ public:
void Lines2Sprites();
bool VerifyIndices();
void ExpandLineIndices();
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
void ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
void MergeSprite(GSTextureCache::Source* tex);

View File

@ -3875,7 +3875,11 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
GSHWDrawConfig& config = GSRendererHW::GetInstance()->BeginHLEHardwareDraw(tgt->m_texture, nullptr, tgt->m_scale, tgt->m_texture, tgt->m_scale, bbox);
config.colormask.wrgba = (write_rg ? (1 | 2) : (4 | 8));
config.ps.read_ba = read_ba;
// process_ba/process_rg are 2-bit masks: 1 = read from that channel pair, 2 = write to it.
// Both bits have to be combined in a single value; assigning the write bit in a separate
// statement would overwrite the read bit and the shader would never see "read BA".
config.ps.process_ba = (read_ba ? 1 : 0) | (!write_rg ? 2 : 0);
config.ps.process_rg = (!read_ba ? 1 : 0) | (write_rg ? 2 : 0);
config.ps.shuffle_across = true;
config.ps.write_rg = write_rg;
config.ps.shuffle = true;
GSRendererHW::GetInstance()->EndHLEHardwareDraw(false);

View File

@ -1810,7 +1810,9 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME);
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
setFnConstantI(m_fn_constants, pssel.process_ba, GSMTLConstantIndex_PS_PROCESS_BA);
setFnConstantI(m_fn_constants, pssel.process_rg, GSMTLConstantIndex_PS_PROCESS_RG);
setFnConstantB(m_fn_constants, pssel.shuffle_across, GSMTLConstantIndex_PS_SHUFFLE_ACROSS);
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);
setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK);

View File

@ -171,7 +171,9 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_LTF,
GSMTLConstantIndex_PS_SHUFFLE,
GSMTLConstantIndex_PS_SHUFFLE_SAME,
GSMTLConstantIndex_PS_READ_BA,
GSMTLConstantIndex_PS_PROCESS_BA,
GSMTLConstantIndex_PS_PROCESS_RG,
GSMTLConstantIndex_PS_SHUFFLE_ACROSS,
GSMTLConstantIndex_PS_READ16_SRC,
GSMTLConstantIndex_PS_WRITE_RG,
GSMTLConstantIndex_PS_FBMASK,

View File

@ -7,6 +7,10 @@ constant uint FMT_32 = 0;
constant uint FMT_24 = 1;
constant uint FMT_16 = 2;
constant uint SHUFFLE_READ = 1;
constant uint SHUFFLE_WRITE = 2;
constant uint SHUFFLE_READWRITE = 3;
constant bool HAS_FBFETCH [[function_constant(GSMTLConstantIndex_FRAMEBUFFER_FETCH)]];
constant bool FST [[function_constant(GSMTLConstantIndex_FST)]];
constant bool IIP [[function_constant(GSMTLConstantIndex_IIP)]];
@ -30,7 +34,9 @@ constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_AD
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
constant bool PS_SHUFFLE_SAME [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_SAME)]];
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
constant uint PS_PROCESS_BA [[function_constant(GSMTLConstantIndex_PS_PROCESS_BA)]];
constant uint PS_PROCESS_RG [[function_constant(GSMTLConstantIndex_PS_PROCESS_RG)]];
constant bool PS_SHUFFLE_ACROSS [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_ACROSS)]];
constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]];
constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]];
constant bool PS_FBMASK [[function_constant(GSMTLConstantIndex_PS_FBMASK)]];
@ -825,10 +831,10 @@ struct PSMain
else
T = sample_color(st);
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_before = uint4(T);
if (PS_READ_BA)
if (PS_PROCESS_BA & SHUFFLE_READ)
{
T.r = float((denorm_c_before.b << 3) & 0xF8);
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
@ -1097,10 +1103,10 @@ struct PSMain
if (PS_SHUFFLE)
{
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{
uint4 denorm_c_after = uint4(C);
if (PS_READ_BA)
if (PS_PROCESS_BA & SHUFFLE_READ)
{
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
@ -1118,30 +1124,62 @@ struct PSMain
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
if (PS_SHUFFLE_SAME)
{
if (PS_READ_BA)
C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80);
if (PS_PROCESS_BA & SHUFFLE_READ)
C = (denorm_c.b & 0x7F) | (denorm_c.a & 0x80);
else
C.ga = C.rg;
}
// Copy of a 16bit source in to this target
else if (PS_READ16_SRC)
{
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7) << 5);
if (denorm_c.a & 0x80)
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
}
// Write RB part. Mask will take care of the correct destination
else if (PS_READ_BA)
else if (PS_SHUFFLE_ACROSS)
{
C.rb = C.bb;
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
{
C.rb = C.br;
if ((denorm_c.a & 0x80) != 0)
C.g = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
else
C.g = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
if ((denorm_c.g & 0x80) != 0)
C.a = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
else
C.a = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
}
else if(PS_PROCESS_BA & SHUFFLE_READ)
{
C.rb = C.bb;
if ((denorm_c.a & 0x80) != 0)
C.ga = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
}
else
{
C.rb = C.rr;
if ((denorm_c.g & 0x80) != 0)
C.ga = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
}
}
else
else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA
{
C.rb = C.rr;
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
if ((denorm_c.g & 0x80) != 0)
C.g = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
else
C.g = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
if ((denorm_c.a & 0x80) != 0)
C.a = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
else
C.a = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
}
}

View File

@ -1367,7 +1367,9 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_IIP {}\n", sel.iip)
+ fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle)
+ fmt::format("#define PS_SHUFFLE_SAME {}\n", sel.shuffle_same)
+ fmt::format("#define PS_READ_BA {}\n", sel.read_ba)
+ fmt::format("#define PS_PROCESS_BA {}\n", sel.process_ba)
+ fmt::format("#define PS_PROCESS_RG {}\n", sel.process_rg)
+ fmt::format("#define PS_SHUFFLE_ACROSS {}\n", sel.shuffle_across)
+ fmt::format("#define PS_READ16_SRC {}\n", sel.real16src)
+ fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg)
+ fmt::format("#define PS_FBMASK {}\n", sel.fbmask)

View File

@ -4813,7 +4813,9 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_IIP", sel.iip);
AddMacro(ss, "PS_SHUFFLE", sel.shuffle);
AddMacro(ss, "PS_SHUFFLE_SAME", sel.shuffle_same);
AddMacro(ss, "PS_READ_BA", sel.read_ba);
AddMacro(ss, "PS_PROCESS_BA", sel.process_ba);
AddMacro(ss, "PS_PROCESS_RG", sel.process_rg);
AddMacro(ss, "PS_SHUFFLE_ACROSS", sel.shuffle_across);
AddMacro(ss, "PS_READ16_SRC", sel.real16src);
AddMacro(ss, "PS_WRITE_RG", sel.write_rg);
AddMacro(ss, "PS_FBMASK", sel.fbmask);