mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Improve channel + texture shuffle detection and processing
This commit is contained in:
parent
9e42bf7385
commit
4ba43b8496
|
@ -5,6 +5,10 @@
|
|||
#define FMT_24 1
|
||||
#define FMT_16 2
|
||||
|
||||
// Bit flags held by PS_PROCESS_RG / PS_PROCESS_BA: whether the texture shuffle
// reads from and/or writes to that channel pair. Tested with bitwise AND.
#define SHUFFLE_READ 1
|
||||
#define SHUFFLE_WRITE 2
|
||||
// Both flags set (SHUFFLE_READ | SHUFFLE_WRITE).
#define SHUFFLE_READWRITE 3
|
||||
|
||||
#ifndef VS_TME
|
||||
#define VS_IIP 0
|
||||
#define VS_TME 1
|
||||
|
@ -41,7 +45,9 @@
|
|||
#define PS_REGION_RECT 0
|
||||
#define PS_SHUFFLE 0
|
||||
#define PS_SHUFFLE_SAME 0
|
||||
#define PS_READ_BA 0
|
||||
#define PS_PROCESS_BA 0
|
||||
#define PS_PROCESS_RG 0
|
||||
#define PS_SHUFFLE_ACROSS 0
|
||||
#define PS_READ16_SRC 0
|
||||
#define PS_DST_FMT 0
|
||||
#define PS_DEPTH_FMT 0
|
||||
|
@ -761,10 +767,10 @@ float4 ps_color(PS_INPUT input)
|
|||
float4 T = sample_color(st, input.t.w);
|
||||
#endif
|
||||
|
||||
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
|
||||
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
|
||||
{
|
||||
uint4 denorm_c_before = uint4(T);
|
||||
if (PS_READ_BA)
|
||||
if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
{
|
||||
T.r = float((denorm_c_before.b << 3) & 0xF8);
|
||||
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
|
||||
|
@ -1028,10 +1034,10 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||
|
||||
if (PS_SHUFFLE)
|
||||
{
|
||||
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
|
||||
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
|
||||
{
|
||||
uint4 denorm_c_after = uint4(C);
|
||||
if (PS_READ_BA)
|
||||
if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
{
|
||||
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
|
||||
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
|
||||
|
@ -1049,7 +1055,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
|
||||
if (PS_SHUFFLE_SAME)
|
||||
{
|
||||
if (PS_READ_BA)
|
||||
if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
|
||||
else
|
||||
C.ga = C.rg;
|
||||
|
@ -1063,23 +1069,48 @@ PS_OUTPUT ps_main(PS_INPUT input)
|
|||
else
|
||||
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
|
||||
}
|
||||
// Write RB part. Mask will take care of the correct destination
|
||||
else if (PS_READ_BA)
|
||||
else if (PS_SHUFFLE_ACROSS)
|
||||
{
|
||||
C.rb = C.bb;
|
||||
if (denorm_c.a & 0x80u)
|
||||
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
{
|
||||
C.rb = C.br;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
}
|
||||
else if(PS_PROCESS_BA & SHUFFLE_READ)
|
||||
{
|
||||
C.rb = C.bb;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
}
|
||||
else
|
||||
C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
{
|
||||
C.rb = C.rr;
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
}
|
||||
}
|
||||
else
|
||||
else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA
|
||||
{
|
||||
C.rb = C.rr;
|
||||
if (denorm_c.g & 0x80u)
|
||||
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,10 @@
|
|||
#define FMT_24 1
|
||||
#define FMT_16 2
|
||||
|
||||
// Bit flags held by PS_PROCESS_RG / PS_PROCESS_BA: whether the texture shuffle
// reads from and/or writes to that channel pair. Tested with bitwise AND.
#define SHUFFLE_READ 1
|
||||
#define SHUFFLE_WRITE 2
|
||||
// Both flags set (SHUFFLE_READ | SHUFFLE_WRITE).
#define SHUFFLE_READWRITE 3
|
||||
|
||||
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
|
||||
// to detect bad sampling due to upscaling
|
||||
//#define TEX_COORD_DEBUG
|
||||
|
@ -695,9 +699,9 @@ vec4 ps_color()
|
|||
vec4 T = sample_color(st);
|
||||
#endif
|
||||
|
||||
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
|
||||
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
uvec4 denorm_c_before = uvec4(T);
|
||||
#if PS_READ_BA
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
T.r = float((denorm_c_before.b << 3) & 0xF8);
|
||||
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
|
||||
T.b = float((denorm_c_before.a << 1) & 0xF8);
|
||||
|
@ -1027,9 +1031,9 @@ void ps_main()
|
|||
|
||||
|
||||
#if PS_SHUFFLE
|
||||
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
|
||||
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
uvec4 denorm_c_after = uvec4(C);
|
||||
#if PS_READ_BA
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
|
||||
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
|
||||
#else
|
||||
|
@ -1043,7 +1047,7 @@ void ps_main()
|
|||
|
||||
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
|
||||
#if PS_SHUFFLE_SAME
|
||||
#if (PS_READ_BA)
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
|
||||
#else
|
||||
C.ga = C.rg;
|
||||
|
@ -1055,40 +1059,42 @@ void ps_main()
|
|||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
|
||||
// Write RB part. Mask will take care of the correct destination
|
||||
#elif PS_READ_BA
|
||||
C.rb = C.bb;
|
||||
// FIXME precompute my_TA & 0x80
|
||||
|
||||
// Write GA part. Mask will take care of the correct destination
|
||||
// Note: GLSL 4.50/GL_EXT_shader_integer_mix supports a mix instruction to select a component.
|
||||
// However, Nvidia emulates it with an if (at least on the Kepler architecture) ...
|
||||
|
||||
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
|
||||
// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
|
||||
// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
|
||||
// c.ga = vec2(float(denorm_c.a));
|
||||
|
||||
if (bool(denorm_c.a & 0x80u))
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
#elif PS_SHUFFLE_ACROSS
|
||||
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
C.rb = C.br;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
|
||||
#elif(PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C.rb = C.bb;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#else
|
||||
C.rb = C.rr;
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#endif // PS_PROCESS_BA
|
||||
#else // PS_SHUFFLE_ACROSS
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
|
||||
#else
|
||||
C.rb = C.rr;
|
||||
if (bool(denorm_c.g & 0x80u))
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
|
||||
// Nice idea but step/mix requires 4 instructions
|
||||
// set / trunc / I2F / Mad
|
||||
//
|
||||
// float sel = step(128.0f, c.g);
|
||||
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
|
||||
// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
|
||||
|
||||
#endif // PS_SHUFFLE_SAME
|
||||
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
#endif // PS_SHUFFLE_ACROSS
|
||||
#endif // PS_SHUFFLE
|
||||
|
||||
ps_dither(C.rgb, alpha_blend.a);
|
||||
|
|
|
@ -233,6 +233,10 @@ void main()
|
|||
#define FMT_24 1
|
||||
#define FMT_16 2
|
||||
|
||||
// Bit flags held by PS_PROCESS_RG / PS_PROCESS_BA: whether the texture shuffle
// reads from and/or writes to that channel pair. Tested with bitwise AND.
#define SHUFFLE_READ 1
|
||||
#define SHUFFLE_WRITE 2
|
||||
// Both flags set (SHUFFLE_READ | SHUFFLE_WRITE).
#define SHUFFLE_READWRITE 3
|
||||
|
||||
#ifndef VS_TME
|
||||
#define VS_TME 1
|
||||
#define VS_FST 1
|
||||
|
@ -266,7 +270,9 @@ void main()
|
|||
#define PS_POINT_SAMPLER 0
|
||||
#define PS_SHUFFLE 0
|
||||
#define PS_SHUFFLE_SAME 0
|
||||
#define PS_READ_BA 0
|
||||
#define PS_PROCESS_BA 0
|
||||
#define PS_PROCESS_RG 0
|
||||
#define PS_SHUFFLE_ACROSS 0
|
||||
#define PS_WRITE_RG 0
|
||||
#define PS_READ16_SRC 0
|
||||
#define PS_DST_FMT 0
|
||||
|
@ -945,9 +951,9 @@ vec4 ps_color()
|
|||
vec4 T = sample_color(st);
|
||||
#endif
|
||||
|
||||
#if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC
|
||||
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
uvec4 denorm_c_before = uvec4(T);
|
||||
#if PS_READ_BA
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
T.r = float((denorm_c_before.b << 3) & 0xF8);
|
||||
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
|
||||
T.b = float((denorm_c_before.a << 1) & 0xF8);
|
||||
|
@ -1277,9 +1283,9 @@ void main()
|
|||
ps_blend(C, alpha_blend);
|
||||
|
||||
#if PS_SHUFFLE
|
||||
#if !PS_SHUFFLE_SAME && !PS_READ16_SRC
|
||||
#if SW_BLEND && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
uvec4 denorm_c_after = uvec4(C);
|
||||
#if PS_READ_BA
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
|
||||
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
|
||||
#else
|
||||
|
@ -1293,7 +1299,7 @@ void main()
|
|||
|
||||
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
|
||||
#if PS_SHUFFLE_SAME
|
||||
#if (PS_READ_BA)
|
||||
#if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u)));
|
||||
#else
|
||||
C.ga = C.rg;
|
||||
|
@ -1306,19 +1312,42 @@ void main()
|
|||
else
|
||||
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
|
||||
// Write RB part. Mask will take care of the correct destination
|
||||
#elif PS_READ_BA
|
||||
C.rb = C.bb;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#else
|
||||
C.rb = C.rr;
|
||||
#elif PS_SHUFFLE_ACROSS
|
||||
#if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
C.rb = C.br;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
|
||||
#elif(PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C.rb = C.bb;
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#else
|
||||
C.rb = C.rr;
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#endif // PS_PROCESS_BA
|
||||
#else // PS_SHUFFLE_ACROSS
|
||||
if ((denorm_c.g & 0x80u) != 0u)
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
#endif // PS_SHUFFLE_SAME
|
||||
C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
if ((denorm_c.a & 0x80u) != 0u)
|
||||
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u));
|
||||
else
|
||||
C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u));
|
||||
#endif // PS_SHUFFLE_ACROSS
|
||||
#endif // PS_SHUFFLE
|
||||
|
||||
ps_dither(C.rgb, alpha_blend.a);
|
||||
|
|
|
@ -317,7 +317,9 @@ struct alignas(16) GSHWDrawConfig
|
|||
u32 shuffle : 1;
|
||||
u32 shuffle_same : 1;
|
||||
u32 real16src: 1;
|
||||
u32 read_ba : 1;
|
||||
u32 process_ba : 2;
|
||||
u32 process_rg : 2;
|
||||
u32 shuffle_across : 1;
|
||||
u32 write_rg : 1;
|
||||
u32 fbmask : 1;
|
||||
|
||||
|
|
|
@ -1679,7 +1679,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||
sm.AddMacro("PS_REGION_RECT", sel.region_rect);
|
||||
sm.AddMacro("PS_SHUFFLE", sel.shuffle);
|
||||
sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
|
||||
sm.AddMacro("PS_READ_BA", sel.read_ba);
|
||||
sm.AddMacro("PS_PROCESS_BA", sel.process_ba);
|
||||
sm.AddMacro("PS_PROCESS_RG", sel.process_rg);
|
||||
sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
|
||||
sm.AddMacro("PS_READ16_SRC", sel.real16src);
|
||||
sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
|
||||
sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
|
||||
|
|
|
@ -2833,7 +2833,9 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
|
|||
sm.AddMacro("PS_REGION_RECT", sel.region_rect);
|
||||
sm.AddMacro("PS_SHUFFLE", sel.shuffle);
|
||||
sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same);
|
||||
sm.AddMacro("PS_READ_BA", sel.read_ba);
|
||||
sm.AddMacro("PS_PROCESS_BA", sel.process_ba);
|
||||
sm.AddMacro("PS_PROCESS_RG", sel.process_rg);
|
||||
sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across);
|
||||
sm.AddMacro("PS_READ16_SRC", sel.real16src);
|
||||
sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
|
||||
sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
|
||||
|
|
|
@ -328,7 +328,7 @@ void GSRendererHW::ExpandLineIndices()
|
|||
}
|
||||
|
||||
// Fix the vertex position/texture coordinates from 16-bit color to 32-bit color
|
||||
void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex)
|
||||
void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex)
|
||||
{
|
||||
const u32 count = m_vertex.next;
|
||||
GSVertex* v = &m_vertex.buff[0];
|
||||
|
@ -336,16 +336,22 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
|
|||
// Could be drawing upside down or just back to front on the actual verts.
|
||||
const GSVertex* start_verts = (v[0].XYZ.X <= v[m_vertex.tail - 2].XYZ.X) ? &v[0] : &v[m_vertex.tail - 2];
|
||||
const GSVertex first_vert = (start_verts[0].XYZ.X <= start_verts[1].XYZ.X) ? start_verts[0] : start_verts[1];
|
||||
const GSVertex second_vert = (start_verts[0].XYZ.X <= start_verts[1].XYZ.X) ? start_verts[1] : start_verts[0];
|
||||
// vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors
|
||||
const int pos = (first_vert.XYZ.X - o.OFX) & 0xFF;
|
||||
write_ba = (pos > 112 && pos < 136);
|
||||
|
||||
|
||||
// Read texture is 8 to 16 pixels (same as above)
|
||||
const float tw = static_cast<float>(1u << m_cached_ctx.TEX0.TW);
|
||||
int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S);
|
||||
int tex_pos = (PRIM->FST) ? first_vert.U : static_cast<int>(tw * first_vert.ST.S * 16.0f);
|
||||
tex_pos &= 0xFF;
|
||||
shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8;
|
||||
|
||||
const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8;
|
||||
process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0;
|
||||
process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0;
|
||||
// "same group" means it can read blue and write alpha using C32 tricks
|
||||
read_ba = (tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF0000);
|
||||
process_ba |= ((tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF0000) || full_width) ? SHUFFLE_READ : 0;
|
||||
process_rg |= (!(process_ba & SHUFFLE_READ) || full_width) ? SHUFFLE_READ : 0;
|
||||
|
||||
// Another way of selecting whether to read RG/BA is to use region repeat.
|
||||
// Ace Combat 04 reads RG, writes to RGBA by setting a MINU of 1015.
|
||||
|
@ -356,9 +362,29 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
|
|||
m_cached_ctx.CLAMP.MAXV);
|
||||
|
||||
// offset coordinates swap around RG/BA.
|
||||
const bool invert = read_ba; // (tex_pos > 112 && tex_pos < 144), i.e. 8 fixed point
|
||||
const u32 minu = (m_cached_ctx.CLAMP.MINU & 8) ^ (invert ? 8 : 0);
|
||||
read_ba = ((minu & 8) != 0);
|
||||
const u32 maxu = (m_cached_ctx.CLAMP.MAXU & 8);
|
||||
const u32 minu = (m_cached_ctx.CLAMP.MINU & 8);
|
||||
if (maxu)
|
||||
{
|
||||
process_ba |= SHUFFLE_READ;
|
||||
process_rg &= ~SHUFFLE_READ;
|
||||
if (!PRIM->ABE && (process_rg & SHUFFLE_WRITE))
|
||||
{
|
||||
process_ba &= ~SHUFFLE_WRITE;
|
||||
shuffle_across = true;
|
||||
}
|
||||
}
|
||||
else if (minu == 0)
|
||||
{
|
||||
process_rg |= SHUFFLE_READ;
|
||||
process_ba &= ~SHUFFLE_READ;
|
||||
|
||||
if (!PRIM->ABE && (process_ba & SHUFFLE_WRITE))
|
||||
{
|
||||
process_rg &= ~SHUFFLE_WRITE;
|
||||
shuffle_across = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (m_split_texture_shuffle_pages > 0)
|
||||
|
@ -418,7 +444,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
|
|||
// If a game does the texture and frame doubling differently, they can burn in hell.
|
||||
if (!m_copy_16bit_to_target_shuffle && m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block())
|
||||
{
|
||||
unsigned int max_tex_draw_width = std::min(static_cast<int>(m_vt.m_max.t.x + (!read_ba ? 8 : 0)), 1 << m_cached_ctx.TEX0.TW);
|
||||
unsigned int max_tex_draw_width = std::min(static_cast<int>(m_vt.m_max.t.x + (!process_ba ? 8 : 0)), 1 << m_cached_ctx.TEX0.TW);
|
||||
const unsigned int clamp_minu = m_context->CLAMP.MINU;
|
||||
const unsigned int clamp_maxu = m_context->CLAMP.MAXU;
|
||||
|
||||
|
@ -473,15 +499,19 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
|
|||
const int reversed_U = (v[0].U > v[1].U) ? 1 : 0;
|
||||
for (u32 i = 0; i < count; i += 2)
|
||||
{
|
||||
if (write_ba)
|
||||
v[i + reversed_pos].XYZ.X -= 128u;
|
||||
else
|
||||
v[i + 1 - reversed_pos].XYZ.X += 128u;
|
||||
|
||||
if (read_ba)
|
||||
v[i + reversed_U].U -= 128u;
|
||||
else
|
||||
v[i + 1 - reversed_U].U += 128u;
|
||||
if (!full_width)
|
||||
{
|
||||
if (process_ba & SHUFFLE_WRITE)
|
||||
v[i + reversed_pos].XYZ.X -= 128u;
|
||||
else
|
||||
v[i + 1 - reversed_pos].XYZ.X += 128u;
|
||||
|
||||
if (process_ba & SHUFFLE_READ)
|
||||
v[i + reversed_U].U -= 128u;
|
||||
else
|
||||
v[i + 1 - reversed_U].U += 128u;
|
||||
}
|
||||
|
||||
if (half_bottom_vert)
|
||||
{
|
||||
|
@ -530,15 +560,19 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
|
|||
|
||||
for (u32 i = 0; i < count; i += 2)
|
||||
{
|
||||
if (write_ba)
|
||||
v[i + reversed_pos].XYZ.X -= 128u;
|
||||
else
|
||||
v[i + 1 - reversed_pos].XYZ.X += 128u;
|
||||
|
||||
if (read_ba)
|
||||
v[i + reversed_S].ST.S -= offset_8pix;
|
||||
else
|
||||
v[i + 1 - reversed_S].ST.S += offset_8pix;
|
||||
if (!full_width)
|
||||
{
|
||||
if (process_ba & SHUFFLE_WRITE)
|
||||
v[i + reversed_pos].XYZ.X -= 128u;
|
||||
else
|
||||
v[i + 1 - reversed_pos].XYZ.X += 128u;
|
||||
|
||||
if (process_ba & SHUFFLE_READ)
|
||||
v[i + reversed_S].ST.S -= offset_8pix;
|
||||
else
|
||||
v[i + 1 - reversed_S].ST.S += offset_8pix;
|
||||
}
|
||||
|
||||
if (half_bottom_vert)
|
||||
{
|
||||
|
@ -579,18 +613,21 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
|
|||
}
|
||||
}
|
||||
|
||||
// Update vertex trace too. Avoid issue to compute bounding box
|
||||
if (write_ba)
|
||||
m_vt.m_min.p.x -= 8.0f;
|
||||
else
|
||||
m_vt.m_max.p.x += 8.0f;
|
||||
|
||||
if (!m_same_group_texture_shuffle)
|
||||
if (!full_width)
|
||||
{
|
||||
if (read_ba)
|
||||
m_vt.m_min.t.x -= 8.0f;
|
||||
// Update the vertex trace too, to avoid issues when computing the bounding box
|
||||
if (process_ba & SHUFFLE_WRITE)
|
||||
m_vt.m_min.p.x -= 8.0f;
|
||||
else
|
||||
m_vt.m_max.t.x += 8.0f;
|
||||
m_vt.m_max.p.x += 8.0f;
|
||||
|
||||
if (!m_same_group_texture_shuffle)
|
||||
{
|
||||
if (process_ba & SHUFFLE_WRITE)
|
||||
m_vt.m_min.t.x -= 8.0f;
|
||||
else
|
||||
m_vt.m_max.t.x += 8.0f;
|
||||
}
|
||||
}
|
||||
|
||||
if (half_right_vert)
|
||||
|
@ -1858,7 +1895,8 @@ void GSRendererHW::Draw()
|
|||
// Fortunately, it seems to change the FBMSK along the way, so this check alone is sufficient.
|
||||
// Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore
|
||||
// we need to split on those too.
|
||||
m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK;
|
||||
m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK &&
|
||||
m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block();
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
if (m_channel_shuffle)
|
||||
|
@ -2507,6 +2545,12 @@ void GSRendererHW::Draw()
|
|||
}
|
||||
}
|
||||
|
||||
if (rt && m_channel_shuffle)
|
||||
{
|
||||
m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0;
|
||||
m_last_channel_shuffle_end_block = rt->m_end_block;
|
||||
}
|
||||
|
||||
GSTextureCache::Target* ds = nullptr;
|
||||
GIFRegTEX0 ZBUF_TEX0;
|
||||
if (!no_ds)
|
||||
|
@ -2601,6 +2645,11 @@ void GSRendererHW::Draw()
|
|||
GL_INS("Channel shuffle effect detected (2nd shot)");
|
||||
m_channel_shuffle = true;
|
||||
m_last_channel_shuffle_fbmsk = m_context->FRAME.FBMSK;
|
||||
if (rt)
|
||||
{
|
||||
m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0;
|
||||
m_last_channel_shuffle_end_block = rt->m_end_block;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3378,17 +3427,15 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS
|
|||
m_conf.ps.shuffle = 1;
|
||||
m_conf.ps.dst_fmt = GSLocalMemory::PSM_FMT_32;
|
||||
|
||||
bool write_ba;
|
||||
bool read_ba;
|
||||
u32 process_rg = 0;
|
||||
u32 process_ba = 0;
|
||||
bool shuffle_across = true;
|
||||
|
||||
ConvertSpriteTextureShuffle(write_ba, read_ba, rt, tex);
|
||||
ConvertSpriteTextureShuffle(process_rg, process_ba, shuffle_across, rt, tex);
|
||||
|
||||
// If date is enabled you need to test the green channel instead of the
|
||||
// alpha channel. Only enable this code in DATE mode to reduce the number
|
||||
// of shader.
|
||||
m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_cached_ctx.TEST.DATE;
|
||||
|
||||
m_conf.ps.read_ba = read_ba;
|
||||
// If date is enabled you need to test the green channel instead of the alpha channel.
|
||||
// Only enable this code in DATE mode to reduce the number of shaders.
|
||||
m_conf.ps.write_rg = (process_rg & SHUFFLE_WRITE) && features.texture_barrier && m_cached_ctx.TEST.DATE;
|
||||
m_conf.ps.real16src = m_copy_16bit_to_target_shuffle;
|
||||
m_conf.ps.shuffle_same = m_same_group_texture_shuffle;
|
||||
// Please bang my head against the wall!
|
||||
|
@ -3401,30 +3448,26 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS
|
|||
// r = rb mask, g = ga mask
|
||||
const GSVector2i rb_ga_mask = GSVector2i(fbmask & 0xFF, (fbmask >> 8) & 0xFF);
|
||||
|
||||
m_conf.ps.process_rg = process_rg;
|
||||
m_conf.ps.process_ba = process_ba;
|
||||
m_conf.ps.shuffle_across = shuffle_across;
|
||||
// Ace Combat 04 sets FBMSK to 0 for the shuffle, duplicating RG across RGBA.
|
||||
// Given how touchy texture shuffles are, I'm not ready to make it 100% dependent on the real FBMSK yet.
|
||||
// TODO: Remove this if, and see what breaks.
|
||||
if (fbmask != 0)
|
||||
{
|
||||
m_conf.colormask.wrgba = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_conf.colormask.wr = m_conf.colormask.wg = (rb_ga_mask.r != 0xFF);
|
||||
m_conf.colormask.wb = m_conf.colormask.wa = (rb_ga_mask.g != 0xFF);
|
||||
}
|
||||
m_conf.colormask.wrgba = 0;
|
||||
|
||||
// 2 Select the new mask
|
||||
if (rb_ga_mask.r != 0xFF)
|
||||
{
|
||||
if (write_ba)
|
||||
if (process_ba & SHUFFLE_WRITE)
|
||||
{
|
||||
GL_INS("Color shuffle %s => B", read_ba ? "B" : "R");
|
||||
GL_INS("Color shuffle %s => B", ((process_rg & SHUFFLE_READ) && shuffle_across) ? "R" : "B");
|
||||
m_conf.colormask.wb = 1;
|
||||
}
|
||||
else
|
||||
|
||||
if (process_rg & SHUFFLE_WRITE)
|
||||
{
|
||||
GL_INS("Color shuffle %s => R", read_ba ? "B" : "R");
|
||||
GL_INS("Color shuffle %s => R", ((process_ba & SHUFFLE_READ) && shuffle_across) ? "B" : "R");
|
||||
m_conf.colormask.wr = 1;
|
||||
}
|
||||
if (rb_ga_mask.r)
|
||||
|
@ -3433,14 +3476,15 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS
|
|||
|
||||
if (rb_ga_mask.g != 0xFF)
|
||||
{
|
||||
if (write_ba)
|
||||
if (process_ba & SHUFFLE_WRITE)
|
||||
{
|
||||
GL_INS("Color shuffle %s => A", read_ba ? "A" : "G");
|
||||
GL_INS("Color shuffle %s => A", ((process_rg & SHUFFLE_READ) && shuffle_across) ? "G" : "A");
|
||||
m_conf.colormask.wa = 1;
|
||||
}
|
||||
else
|
||||
|
||||
if (process_rg & SHUFFLE_WRITE)
|
||||
{
|
||||
GL_INS("Color shuffle %s => G", read_ba ? "A" : "G");
|
||||
GL_INS("Color shuffle %s => G", ((process_ba & SHUFFLE_READ) && shuffle_across) ? "A" : "G");
|
||||
m_conf.colormask.wg = 1;
|
||||
}
|
||||
if (rb_ga_mask.g)
|
||||
|
@ -3590,7 +3634,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
|
|||
if (test_only)
|
||||
return true;
|
||||
|
||||
ChannelFetch channel_select = (m_cached_ctx.CLAMP.WMT != 3 || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA;
|
||||
ChannelFetch channel_select = ((m_cached_ctx.CLAMP.WMT != 3 && (m_vertex.buff[m_index.buff[0]].V & 0x20) == 0) || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA;
|
||||
|
||||
GL_INS("%s channel", (channel_select == ChannelFetch_BLUE) ? "blue" : "alpha");
|
||||
|
||||
|
@ -5316,7 +5360,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
|
|||
{
|
||||
if (m_texture_shuffle)
|
||||
{
|
||||
if (m_conf.ps.read_ba)
|
||||
if (m_conf.ps.process_ba & SHUFFLE_READ)
|
||||
{
|
||||
m_can_correct_alpha = false;
|
||||
|
||||
|
|
|
@ -66,6 +66,13 @@ private:
|
|||
CLUTDrawOnGPU,
|
||||
};
|
||||
|
||||
// Bit flags describing how a texture shuffle touches one channel pair (RG or BA).
// They are stored in the u32 process_rg / process_ba values and tested with bitwise AND.
// The numeric values match the SHUFFLE_* constants defined in the shaders.
enum ShuffleProcessing
|
||||
{
|
||||
// The shuffle reads from the channel pair.
SHUFFLE_READ = 1,
|
||||
// The shuffle writes to the channel pair (implicitly 2).
SHUFFLE_WRITE,
|
||||
// Both flags set: SHUFFLE_READ | SHUFFLE_WRITE (implicitly 3).
SHUFFLE_READWRITE,
|
||||
};
|
||||
|
||||
bool HasEEUpload(GSVector4i r);
|
||||
CLUTDrawTestResult PossibleCLUTDraw();
|
||||
CLUTDrawTestResult PossibleCLUTDrawAggressive();
|
||||
|
@ -157,6 +164,8 @@ private:
|
|||
u32 m_split_texture_shuffle_fbw = 0;
|
||||
|
||||
u32 m_last_channel_shuffle_fbmsk = 0;
|
||||
u32 m_last_channel_shuffle_fbp = 0;
|
||||
u32 m_last_channel_shuffle_end_block = 0;
|
||||
|
||||
GIFRegFRAME m_split_clear_start = {};
|
||||
GIFRegZBUF m_split_clear_start_Z = {};
|
||||
|
@ -193,7 +202,7 @@ public:
|
|||
void Lines2Sprites();
|
||||
bool VerifyIndices();
|
||||
void ExpandLineIndices();
|
||||
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
|
||||
void ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex);
|
||||
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
||||
GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale);
|
||||
void MergeSprite(GSTextureCache::Source* tex);
|
||||
|
|
|
@ -3875,7 +3875,11 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
|
|||
|
||||
GSHWDrawConfig& config = GSRendererHW::GetInstance()->BeginHLEHardwareDraw(tgt->m_texture, nullptr, tgt->m_scale, tgt->m_texture, tgt->m_scale, bbox);
|
||||
config.colormask.wrgba = (write_rg ? (1 | 2) : (4 | 8));
|
||||
config.ps.read_ba = read_ba;
|
||||
config.ps.process_ba = read_ba ? 1 : 0;
|
||||
config.ps.process_rg = !read_ba ? 1 : 0;
|
||||
config.ps.process_ba = !write_rg ? 2 : 0;
|
||||
config.ps.process_rg = write_rg ? 2 : 0;
|
||||
config.ps.shuffle_across = true;
|
||||
config.ps.write_rg = write_rg;
|
||||
config.ps.shuffle = true;
|
||||
GSRendererHW::GetInstance()->EndHLEHardwareDraw(false);
|
||||
|
|
|
@ -1810,7 +1810,9 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
|
|||
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
|
||||
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
|
||||
setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME);
|
||||
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
|
||||
setFnConstantI(m_fn_constants, pssel.process_ba, GSMTLConstantIndex_PS_PROCESS_BA);
|
||||
setFnConstantI(m_fn_constants, pssel.process_rg, GSMTLConstantIndex_PS_PROCESS_RG);
|
||||
setFnConstantB(m_fn_constants, pssel.shuffle_across, GSMTLConstantIndex_PS_SHUFFLE_ACROSS);
|
||||
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
|
||||
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);
|
||||
setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK);
|
||||
|
|
|
@ -171,7 +171,9 @@ enum GSMTLFnConstants
|
|||
GSMTLConstantIndex_PS_LTF,
|
||||
GSMTLConstantIndex_PS_SHUFFLE,
|
||||
GSMTLConstantIndex_PS_SHUFFLE_SAME,
|
||||
GSMTLConstantIndex_PS_READ_BA,
|
||||
GSMTLConstantIndex_PS_PROCESS_BA,
|
||||
GSMTLConstantIndex_PS_PROCESS_RG,
|
||||
GSMTLConstantIndex_PS_SHUFFLE_ACROSS,
|
||||
GSMTLConstantIndex_PS_READ16_SRC,
|
||||
GSMTLConstantIndex_PS_WRITE_RG,
|
||||
GSMTLConstantIndex_PS_FBMASK,
|
||||
|
|
|
@ -7,6 +7,10 @@ constant uint FMT_32 = 0;
|
|||
constant uint FMT_24 = 1;
|
||||
constant uint FMT_16 = 2;
|
||||
|
||||
// Values carried by the PS_PROCESS_BA / PS_PROCESS_RG function constants.
// SHUFFLE_READ is used as a bit flag: the shader tests `PS_PROCESS_BA & SHUFFLE_READ`.
constant uint SHUFFLE_READ = 1;
|
||||
// NOTE(review): presumably the matching "write" bit; no direct test of it is visible in this chunk.
constant uint SHUFFLE_WRITE = 2;
|
||||
// Equal to SHUFFLE_READ | SHUFFLE_WRITE (1 | 2); the shader compares it with `==`
// against PS_PROCESS_BA and PS_PROCESS_RG.
constant uint SHUFFLE_READWRITE = 3;
|
||||
|
||||
constant bool HAS_FBFETCH [[function_constant(GSMTLConstantIndex_FRAMEBUFFER_FETCH)]];
|
||||
constant bool FST [[function_constant(GSMTLConstantIndex_FST)]];
|
||||
constant bool IIP [[function_constant(GSMTLConstantIndex_IIP)]];
|
||||
|
@ -30,7 +34,9 @@ constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_AD
|
|||
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
|
||||
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
|
||||
constant bool PS_SHUFFLE_SAME [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_SAME)]];
|
||||
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
|
||||
// Shuffle mode for the blue/alpha pair, compared against the SHUFFLE_* values:
// when `PS_PROCESS_BA & SHUFFLE_READ` is set the shader sources its data from the .b/.a channels.
constant uint PS_PROCESS_BA [[function_constant(GSMTLConstantIndex_PS_PROCESS_BA)]];
|
||||
// Shuffle mode for the other pair, compared against SHUFFLE_READWRITE in the shuffle conditions.
// NOTE(review): presumably red/green, going by the name and the .r/.g fallback branch — confirm.
constant uint PS_PROCESS_RG [[function_constant(GSMTLConstantIndex_PS_PROCESS_RG)]];
|
||||
// Enables the shuffle pre/post-processing even when neither pair is SHUFFLE_READWRITE,
// and selects the `else if (PS_SHUFFLE_ACROSS)` output path of the shuffle code.
constant bool PS_SHUFFLE_ACROSS [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_ACROSS)]];
|
||||
constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]];
|
||||
constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]];
|
||||
constant bool PS_FBMASK [[function_constant(GSMTLConstantIndex_PS_FBMASK)]];
|
||||
|
@ -825,10 +831,10 @@ struct PSMain
|
|||
else
|
||||
T = sample_color(st);
|
||||
|
||||
if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC)
|
||||
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
|
||||
{
|
||||
uint4 denorm_c_before = uint4(T);
|
||||
if (PS_READ_BA)
|
||||
if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
{
|
||||
T.r = float((denorm_c_before.b << 3) & 0xF8);
|
||||
T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0));
|
||||
|
@ -1097,10 +1103,10 @@ struct PSMain
|
|||
|
||||
if (PS_SHUFFLE)
|
||||
{
|
||||
if (!PS_SHUFFLE_SAME && !PS_READ16_SRC)
|
||||
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
|
||||
{
|
||||
uint4 denorm_c_after = uint4(C);
|
||||
if (PS_READ_BA)
|
||||
if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
{
|
||||
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));
|
||||
C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80));
|
||||
|
@ -1118,30 +1124,62 @@ struct PSMain
|
|||
// Special case for 32bit input and 16bit output, shuffle used by The Godfather
|
||||
if (PS_SHUFFLE_SAME)
|
||||
{
|
||||
if (PS_READ_BA)
|
||||
C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80);
|
||||
if (PS_PROCESS_BA & SHUFFLE_READ)
|
||||
C = (denorm_c.b & 0x7F) | (denorm_c.a & 0x80);
|
||||
else
|
||||
C.ga = C.rg;
|
||||
}
|
||||
// Copy of a 16bit source in to this target
|
||||
else if (PS_READ16_SRC)
|
||||
{
|
||||
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
|
||||
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7) << 5);
|
||||
if (denorm_c.a & 0x80)
|
||||
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
|
||||
}
|
||||
// Write RB part. Mask will take care of the correct destination
|
||||
else if (PS_READ_BA)
|
||||
else if (PS_SHUFFLE_ACROSS)
|
||||
{
|
||||
C.rb = C.bb;
|
||||
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
|
||||
if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
|
||||
{
|
||||
C.rb = C.br;
|
||||
if ((denorm_c.a & 0x80) != 0)
|
||||
C.g = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.g = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
|
||||
|
||||
if ((denorm_c.g & 0x80) != 0)
|
||||
C.a = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.a = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
|
||||
}
|
||||
else if(PS_PROCESS_BA & SHUFFLE_READ)
|
||||
{
|
||||
C.rb = C.bb;
|
||||
if ((denorm_c.a & 0x80) != 0)
|
||||
C.ga = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.ga = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
|
||||
}
|
||||
else
|
||||
{
|
||||
C.rb = C.rr;
|
||||
if ((denorm_c.g & 0x80) != 0)
|
||||
C.ga = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.ga = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
|
||||
}
|
||||
}
|
||||
else
|
||||
else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA
|
||||
{
|
||||
C.rb = C.rr;
|
||||
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
|
||||
if ((denorm_c.g & 0x80) != 0)
|
||||
C.g = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.g = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80);
|
||||
if ((denorm_c.a & 0x80) != 0)
|
||||
C.a = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80);
|
||||
else
|
||||
C.a = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1367,7 +1367,9 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
|||
+ fmt::format("#define PS_IIP {}\n", sel.iip)
|
||||
+ fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle)
|
||||
+ fmt::format("#define PS_SHUFFLE_SAME {}\n", sel.shuffle_same)
|
||||
+ fmt::format("#define PS_READ_BA {}\n", sel.read_ba)
|
||||
+ fmt::format("#define PS_PROCESS_BA {}\n", sel.process_ba)
|
||||
+ fmt::format("#define PS_PROCESS_RG {}\n", sel.process_rg)
|
||||
+ fmt::format("#define PS_SHUFFLE_ACROSS {}\n", sel.shuffle_across)
|
||||
+ fmt::format("#define PS_READ16_SRC {}\n", sel.real16src)
|
||||
+ fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg)
|
||||
+ fmt::format("#define PS_FBMASK {}\n", sel.fbmask)
|
||||
|
|
|
@ -4813,7 +4813,9 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
|
|||
AddMacro(ss, "PS_IIP", sel.iip);
|
||||
AddMacro(ss, "PS_SHUFFLE", sel.shuffle);
|
||||
AddMacro(ss, "PS_SHUFFLE_SAME", sel.shuffle_same);
|
||||
AddMacro(ss, "PS_READ_BA", sel.read_ba);
|
||||
AddMacro(ss, "PS_PROCESS_BA", sel.process_ba);
|
||||
AddMacro(ss, "PS_PROCESS_RG", sel.process_rg);
|
||||
AddMacro(ss, "PS_SHUFFLE_ACROSS", sel.shuffle_across);
|
||||
AddMacro(ss, "PS_READ16_SRC", sel.real16src);
|
||||
AddMacro(ss, "PS_WRITE_RG", sel.write_rg);
|
||||
AddMacro(ss, "PS_FBMASK", sel.fbmask);
|
||||
|
|
Loading…
Reference in New Issue