diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index c6fdc3b170..e9e0ebcb1d 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -5,6 +5,10 @@ #define FMT_24 1 #define FMT_16 2 +#define SHUFFLE_READ 1 +#define SHUFFLE_WRITE 2 +#define SHUFFLE_READWRITE 3 + #ifndef VS_TME #define VS_IIP 0 #define VS_TME 1 @@ -41,7 +45,9 @@ #define PS_REGION_RECT 0 #define PS_SHUFFLE 0 #define PS_SHUFFLE_SAME 0 -#define PS_READ_BA 0 +#define PS_PROCESS_BA 0 +#define PS_PROCESS_RG 0 +#define PS_SHUFFLE_ACROSS 0 #define PS_READ16_SRC 0 #define PS_DST_FMT 0 #define PS_DEPTH_FMT 0 @@ -761,10 +767,10 @@ float4 ps_color(PS_INPUT input) float4 T = sample_color(st, input.t.w); #endif - if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC) + if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_before = uint4(T); - if (PS_READ_BA) + if (PS_PROCESS_BA & SHUFFLE_READ) { T.r = float((denorm_c_before.b << 3) & 0xF8); T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); @@ -1028,10 +1034,10 @@ PS_OUTPUT ps_main(PS_INPUT input) if (PS_SHUFFLE) { - if (!PS_SHUFFLE_SAME && !PS_READ16_SRC) + if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_after = uint4(C); - if (PS_READ_BA) + if (PS_PROCESS_BA & SHUFFLE_READ) { C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); @@ -1049,7 +1055,7 @@ PS_OUTPUT ps_main(PS_INPUT input) // Special case for 32bit input and 16bit output, shuffle used by The Godfather if (PS_SHUFFLE_SAME) { - if (PS_READ_BA) + if (PS_PROCESS_BA & SHUFFLE_READ) C = (float4)(float((denorm_c.b & 
0x7Fu) | (denorm_c.a & 0x80u))); else C.ga = C.rg; @@ -1063,23 +1069,48 @@ PS_OUTPUT ps_main(PS_INPUT input) else C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)); } - // Write RB part. Mask will take care of the correct destination - else if (PS_READ_BA) + else if (PS_SHUFFLE_ACROSS) { - C.rb = C.bb; - if (denorm_c.a & 0x80u) - C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) + { + C.rb = C.br; + if ((denorm_c.a & 0x80u) != 0u) + C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)); + + if ((denorm_c.g & 0x80u) != 0u) + C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)); + } + else if(PS_PROCESS_BA & SHUFFLE_READ) + { + C.rb = C.bb; + if ((denorm_c.a & 0x80u) != 0u) + C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + } else - C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + { + C.rb = C.rr; + if ((denorm_c.g & 0x80u) != 0u) + C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + } } - else + else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA { - C.rb = C.rr; - if (denorm_c.g & 0x80u) - C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); - + if ((denorm_c.g & 0x80u) != 0u) + C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)); else - C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)); + if ((denorm_c.a & 0x80u) != 0u) + C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.a = 
float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)); } } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 304d2e5d4c..2a0d26101f 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -7,6 +7,10 @@ #define FMT_24 1 #define FMT_16 2 +#define SHUFFLE_READ 1 +#define SHUFFLE_WRITE 2 +#define SHUFFLE_READWRITE 3 + // TEX_COORD_DEBUG output the uv coordinate as color. It is useful // to detect bad sampling due to upscaling //#define TEX_COORD_DEBUG @@ -695,9 +699,9 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC + #if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); - #if PS_READ_BA + #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8); T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); T.b = float((denorm_c_before.a << 1) & 0xF8); @@ -1027,9 +1031,9 @@ void ps_main() #if PS_SHUFFLE - #if !PS_SHUFFLE_SAME && !PS_READ16_SRC + #if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); - #if PS_READ_BA + #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); #else @@ -1043,7 +1047,7 @@ void ps_main() // Special case for 32bit input and 16bit output, shuffle used by The Godfather #if PS_SHUFFLE_SAME -#if (PS_READ_BA) +#if (PS_PROCESS_BA & SHUFFLE_READ) C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); #else C.ga = C.rg; @@ -1055,40 +1059,42 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | 
((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u))); else C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); -// Write RB part. Mask will take care of the correct destination -#elif PS_READ_BA - C.rb = C.bb; - // FIXME precompute my_TA & 0x80 - - // Write GA part. Mask will take care of the correct destination - // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n" - // However Nvidia emulate it with an if (at least on kepler arch) ...\n" - - // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below - // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x; - // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1); - // c.ga = vec2(float(denorm_c.a)); - - if (bool(denorm_c.a & 0x80u)) - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); +#elif PS_SHUFFLE_ACROSS + #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) + C.rb = C.br; + if ((denorm_c.a & 0x80u) != 0u) + C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)); + + if ((denorm_c.g & 0x80u) != 0u) + C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)); + + #elif(PS_PROCESS_BA & SHUFFLE_READ) + C.rb = C.bb; + if ((denorm_c.a & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + #else + C.rb = C.rr; + if ((denorm_c.g & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + #endif // PS_PROCESS_BA +#else // PS_SHUFFLE_ACROSS + if ((denorm_c.g & 0x80u) != 0u) + C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)); else - C.ga = vec2(float((denorm_c.a & 0x7Fu) | 
(denorm_TA.x & 0x80u))); - -#else - C.rb = C.rr; - if (bool(denorm_c.g & 0x80u)) - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)); + if ((denorm_c.a & 0x80u) != 0u) + C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)); else - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); - - // Nice idea but step/mix requires 4 instructions - // set / trunc / I2F / Mad - // - // float sel = step(128.0f, c.g); - // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)); - // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel); - -#endif // PS_SHUFFLE_SAME + C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)); +#endif // PS_SHUFFLE_ACROSS #endif // PS_SHUFFLE ps_dither(C.rgb, alpha_blend.a); diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 1b319daa72..626910b97c 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -233,6 +233,10 @@ void main() #define FMT_24 1 #define FMT_16 2 +#define SHUFFLE_READ 1 +#define SHUFFLE_WRITE 2 +#define SHUFFLE_READWRITE 3 + #ifndef VS_TME #define VS_TME 1 #define VS_FST 1 @@ -266,7 +270,9 @@ void main() #define PS_POINT_SAMPLER 0 #define PS_SHUFFLE 0 #define PS_SHUFFLE_SAME 0 -#define PS_READ_BA 0 +#define PS_PROCESS_BA 0 +#define PS_PROCESS_RG 0 +#define PS_SHUFFLE_ACROSS 0 #define PS_WRITE_RG 0 #define PS_READ16_SRC 0 #define PS_DST_FMT 0 @@ -945,9 +951,9 @@ vec4 ps_color() vec4 T = sample_color(st); #endif - #if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC + #if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_before = uvec4(T); - #if PS_READ_BA + #if (PS_PROCESS_BA & SHUFFLE_READ) T.r = float((denorm_c_before.b << 3) & 0xF8); T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); T.b = 
float((denorm_c_before.a << 1) & 0xF8); @@ -1277,9 +1283,9 @@ void main() ps_blend(C, alpha_blend); #if PS_SHUFFLE - #if !PS_SHUFFLE_SAME && !PS_READ16_SRC + #if SW_BLEND && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) uvec4 denorm_c_after = uvec4(C); - #if PS_READ_BA + #if (PS_PROCESS_BA & SHUFFLE_READ) C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); #else @@ -1293,7 +1299,7 @@ void main() // Special case for 32bit input and 16bit output, shuffle used by The Godfather #if PS_SHUFFLE_SAME - #if (PS_READ_BA) + #if (PS_PROCESS_BA & SHUFFLE_READ) C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); #else C.ga = C.rg; @@ -1306,19 +1312,42 @@ void main() else C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); // Write RB part. 
Mask will take care of the correct destination - #elif PS_READ_BA - C.rb = C.bb; - if ((denorm_c.a & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); - #else - C.rb = C.rr; + #elif PS_SHUFFLE_ACROSS + #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) + C.rb = C.br; + if ((denorm_c.a & 0x80u) != 0u) + C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.g = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)); + + if ((denorm_c.g & 0x80u) != 0u) + C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)); + + #elif(PS_PROCESS_BA & SHUFFLE_READ) + C.rb = C.bb; + if ((denorm_c.a & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + #else + C.rb = C.rr; + if ((denorm_c.g & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + #endif // PS_PROCESS_BA + #else // PS_SHUFFLE_ACROSS if ((denorm_c.g & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)); else - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); - #endif // PS_SHUFFLE_SAME + C.g = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)); + if ((denorm_c.a & 0x80u) != 0u) + C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)); + else + C.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)); + #endif // PS_SHUFFLE_ACROSS #endif // PS_SHUFFLE ps_dither(C.rgb, alpha_blend.a); diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 4a9050aaf0..4b704894dc 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h 
@@ -317,7 +317,9 @@ struct alignas(16) GSHWDrawConfig u32 shuffle : 1; u32 shuffle_same : 1; u32 real16src: 1; - u32 read_ba : 1; + u32 process_ba : 2; + u32 process_rg : 2; + u32 shuffle_across : 1; u32 write_rg : 1; u32 fbmask : 1; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index f575e18ba0..6142a12bd3 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1679,7 +1679,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_REGION_RECT", sel.region_rect); sm.AddMacro("PS_SHUFFLE", sel.shuffle); sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same); - sm.AddMacro("PS_READ_BA", sel.read_ba); + sm.AddMacro("PS_PROCESS_BA", sel.process_ba); + sm.AddMacro("PS_PROCESS_RG", sel.process_rg); + sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across); sm.AddMacro("PS_READ16_SRC", sel.real16src); sm.AddMacro("PS_CHANNEL_FETCH", sel.channel); sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 00e1b4ce7a..0e2cc8f7c0 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -2833,7 +2833,9 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_REGION_RECT", sel.region_rect); sm.AddMacro("PS_SHUFFLE", sel.shuffle); sm.AddMacro("PS_SHUFFLE_SAME", sel.shuffle_same); - sm.AddMacro("PS_READ_BA", sel.read_ba); + sm.AddMacro("PS_PROCESS_BA", sel.process_ba); + sm.AddMacro("PS_PROCESS_RG", sel.process_rg); + sm.AddMacro("PS_SHUFFLE_ACROSS", sel.shuffle_across); sm.AddMacro("PS_READ16_SRC", sel.real16src); sm.AddMacro("PS_CHANNEL_FETCH", sel.channel); sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index ce2aba53fa..d0721695cf 100644 --- 
a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -328,7 +328,7 @@ void GSRendererHW::ExpandLineIndices() } // Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color -void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex) +void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex) { const u32 count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; @@ -336,16 +336,22 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS // Could be drawing upside down or just back to front on the actual verts. const GSVertex* start_verts = (v[0].XYZ.X <= v[m_vertex.tail - 2].XYZ.X) ? &v[0] : &v[m_vertex.tail - 2]; const GSVertex first_vert = (start_verts[0].XYZ.X <= start_verts[1].XYZ.X) ? start_verts[0] : start_verts[1]; + const GSVertex second_vert = (start_verts[0].XYZ.X <= start_verts[1].XYZ.X) ? start_verts[1] : start_verts[0]; // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors const int pos = (first_vert.XYZ.X - o.OFX) & 0xFF; - write_ba = (pos > 112 && pos < 136); - + // Read texture is 8 to 16 pixels (same as above) const float tw = static_cast(1u << m_cached_ctx.TEX0.TW); - int tex_pos = (PRIM->FST) ? first_vert.U : static_cast(tw * first_vert.ST.S); + int tex_pos = (PRIM->FST) ? first_vert.U : static_cast(tw * first_vert.ST.S * 16.0f); tex_pos &= 0xFF; + shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; + + const bool full_width = !shuffle_across && ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; + process_rg = (!process_ba || full_width) ? 
SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks - read_ba = (tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF0000); + process_ba |= ((tex_pos > 112 && tex_pos < 144) || (m_same_group_texture_shuffle && (m_cached_ctx.FRAME.FBMSK & 0xFFFF0000) != 0xFFFF0000) || full_width) ? SHUFFLE_READ : 0; + process_rg |= (!(process_ba & SHUFFLE_READ) || full_width) ? SHUFFLE_READ : 0; // Another way of selecting whether to read RG/BA is to use region repeat. // Ace Combat 04 reads RG, writes to RGBA by setting a MINU of 1015. @@ -356,9 +362,29 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS m_cached_ctx.CLAMP.MAXV); // offset coordinates swap around RG/BA. - const bool invert = read_ba; // (tex_pos > 112 && tex_pos < 144), i.e. 8 fixed point - const u32 minu = (m_cached_ctx.CLAMP.MINU & 8) ^ (invert ? 8 : 0); - read_ba = ((minu & 8) != 0); + const u32 maxu = (m_cached_ctx.CLAMP.MAXU & 8); + const u32 minu = (m_cached_ctx.CLAMP.MINU & 8); + if (maxu) + { + process_ba |= SHUFFLE_READ; + process_rg &= ~SHUFFLE_READ; + if (!PRIM->ABE && (process_rg & SHUFFLE_WRITE)) + { + process_ba &= ~SHUFFLE_WRITE; + shuffle_across = true; + } + } + else if (minu == 0) + { + process_rg |= SHUFFLE_READ; + process_ba &= ~SHUFFLE_READ; + + if (!PRIM->ABE && (process_ba & SHUFFLE_WRITE)) + { + process_rg &= ~SHUFFLE_WRITE; + shuffle_across = true; + } + } } if (m_split_texture_shuffle_pages > 0) @@ -418,7 +444,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS // If a game does the texture and frame doubling differently, they can burn in hell. if (!m_copy_16bit_to_target_shuffle && m_cached_ctx.TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { - unsigned int max_tex_draw_width = std::min(static_cast(m_vt.m_max.t.x + (!read_ba ? 
8 : 0)), 1 << m_cached_ctx.TEX0.TW); + unsigned int max_tex_draw_width = std::min(static_cast(m_vt.m_max.t.x + (!(process_ba & SHUFFLE_READ) ? 8 : 0)), 1 << m_cached_ctx.TEX0.TW); /* only the READ bit decides the extra 8; process_ba may also carry SHUFFLE_WRITE */ const unsigned int clamp_minu = m_context->CLAMP.MINU; const unsigned int clamp_maxu = m_context->CLAMP.MAXU; @@ -473,15 +499,19 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS const int reversed_U = (v[0].U > v[1].U) ? 1 : 0; for (u32 i = 0; i < count; i += 2) { - if (write_ba) - v[i + reversed_pos].XYZ.X -= 128u; - else - v[i + 1 - reversed_pos].XYZ.X += 128u; - if (read_ba) - v[i + reversed_U].U -= 128u; - else - v[i + 1 - reversed_U].U += 128u; + if (!full_width) + { + if (process_ba & SHUFFLE_WRITE) + v[i + reversed_pos].XYZ.X -= 128u; + else + v[i + 1 - reversed_pos].XYZ.X += 128u; + + if (process_ba & SHUFFLE_READ) + v[i + reversed_U].U -= 128u; + else + v[i + 1 - reversed_U].U += 128u; + } if (half_bottom_vert) { @@ -530,15 +560,19 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS for (u32 i = 0; i < count; i += 2) { - if (write_ba) - v[i + reversed_pos].XYZ.X -= 128u; - else - v[i + 1 - reversed_pos].XYZ.X += 128u; - if (read_ba) - v[i + reversed_S].ST.S -= offset_8pix; - else - v[i + 1 - reversed_S].ST.S += offset_8pix; + if (!full_width) + { + if (process_ba & SHUFFLE_WRITE) + v[i + reversed_pos].XYZ.X -= 128u; + else + v[i + 1 - reversed_pos].XYZ.X += 128u; + + if (process_ba & SHUFFLE_READ) + v[i + reversed_S].ST.S -= offset_8pix; + else + v[i + 1 - reversed_S].ST.S += offset_8pix; + } if (half_bottom_vert) { @@ -579,18 +613,21 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS } } - // Update vertex trace too. Avoid issue to compute bounding box - if (write_ba) - m_vt.m_min.p.x -= 8.0f; - else - m_vt.m_max.p.x += 8.0f; - - if (!m_same_group_texture_shuffle) + if (!full_width) { - if (read_ba) - m_vt.m_min.t.x -= 8.0f; + // Update vertex trace too.
Avoid issue to compute bounding box + if (process_ba & SHUFFLE_WRITE) + m_vt.m_min.p.x -= 8.0f; else - m_vt.m_max.t.x += 8.0f; + m_vt.m_max.p.x += 8.0f; + + if (!m_same_group_texture_shuffle) + { + if (process_ba & SHUFFLE_READ) /* texture bounds follow the read side, as the removed read_ba test did */ + m_vt.m_min.t.x -= 8.0f; + else + m_vt.m_max.t.x += 8.0f; + } } if (half_right_vert) @@ -1858,7 +1895,8 @@ void GSRendererHW::Draw() // Fortunately, it seems to change the FBMSK along the way, so this check alone is sufficient. // Tomb Raider: Underworld does similar, except with R, G, B in separate palettes, therefore // we need to split on those too. - m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK; + m_channel_shuffle = IsPossibleChannelShuffle() && m_last_channel_shuffle_fbmsk == m_context->FRAME.FBMSK && + m_last_channel_shuffle_fbp <= m_context->FRAME.Block() && m_last_channel_shuffle_end_block > m_context->FRAME.Block(); #ifdef ENABLE_OGL_DEBUG if (m_channel_shuffle) @@ -2507,6 +2545,12 @@ void GSRendererHW::Draw() } } + if (rt && m_channel_shuffle) + { + m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_end_block = rt->m_end_block; + } + GSTextureCache::Target* ds = nullptr; GIFRegTEX0 ZBUF_TEX0; if (!no_ds) @@ -2601,6 +2645,11 @@ void GSRendererHW::Draw() GL_INS("Channel shuffle effect detected (2nd shot)"); m_channel_shuffle = true; m_last_channel_shuffle_fbmsk = m_context->FRAME.FBMSK; + if (rt) + { + m_last_channel_shuffle_fbp = rt->m_TEX0.TBP0; + m_last_channel_shuffle_end_block = rt->m_end_block; + } } else { @@ -3378,17 +3427,15 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS m_conf.ps.shuffle = 1; m_conf.ps.dst_fmt = GSLocalMemory::PSM_FMT_32; - bool write_ba; - bool read_ba; + u32 process_rg = 0; + u32 process_ba = 0; + bool shuffle_across = true; - ConvertSpriteTextureShuffle(write_ba, read_ba, rt, tex); + ConvertSpriteTextureShuffle(process_rg, process_ba, shuffle_across, rt, tex); - // If date is enabled
you need to test the green channel instead of the - // alpha channel. Only enable this code in DATE mode to reduce the number - // of shader. - m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_cached_ctx.TEST.DATE; - - m_conf.ps.read_ba = read_ba; + // If date is enabled you need to test the green channel instead of the alpha channel. + // Only enable this code in DATE mode to reduce the number of shaders. + m_conf.ps.write_rg = (process_rg & SHUFFLE_WRITE) && features.texture_barrier && m_cached_ctx.TEST.DATE; m_conf.ps.real16src = m_copy_16bit_to_target_shuffle; m_conf.ps.shuffle_same = m_same_group_texture_shuffle; // Please bang my head against the wall! @@ -3401,30 +3448,26 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS // r = rb mask, g = ga mask const GSVector2i rb_ga_mask = GSVector2i(fbmask & 0xFF, (fbmask >> 8) & 0xFF); + m_conf.ps.process_rg = process_rg; + m_conf.ps.process_ba = process_ba; + m_conf.ps.shuffle_across = shuffle_across; // Ace Combat 04 sets FBMSK to 0 for the shuffle, duplicating RG across RGBA. // Given how touchy texture shuffles are, I'm not ready to make it 100% dependent on the real FBMSK yet. // TODO: Remove this if, and see what breaks. - if (fbmask != 0) - { - m_conf.colormask.wrgba = 0; - } - else - { - m_conf.colormask.wr = m_conf.colormask.wg = (rb_ga_mask.r != 0xFF); - m_conf.colormask.wb = m_conf.colormask.wa = (rb_ga_mask.g != 0xFF); - } + m_conf.colormask.wrgba = 0; // 2 Select the new mask if (rb_ga_mask.r != 0xFF) { - if (write_ba) + if (process_ba & SHUFFLE_WRITE) { - GL_INS("Color shuffle %s => B", read_ba ? "B" : "R"); + GL_INS("Color shuffle %s => B", ((process_rg & SHUFFLE_READ) && shuffle_across) ? "R" : "B"); m_conf.colormask.wb = 1; } - else + + if (process_rg & SHUFFLE_WRITE) { - GL_INS("Color shuffle %s => R", read_ba ? "B" : "R"); + GL_INS("Color shuffle %s => R", ((process_ba & SHUFFLE_READ) && shuffle_across) ? 
"B" : "R"); m_conf.colormask.wr = 1; } if (rb_ga_mask.r) @@ -3433,14 +3476,15 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GS if (rb_ga_mask.g != 0xFF) { - if (write_ba) + if (process_ba & SHUFFLE_WRITE) { - GL_INS("Color shuffle %s => A", read_ba ? "A" : "G"); + GL_INS("Color shuffle %s => A", ((process_rg & SHUFFLE_READ) && shuffle_across) ? "G" : "A"); m_conf.colormask.wa = 1; } - else + + if (process_rg & SHUFFLE_WRITE) { - GL_INS("Color shuffle %s => G", read_ba ? "A" : "G"); + GL_INS("Color shuffle %s => G", ((process_ba & SHUFFLE_READ) && shuffle_across) ? "A" : "G"); m_conf.colormask.wg = 1; } if (rb_ga_mask.g) @@ -3590,7 +3634,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool if (test_only) return true; - ChannelFetch channel_select = (m_cached_ctx.CLAMP.WMT != 3 || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA; + ChannelFetch channel_select = ((m_cached_ctx.CLAMP.WMT != 3 && (m_vertex.buff[m_index.buff[0]].V & 0x20) == 0) || (m_cached_ctx.CLAMP.WMT == 3 && ((m_cached_ctx.CLAMP.MAXV & 0x2) == 0))) ? ChannelFetch_BLUE : ChannelFetch_ALPHA; GL_INS("%s channel", (channel_select == ChannelFetch_BLUE) ? 
"blue" : "alpha"); @@ -5316,7 +5360,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { if (m_texture_shuffle) { - if (m_conf.ps.read_ba) + if (m_conf.ps.process_ba & SHUFFLE_READ) { m_can_correct_alpha = false; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 109a510998..71a248f020 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -66,6 +66,13 @@ private: CLUTDrawOnGPU, }; + enum ShuffleProcessing + { + SHUFFLE_READ = 1, + SHUFFLE_WRITE, + SHUFFLE_READWRITE, + }; + bool HasEEUpload(GSVector4i r); CLUTDrawTestResult PossibleCLUTDraw(); CLUTDrawTestResult PossibleCLUTDrawAggressive(); @@ -157,6 +164,8 @@ private: u32 m_split_texture_shuffle_fbw = 0; u32 m_last_channel_shuffle_fbmsk = 0; + u32 m_last_channel_shuffle_fbp = 0; + u32 m_last_channel_shuffle_end_block = 0; GIFRegFRAME m_split_clear_start = {}; GIFRegZBUF m_split_clear_start_Z = {}; @@ -193,7 +202,7 @@ public: void Lines2Sprites(); bool VerifyIndices(); void ExpandLineIndices(); - void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GSTextureCache::Target* rt, GSTextureCache::Source* tex); + void ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, bool& shuffle_across, GSTextureCache::Target* rt, GSTextureCache::Source* tex); GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex); GSVector4i ComputeBoundingBox(const GSVector2i& rtsize, float rtscale); void MergeSprite(GSTextureCache::Source* tex); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index b0b1c25e7c..9c1fe7f955 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -3875,7 +3875,11 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx GSHWDrawConfig& config = GSRendererHW::GetInstance()->BeginHLEHardwareDraw(tgt->m_texture, nullptr, tgt->m_scale, 
tgt->m_texture, tgt->m_scale, bbox); config.colormask.wrgba = (write_rg ? (1 | 2) : (4 | 8)); - config.ps.read_ba = read_ba; + config.ps.process_ba = read_ba ? 1 : 0; /* 1 = SHUFFLE_READ */ + config.ps.process_rg = !read_ba ? 1 : 0; + config.ps.process_ba |= !write_rg ? 2 : 0; /* 2 = SHUFFLE_WRITE; OR it in so the read bit set above is kept */ + config.ps.process_rg |= write_rg ? 2 : 0; + config.ps.shuffle_across = true; config.ps.write_rg = write_rg; config.ps.shuffle = true; GSRendererHW::GetInstance()->EndHLEHardwareDraw(false); diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index 23ab9cb7b7..50ee6117c9 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1810,7 +1810,9 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF); setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE); setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME); - setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA); + setFnConstantI(m_fn_constants, pssel.process_ba, GSMTLConstantIndex_PS_PROCESS_BA); + setFnConstantI(m_fn_constants, pssel.process_rg, GSMTLConstantIndex_PS_PROCESS_RG); + setFnConstantB(m_fn_constants, pssel.shuffle_across, GSMTLConstantIndex_PS_SHUFFLE_ACROSS); setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC); setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG); setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index 6907c325dd..d6aa9cecde 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -171,7 +171,9 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_LTF, GSMTLConstantIndex_PS_SHUFFLE, GSMTLConstantIndex_PS_SHUFFLE_SAME, -
GSMTLConstantIndex_PS_READ_BA, + GSMTLConstantIndex_PS_PROCESS_BA, + GSMTLConstantIndex_PS_PROCESS_RG, + GSMTLConstantIndex_PS_SHUFFLE_ACROSS, GSMTLConstantIndex_PS_READ16_SRC, GSMTLConstantIndex_PS_WRITE_RG, GSMTLConstantIndex_PS_FBMASK, diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index ebe7d23bf7..265adb5b3c 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -7,6 +7,10 @@ constant uint FMT_32 = 0; constant uint FMT_24 = 1; constant uint FMT_16 = 2; +constant uint SHUFFLE_READ = 1; +constant uint SHUFFLE_WRITE = 2; +constant uint SHUFFLE_READWRITE = 3; + constant bool HAS_FBFETCH [[function_constant(GSMTLConstantIndex_FRAMEBUFFER_FETCH)]]; constant bool FST [[function_constant(GSMTLConstantIndex_FST)]]; constant bool IIP [[function_constant(GSMTLConstantIndex_IIP)]]; @@ -30,7 +34,9 @@ constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_AD constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]]; constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]]; constant bool PS_SHUFFLE_SAME [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_SAME)]]; -constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]]; +constant uint PS_PROCESS_BA [[function_constant(GSMTLConstantIndex_PS_PROCESS_BA)]]; +constant uint PS_PROCESS_RG [[function_constant(GSMTLConstantIndex_PS_PROCESS_RG)]]; +constant bool PS_SHUFFLE_ACROSS [[function_constant(GSMTLConstantIndex_PS_SHUFFLE_ACROSS)]]; constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]]; constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]]; constant bool PS_FBMASK [[function_constant(GSMTLConstantIndex_PS_FBMASK)]]; @@ -825,10 +831,10 @@ struct PSMain else T = sample_color(st); - if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC) + if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA 
== SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_before = uint4(T); - if (PS_READ_BA) + if (PS_PROCESS_BA & SHUFFLE_READ) { T.r = float((denorm_c_before.b << 3) & 0xF8); T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); @@ -1097,10 +1103,10 @@ struct PSMain if (PS_SHUFFLE) { - if (!PS_SHUFFLE_SAME && !PS_READ16_SRC) + if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) { uint4 denorm_c_after = uint4(C); - if (PS_READ_BA) + if (PS_PROCESS_BA & SHUFFLE_READ) { C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); @@ -1118,30 +1124,62 @@ struct PSMain // Special case for 32bit input and 16bit output, shuffle used by The Godfather if (PS_SHUFFLE_SAME) { - if (PS_READ_BA) - C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80); + if (PS_PROCESS_BA & SHUFFLE_READ) + C = (denorm_c.b & 0x7F) | (denorm_c.a & 0x80); else C.ga = C.rg; } // Copy of a 16bit source in to this target else if (PS_READ16_SRC) { - C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5); + C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7) << 5); if (denorm_c.a & 0x80) C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80); else C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80); } - // Write RB part. Mask will take care of the correct destination - else if (PS_READ_BA) + else if (PS_SHUFFLE_ACROSS) { - C.rb = C.bb; - C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? 
denorm_TA.y & 0x80 : denorm_TA.x & 0x80); + if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) + { + C.rb = C.br; + if ((denorm_c.a & 0x80) != 0) + C.g = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80); + else + C.g = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80); + + if ((denorm_c.g & 0x80) != 0) + C.a = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80); + else + C.a = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80); + } + else if(PS_PROCESS_BA & SHUFFLE_READ) + { + C.rb = C.bb; + if ((denorm_c.a & 0x80) != 0) + C.ga = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80); + else + C.ga = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80); + } + else + { + C.rb = C.rr; + if ((denorm_c.g & 0x80) != 0) + C.ga = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80); + else + C.ga = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80); + } } - else + else // Basically a direct copy but a shuffle of both pairs of channels, so green and alpha get modified by TEXA { - C.rb = C.rr; - C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? 
denorm_TA.y & 0x80 : denorm_TA.x & 0x80); + if ((denorm_c.g & 0x80) != 0) + C.g = (denorm_c.g & 0x7F) | (denorm_TA.y & 0x80); + else + C.g = (denorm_c.g & 0x7F) | (denorm_TA.x & 0x80); + if ((denorm_c.a & 0x80) != 0) + C.a = (denorm_c.a & 0x7F) | (denorm_TA.y & 0x80); + else + C.a = (denorm_c.a & 0x7F) | (denorm_TA.x & 0x80); } } diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 1449f793c4..96d80834e3 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1367,7 +1367,9 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) + fmt::format("#define PS_IIP {}\n", sel.iip) + fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle) + fmt::format("#define PS_SHUFFLE_SAME {}\n", sel.shuffle_same) - + fmt::format("#define PS_READ_BA {}\n", sel.read_ba) + + fmt::format("#define PS_PROCESS_BA {}\n", sel.process_ba) + + fmt::format("#define PS_PROCESS_RG {}\n", sel.process_rg) + + fmt::format("#define PS_SHUFFLE_ACROSS {}\n", sel.shuffle_across) + fmt::format("#define PS_READ16_SRC {}\n", sel.real16src) + fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg) + fmt::format("#define PS_FBMASK {}\n", sel.fbmask) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 96c107d97a..782aee3460 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -4813,7 +4813,9 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_IIP", sel.iip); AddMacro(ss, "PS_SHUFFLE", sel.shuffle); AddMacro(ss, "PS_SHUFFLE_SAME", sel.shuffle_same); - AddMacro(ss, "PS_READ_BA", sel.read_ba); + AddMacro(ss, "PS_PROCESS_BA", sel.process_ba); + AddMacro(ss, "PS_PROCESS_RG", sel.process_rg); + AddMacro(ss, "PS_SHUFFLE_ACROSS", sel.shuffle_across); AddMacro(ss, "PS_READ16_SRC", sel.real16src); AddMacro(ss, "PS_WRITE_RG", sel.write_rg); AddMacro(ss, "PS_FBMASK", 
sel.fbmask);