diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 3d50921e08..9d9a45105d 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -742,6 +742,25 @@ float4 ps_color(PS_INPUT input) float4 T = sample_color(st, input.t.w); #endif + if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC) + { + uint4 denorm_c_before = uint4(T); + if (PS_READ_BA) + { + T.r = float((denorm_c_before.b << 3) & 0xF8); + T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); + T.b = float((denorm_c_before.a << 1) & 0xF8); + T.a = float(denorm_c_before.a & 0x80); + } + else + { + T.r = float((denorm_c_before.r << 3) & 0xF8); + T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0)); + T.b = float((denorm_c_before.g << 1) & 0xF8); + T.a = float(denorm_c_before.g & 0x80); + } + } + float4 C = tfx(T, input.c); atst(C); @@ -925,48 +944,6 @@ PS_OUTPUT ps_main(PS_INPUT input) discard; } - if (PS_SHUFFLE) - { - uint4 denorm_c = uint4(C); - uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); - - // Special case for 32bit input and 16bit output, shuffle used by The Godfather - if (PS_SHUFFLE_SAME) - { - if (PS_READ_BA) - C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); - else - C.ga = C.rg; - } - // Copy of a 16bit source in to this target - else if (PS_READ16_SRC) - { - C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)); - if (denorm_c.a & 0x80u) - C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)); - else - C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)); - } - // Write RB part. Mask will take care of the correct destination - else if (PS_READ_BA) - { - C.rb = C.bb; - if (denorm_c.a & 0x80u) - C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); - } - else - { - C.rb = C.rr; - if (denorm_c.g & 0x80u) - C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); - - else - C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); - } - } - // Must be done before alpha correction // AA (Fixed one) will output a coverage of 1.0 as alpha @@ -1023,6 +1000,63 @@ PS_OUTPUT ps_main(PS_INPUT input) ps_blend(C, alpha_blend, input.p.xy); + if (PS_SHUFFLE) + { + if (!PS_SHUFFLE_SAME && !PS_READ16_SRC) + { + uint4 denorm_c_after = uint4(C); + if (PS_READ_BA) + { + C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + } + else + { + C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + } + } + + uint4 denorm_c = uint4(C); + uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); + + // Special case for 32bit input and 16bit output, shuffle used by The Godfather + if (PS_SHUFFLE_SAME) + { + if (PS_READ_BA) + C = (float4)(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); + else + C.ga = C.rg; + } + // Copy of a 16bit source in to this target + else if (PS_READ16_SRC) + { + C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)); + if (denorm_c.a & 0x80u) + C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)); + else + C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)); + } + // Write RB part. Mask will take care of the correct destination + else if (PS_READ_BA) + { + C.rb = C.bb; + if (denorm_c.a & 0x80u) + C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + } + else + { + C.rb = C.rr; + if (denorm_c.g & 0x80u) + C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + + else + C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + } + } + ps_dither(C.rgb, input.p.xy); // Color clamp/wrap needs to be done after sw blending and dithering diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 96b5d0e56f..e12a4ae6d1 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -687,6 +687,21 @@ vec4 ps_color() vec4 T = sample_color(st); #endif + #if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC + uvec4 denorm_c_before = uvec4(T); + #if PS_READ_BA + T.r = float((denorm_c_before.b << 3) & 0xF8); + T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); + T.b = float((denorm_c_before.a << 1) & 0xF8); + T.a = float(denorm_c_before.a & 0x80); + #else + T.r = float((denorm_c_before.r << 3) & 0xF8); + T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0)); + T.b = float((denorm_c_before.g << 1) & 0xF8); + T.a = float(denorm_c_before.g & 0x80); + #endif + #endif + vec4 C = tfx(T, PSin.c); atst(C); @@ -937,7 +952,56 @@ void ps_main() vec4 C = ps_color(); + // Must be done before alpha correction + + // AA (Fixed one) will output a coverage of 1.0 as alpha +#if PS_FIXED_ONE_A + C.a = 128.0f; +#endif + +#if SW_AD_TO_HW + vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f); + vec4 alpha_blend = vec4(RT.a / 128.0f); +#else + vec4 alpha_blend = vec4(C.a / 128.0f); +#endif + + // Correct the ALPHA value based on the output format +#if (PS_DST_FMT == FMT_16) + float A_one = 128.0f; // alpha output will be 0x80 + C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one; +#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0) + if(C.a < 128.0f) C.a += 128.0f; +#endif + + // Get first primitive that will write a failling alpha value +#if PS_DATE == 1 + // DATM == 0 + // Pixel with alpha equal to 1 will failed (128-255) + SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); + return; +#elif PS_DATE == 2 + // DATM == 1 + // Pixel with alpha equal to 0 will failed (0-127) + SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); + return; +#endif + + ps_blend(C, alpha_blend); + + #if PS_SHUFFLE + #if !PS_SHUFFLE_SAME && !PS_READ16_SRC + uvec4 denorm_c_after = uvec4(C); + #if PS_READ_BA + C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + #else + C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + #endif + #endif + uvec4 denorm_c = uvec4(C); uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); @@ -991,43 +1055,6 @@ void ps_main() #endif // PS_SHUFFLE_SAME #endif // PS_SHUFFLE - // Must be done before alpha correction - - // AA (Fixed one) will output a coverage of 1.0 as alpha -#if PS_FIXED_ONE_A - C.a = 128.0f; -#endif - -#if SW_AD_TO_HW - vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f); - vec4 alpha_blend = vec4(RT.a / 128.0f); -#else - vec4 alpha_blend = vec4(C.a / 128.0f); -#endif - - // Correct the ALPHA value based on the output format -#if (PS_DST_FMT == FMT_16) - float A_one = 128.0f; // alpha output will be 0x80 - C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one; -#elif (PS_DST_FMT == FMT_32) && (PS_FBA != 0) - if(C.a < 128.0f) C.a += 128.0f; -#endif - - // Get first primitive that will write a failling alpha value -#if PS_DATE == 1 - // DATM == 0 - // Pixel with alpha equal to 1 will failed (128-255) - SV_Target0 = (C.a > 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); - return; -#elif PS_DATE == 2 - // DATM == 1 - // Pixel with alpha equal to 0 will failed (0-127) - SV_Target0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); - return; -#endif - - ps_blend(C, alpha_blend); - ps_dither(C.rgb); // Color clamp/wrap needs to be done after sw blending and dithering diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 921ac7e7b3..a40bd9535d 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -933,6 +933,21 @@ vec4 ps_color() vec4 T = sample_color(st); #endif + #if PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC + uvec4 denorm_c_before = uvec4(T); + #if PS_READ_BA + T.r = float((denorm_c_before.b << 3) & 0xF8); + T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); + T.b = float((denorm_c_before.a << 1) & 0xF8); + T.a = float(denorm_c_before.a & 0x80); + #else + T.r = float((denorm_c_before.r << 3) & 0xF8); + T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0)); + T.b = float((denorm_c_before.g << 1) & 0xF8); + T.a = float(denorm_c_before.g & 0x80); + #endif + #endif + vec4 C = tfx(T, vsIn.c); atst(C); @@ -1184,40 +1199,6 @@ void main() vec4 C = ps_color(); - #if PS_SHUFFLE - uvec4 denorm_c = uvec4(C); - uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); - - // Special case for 32bit input and 16bit output, shuffle used by The Godfather - #if PS_SHUFFLE_SAME - #if (PS_READ_BA) - C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); - #else - C.ga = C.rg; - #endif - // Copy of a 16bit source in to this target - #elif PS_READ16_SRC - C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5))); - if ((denorm_c.a & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); - // Write RB part. Mask will take care of the correct destination - #elif PS_READ_BA - C.rb = C.bb; - if ((denorm_c.a & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); - #else - C.rb = C.rr; - if ((denorm_c.g & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); - #endif // PS_SHUFFLE_SAME - #endif // PS_SHUFFLE - // Must be done before alpha correction // AA (Fixed one) will output a coverage of 1.0 as alpha @@ -1254,9 +1235,53 @@ void main() o_col0 = (C.a < 127.5f) ? vec4(gl_PrimitiveID) : vec4(0x7FFFFFFF); #else - ps_blend(C, alpha_blend); +#if PS_SHUFFLE + #if !PS_SHUFFLE_SAME && !PS_READ16_SRC + uvec4 denorm_c_after = uvec4(C); + #if PS_READ_BA + C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + #else + C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + #endif + #endif + + uvec4 denorm_c = uvec4(C); + uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); + + // Special case for 32bit input and 16bit output, shuffle used by The Godfather + #if PS_SHUFFLE_SAME + #if (PS_READ_BA) + C = vec4(float((denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80u))); + #else + C.ga = C.rg; + #endif + // Copy of a 16bit source in to this target + #elif PS_READ16_SRC + C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5))); + if ((denorm_c.a & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); + // Write RB part. Mask will take care of the correct destination + #elif PS_READ_BA + C.rb = C.bb; + if ((denorm_c.a & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + #else + C.rb = C.rr; + if ((denorm_c.g & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + #endif // PS_SHUFFLE_SAME + #endif // PS_SHUFFLE + ps_dither(C.rgb); // Color clamp/wrap needs to be done after sw blending and dithering diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 21b0dfb994..b0549ab01c 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5169,7 +5169,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta } bool blending_alpha_pass = false; - if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && (m_conf.colormask.wrgba & 0x7)) + if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle))) { EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass); } diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index c35a232d4d..0dc78180ad 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -807,6 +807,25 @@ struct PSMain else T = sample_color(st); + if (PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC) + { + uint4 denorm_c_before = uint4(T); + if (PS_READ_BA) + { + T.r = float((denorm_c_before.b << 3) & 0xF8); + T.g = float(((denorm_c_before.b >> 2) & 0x38) | ((denorm_c_before.a << 6) & 0xC0)); + T.b = float((denorm_c_before.a << 1) & 0xF8); + T.a = float(denorm_c_before.a & 0x80); + } + else + { + T.r = float((denorm_c_before.r << 3) & 0xF8); + T.g = float(((denorm_c_before.r >> 2) & 0x38) | ((denorm_c_before.g << 6) & 0xC0)); + T.b = float((denorm_c_before.g << 1) & 0xF8); + T.a = float(denorm_c_before.g & 0x80); + } + } + float4 C = tfx(T, IIP ? in.c : in.fc); if (!atst(C)) discard_fragment(); @@ -1005,41 +1024,6 @@ struct PSMain float4 C = ps_color(); - if (PS_SHUFFLE) - { - uint4 denorm_c = uint4(C); - uint2 denorm_TA = uint2(cb.ta * 255.5f); - - // Special case for 32bit input and 16bit output, shuffle used by The Godfather - if (PS_SHUFFLE_SAME) - { - if (PS_READ_BA) - C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80); - else - C.ga = C.rg; - } - // Copy of a 16bit source in to this target - else if (PS_READ16_SRC) - { - C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5); - if (denorm_c.a & 0x80) - C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80); - else - C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80); - } - // Write RB part. Mask will take care of the correct destination - else if (PS_READ_BA) - { - C.rb = C.bb; - C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80); - } - else - { - C.rb = C.rr; - C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80); - } - } - // Must be done before alpha correction // AA (Fixed one) will output a coverage of 1.0 as alpha @@ -1077,6 +1061,56 @@ struct PSMain ps_blend(C, alpha_blend); + if (PS_SHUFFLE) + { + if (!PS_SHUFFLE_SAME && !PS_READ16_SRC) + { + uint4 denorm_c_after = uint4(C); + if (PS_READ_BA) + { + C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.a = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + } + else + { + C.r = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); + C.g = float(((denorm_c_after.g >> 6) & 0x3) | ((denorm_c_after.b >> 1) & 0x7C) | (denorm_c_after.a & 0x80)); + } + } + + uint4 denorm_c = uint4(C); + uint2 denorm_TA = uint2(cb.ta * 255.5f); + + // Special case for 32bit input and 16bit output, shuffle used by The Godfather + if (PS_SHUFFLE_SAME) + { + if (PS_READ_BA) + C = (denorm_c.b & 0x7Fu) | (denorm_c.a & 0x80); + else + C.ga = C.rg; + } + // Copy of a 16bit source in to this target + else if (PS_READ16_SRC) + { + C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5); + if (denorm_c.a & 0x80) + C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80); + else + C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80); + } + // Write RB part. Mask will take care of the correct destination + else if (PS_READ_BA) + { + C.rb = C.bb; + C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80); + } + else + { + C.rb = C.rr; + C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80); + } + } + ps_dither(C); // Color clamp/wrap needs to be done after sw blending and dithering diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 456224d88f..f267c91147 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 37; +static constexpr u32 SHADER_CACHE_VERSION = 38;