diff --git a/bin/resources/shaders/dx11/convert.fx b/bin/resources/shaders/dx11/convert.fx index 300708df4d..b07ae04d7f 100644 --- a/bin/resources/shaders/dx11/convert.fx +++ b/bin/resources/shaders/dx11/convert.fx @@ -139,7 +139,7 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); - output.c = float4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f); + output.c = float4(value.rgb, value.a / (128.5f / 255.0f)); return output; } @@ -147,7 +147,7 @@ PS_OUTPUT ps_rta_decorrection(PS_INPUT input) { PS_OUTPUT output; float4 value = sample_c(input.t); - output.c = float4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f); + output.c = float4(value.rgb, value.a * (128.5f / 255.0f)); return output; } diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 6713fa28f9..c6fdc3b170 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -51,6 +51,7 @@ #define PS_URBAN_CHAOS_HLE 0 #define PS_HDR 0 #define PS_RTA_CORRECTION 0 +#define PS_RTA_SRC_CORRECTION 0 #define PS_COLCLIP 0 #define PS_BLEND_A 0 #define PS_BLEND_B 0 @@ -330,7 +331,16 @@ uint4 sample_4_index(float4 uv, float uv_w) c.w = sample_c(uv.zw, uv_w).a; // Denormalize value - uint4 i = uint4(c * 255.5f); + uint4 i; + + if (PS_RTA_SRC_CORRECTION) + { + i = uint4(c * 128.25f); // Denormalize value + } + else + { + i = uint4(c * 255.5f); // Denormalize value + } if (PS_PAL_FMT == 1) { @@ -650,6 +660,9 @@ float4 sample_color(float2 st, float uv_w) t = c[0]; } + if (PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION) + t.a = t.a * (128.5f / 255.0f); + return trunc(t * 255.0f + 0.05f); } @@ -850,7 +863,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy) float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f; - float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 127.5f + 0.05f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f; + float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f; float3 Cd = trunc(RT.rgb * 255.0f + 0.1f); float3 Cs = Color.rgb; @@ -968,7 +981,7 @@ PS_OUTPUT ps_main(PS_INPUT input) float4 alpha_blend = (float4)0.0f; if (SW_AD_TO_HW) { - float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 127.5f + 0.05f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f); + float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 128.0f + 0.1f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f); alpha_blend = (float4)(RT.a / 128.0f); } else diff --git a/bin/resources/shaders/opengl/convert.glsl b/bin/resources/shaders/opengl/convert.glsl index 28c55955d6..f740d0b1fa 100644 --- a/bin/resources/shaders/opengl/convert.glsl +++ b/bin/resources/shaders/opengl/convert.glsl @@ -337,7 +337,7 @@ void ps_datm0_rta_correction() void ps_rta_correction() { vec4 value = sample_c(); - SV_Target0 = vec4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f); + SV_Target0 = vec4(value.rgb, value.a / (128.5f / 255.0f)); } #endif @@ -345,7 +345,7 @@ void ps_rta_correction() void ps_rta_decorrection() { vec4 value = sample_c(); - SV_Target0 = vec4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f); + SV_Target0 = vec4(value.rgb, value.a * (128.5f / 255.0f)); } #endif diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index fe0ccd2d1a..304d2e5d4c 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -281,8 +281,12 @@ uvec4 sample_4_index(vec4 uv) c.y = sample_c(uv.zy).a; c.z = sample_c(uv.xw).a; c.w = sample_c(uv.zw).a; - + +#if PS_RTA_SRC_CORRECTION + uvec4 i = uvec4(c * 128.25f); // Denormalize value +#else uvec4 i = uvec4(c * 255.5f); // Denormalize value +#endif #if PS_PAL_FMT == 1 // 4HL @@ -591,6 +595,10 @@ vec4 sample_color(vec2 st) t = c[0]; #endif +#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION + t.a = t.a * (128.5f / 255.0f); +#endif + // The 0.05f helps to fix the overbloom of sotc // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit) // interpolation could be slightly below the correct one. @@ -803,7 +811,7 @@ float As = As_rgba.a; #endif #if PS_RTA_CORRECTION - float Ad = trunc(RT.a * 127.5f + 0.05f) / 128.0f; + float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f; #else float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; #endif @@ -984,7 +992,7 @@ void ps_main() #if SW_AD_TO_HW #if PS_RTA_CORRECTION - vec4 RT = trunc(fetch_rt() * 127.5f + 0.05f); + vec4 RT = trunc(fetch_rt() * 128.0f + 0.1f); #else vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f); #endif diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl index ebcdd8125f..d4bb979a02 100644 --- a/bin/resources/shaders/vulkan/convert.glsl +++ b/bin/resources/shaders/vulkan/convert.glsl @@ -114,7 +114,7 @@ void ps_datm0_rta_correction() void ps_rta_correction() { vec4 value = sample_c(v_tex); - o_col0 = vec4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f); + o_col0 = vec4(value.rgb, value.a / (128.5f / 255.0f)); } #endif @@ -122,7 +122,7 @@ void ps_rta_correction() void ps_rta_decorrection() { vec4 value = sample_c(v_tex); - o_col0 = vec4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f); + o_col0 = vec4(value.rgb, value.a * (128.5f / 255.0f)); } #endif diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 79ce3de6c0..1b319daa72 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -523,7 +523,12 @@ uvec4 sample_4_index(vec4 uv) c.w = sample_c(uv.zw).a; // Denormalize value + +#if PS_RTA_SRC_CORRECTION + uvec4 i = uvec4(c * 128.25f); +#else uvec4 i = uvec4(c * 255.5f); +#endif #if PS_PAL_FMT == 1 // 4HL @@ -835,7 +840,9 @@ vec4 sample_color(vec2 st) t = c[0]; } #endif - +#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION + t.a = t.a * (128.5f / 255.0f); +#endif return trunc(t * 255.0f + 0.05f); } @@ -1056,7 +1063,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba) #endif #if PS_RTA_CORRECTION - float Ad = trunc(RT.a * 127.5f + 0.05f) / 128.0f; + float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f; #else float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; #endif @@ -1235,7 +1242,7 @@ void main() #if SW_AD_TO_HW #if PS_RTA_CORRECTION - vec4 RT = trunc(sample_from_rt() * 127.5f + 0.05f); + vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f); #else vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f); #endif diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 9f76181e75..a0fa004746 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -331,6 +331,7 @@ struct alignas(16) GSHWDrawConfig u32 a_masked : 1; u32 hdr : 1; u32 rta_correction : 1; + u32 rta_source_correction : 1; u32 colclip : 1; u32 blend_mix : 2; u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index b4d0bb16b3..016cafe5b7 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1689,6 +1689,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_PAL_FMT", sel.pal_fmt); sm.AddMacro("PS_HDR", sel.hdr); sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction); + sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction); sm.AddMacro("PS_COLCLIP", sel.colclip); sm.AddMacro("PS_BLEND_A", sel.blend_a); sm.AddMacro("PS_BLEND_B", sel.blend_b); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index a03fde9b88..206a6a8046 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -2817,6 +2817,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_PAL_FMT", sel.pal_fmt); sm.AddMacro("PS_HDR", sel.hdr); sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction); + sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction); sm.AddMacro("PS_COLCLIP", sel.colclip); sm.AddMacro("PS_BLEND_A", sel.blend_a); sm.AddMacro("PS_BLEND_B", sel.blend_b); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 1b98e8a843..30cb22234c 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -4397,6 +4397,9 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, float scale = tex->GetScale(); HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy); + if (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale) + m_conf.ps.rta_source_correction = 1; + // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; @@ -5304,7 +5307,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta ds->m_alpha_min &= 128; } } - + + // If we Correct/Decorrect and tex is rt, we will need to update the texture reference + const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture; + if (rt) { const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || std::max(blend_alpha_max, rt->m_alpha_max) > 128 || m_conf.ps.fbmask || m_conf.ps.tex_is_fb; @@ -5317,6 +5323,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; + + if (req_src_update) + tex->m_texture = rt->m_texture; } else if (m_conf.colormask.wa) { @@ -5328,6 +5337,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; + + if (req_src_update) + tex->m_texture = rt->m_texture; } } } @@ -5337,6 +5349,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; + + if (req_src_update) + tex->m_texture = rt->m_texture; } } else if (rt->m_last_draw == s_n) @@ -5347,6 +5362,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta { rt->RTADecorrect(rt); m_conf.rt = rt->m_texture; + + if (req_src_update) + tex->m_texture = rt->m_texture; } } @@ -5357,6 +5375,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle))) { EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass, rt); + + if (req_src_update && tex->m_texture != rt->m_texture) + tex->m_texture = rt->m_texture; } else { diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 064ebf6c62..5a2d222730 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1690,9 +1690,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } #endif - if (dst && (GSUtil::GetChannelMask(TEX0.PSM) & 0x8)) - dst->RTADecorrect(dst); - src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region); if (!src) [[unlikely]] return nullptr; @@ -4299,7 +4296,16 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // copy the rt in const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); if (!area.rempty()) - g_gs_device->CopyRect(sTex, dTex, area, 0, 0); + { + if (dst->m_rt_alpha_scale) + { + const GSVector4 sRectF = GSVector4(area) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight()); + g_gs_device->StretchRect( + sTex, sRectF, dTex, GSVector4(area), ShaderConvert::RTA_DECORRECTION, false); + } + else + g_gs_device->CopyRect(sTex, dTex, area, 0, 0); + } src->m_texture = dTex; src->m_unscaled_size = GSVector2i(tw, th); @@ -4317,6 +4323,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_region.SetY(y_offset, region.GetMaxY() + y_offset); else src->m_region.SetY(y_offset, y_offset + th); + + src->m_target_direct = true; src->m_texture = dst->m_texture; src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; @@ -4554,6 +4562,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con { // sample the target directly src->m_texture = dst->m_texture; + src->m_target_direct = true; src->m_scale = dst->m_scale; src->m_unscaled_size = dst->m_unscaled_size; src->m_shared_texture = true; @@ -4600,7 +4609,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (use_texture) { - g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY); + if (dst->m_rt_alpha_scale) + { + const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight()); + g_gs_device->StretchRect( + sTex, sRectF, dTex, GSVector4(destX, destY, sRect.width(), sRect.height()), ShaderConvert::RTA_DECORRECTION, false); + } + else + g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY); g_perfmon.Put(GSPerfMon::TextureCopies, 1); #ifdef PCSX2_DEVBUILD @@ -4615,12 +4631,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con { if (is_8bits) { + if (dst->m_rt_alpha_scale) + { + dst->RTADecorrect(dst); + sTex = dst->m_texture; + } + g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); } else { + if (dst->m_rt_alpha_scale && shader == ShaderConvert::COPY) + shader = ShaderConvert::RTA_DECORRECTION; + const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight()); g_gs_device->StretchRect( sTex, sRectF, dTex, GSVector4(destX, destY, new_size.x, new_size.y), shader, false); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index da8e54fa82..332192cf97 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -283,6 +283,7 @@ public: u8 m_valid_hashes = 0; u8 m_complete_layers = 0; bool m_target = false; + bool m_target_direct = false; bool m_repeating = false; std::pair m_alpha_minmax = {0u, 255u}; std::vector* m_p2t = nullptr; diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index b10960d50d..63a286403d 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1789,60 +1789,61 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr } else { - setFnConstantB(m_fn_constants, pssel.fst, GSMTLConstantIndex_FST); - setFnConstantB(m_fn_constants, pssel.iip, GSMTLConstantIndex_IIP); - setFnConstantI(m_fn_constants, pssel.aem_fmt, GSMTLConstantIndex_PS_AEM_FMT); - setFnConstantI(m_fn_constants, pssel.pal_fmt, GSMTLConstantIndex_PS_PAL_FMT); - setFnConstantI(m_fn_constants, pssel.dst_fmt, GSMTLConstantIndex_PS_DST_FMT); - setFnConstantI(m_fn_constants, pssel.depth_fmt, GSMTLConstantIndex_PS_DEPTH_FMT); - setFnConstantB(m_fn_constants, pssel.aem, GSMTLConstantIndex_PS_AEM); - setFnConstantB(m_fn_constants, pssel.fba, GSMTLConstantIndex_PS_FBA); - setFnConstantB(m_fn_constants, pssel.fog, GSMTLConstantIndex_PS_FOG); - setFnConstantI(m_fn_constants, pssel.date, GSMTLConstantIndex_PS_DATE); - setFnConstantI(m_fn_constants, pssel.atst, GSMTLConstantIndex_PS_ATST); - setFnConstantI(m_fn_constants, pssel.tfx, GSMTLConstantIndex_PS_TFX); - setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC); - setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS); - setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT); - setFnConstantB(m_fn_constants, pssel.adjs, GSMTLConstantIndex_PS_ADJS); - setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT); - setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF); - setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE); - setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME); - setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA); - setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC); - setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG); - setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK); - setFnConstantI(m_fn_constants, pssel.blend_a, GSMTLConstantIndex_PS_BLEND_A); - setFnConstantI(m_fn_constants, pssel.blend_b, GSMTLConstantIndex_PS_BLEND_B); - setFnConstantI(m_fn_constants, pssel.blend_c, GSMTLConstantIndex_PS_BLEND_C); - setFnConstantI(m_fn_constants, pssel.blend_d, GSMTLConstantIndex_PS_BLEND_D); - setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW); - setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED); - setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR); - setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION); - setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP); - setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX); - setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV); - setFnConstantB(m_fn_constants, pssel.fixed_one_a, GSMTLConstantIndex_PS_FIXED_ONE_A); - setFnConstantB(m_fn_constants, pssel.pabe, GSMTLConstantIndex_PS_PABE); - setFnConstantB(m_fn_constants, pssel.no_color, GSMTLConstantIndex_PS_NO_COLOR); - setFnConstantB(m_fn_constants, pssel.no_color1, GSMTLConstantIndex_PS_NO_COLOR1); + setFnConstantB(m_fn_constants, pssel.fst, GSMTLConstantIndex_FST); + setFnConstantB(m_fn_constants, pssel.iip, GSMTLConstantIndex_IIP); + setFnConstantI(m_fn_constants, pssel.aem_fmt, GSMTLConstantIndex_PS_AEM_FMT); + setFnConstantI(m_fn_constants, pssel.pal_fmt, GSMTLConstantIndex_PS_PAL_FMT); + setFnConstantI(m_fn_constants, pssel.dst_fmt, GSMTLConstantIndex_PS_DST_FMT); + setFnConstantI(m_fn_constants, pssel.depth_fmt, GSMTLConstantIndex_PS_DEPTH_FMT); + setFnConstantB(m_fn_constants, pssel.aem, GSMTLConstantIndex_PS_AEM); + setFnConstantB(m_fn_constants, pssel.fba, GSMTLConstantIndex_PS_FBA); + setFnConstantB(m_fn_constants, pssel.fog, GSMTLConstantIndex_PS_FOG); + setFnConstantI(m_fn_constants, pssel.date, GSMTLConstantIndex_PS_DATE); + setFnConstantI(m_fn_constants, pssel.atst, GSMTLConstantIndex_PS_ATST); + setFnConstantI(m_fn_constants, pssel.tfx, GSMTLConstantIndex_PS_TFX); + setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC); + setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS); + setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT); + setFnConstantB(m_fn_constants, pssel.adjs, GSMTLConstantIndex_PS_ADJS); + setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT); + setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF); + setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE); + setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME); + setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA); + setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC); + setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG); + setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK); + setFnConstantI(m_fn_constants, pssel.blend_a, GSMTLConstantIndex_PS_BLEND_A); + setFnConstantI(m_fn_constants, pssel.blend_b, GSMTLConstantIndex_PS_BLEND_B); + setFnConstantI(m_fn_constants, pssel.blend_c, GSMTLConstantIndex_PS_BLEND_C); + setFnConstantI(m_fn_constants, pssel.blend_d, GSMTLConstantIndex_PS_BLEND_D); + setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW); + setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED); + setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR); + setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION); + setFnConstantB(m_fn_constants, pssel.rta_source_correction, GSMTLConstantIndex_PS_RTA_SRC_CORRECTION); + setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP); + setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX); + setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV); + setFnConstantB(m_fn_constants, pssel.fixed_one_a, GSMTLConstantIndex_PS_FIXED_ONE_A); + setFnConstantB(m_fn_constants, pssel.pabe, GSMTLConstantIndex_PS_PABE); + setFnConstantB(m_fn_constants, pssel.no_color, GSMTLConstantIndex_PS_NO_COLOR); + setFnConstantB(m_fn_constants, pssel.no_color1, GSMTLConstantIndex_PS_NO_COLOR1); // no_ablend ignored for now (No Metal driver has had DSB so broken that it's needed to be disabled, though Intel's was pretty close) - setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA); - setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL); - setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER); - setFnConstantI(m_fn_constants, pssel.dither_adjust, GSMTLConstantIndex_PS_DITHER_ADJUST); - setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP); - setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK); - setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE); - setFnConstantB(m_fn_constants, pssel.tales_of_abyss_hle, GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE); - setFnConstantB(m_fn_constants, pssel.tex_is_fb, GSMTLConstantIndex_PS_TEX_IS_FB); - setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD); - setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD); - setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER); - setFnConstantB(m_fn_constants, pssel.region_rect, GSMTLConstantIndex_PS_REGION_RECT); - setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK); + setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA); + setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL); + setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER); + setFnConstantI(m_fn_constants, pssel.dither_adjust, GSMTLConstantIndex_PS_DITHER_ADJUST); + setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP); + setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK); + setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE); + setFnConstantB(m_fn_constants, pssel.tales_of_abyss_hle, GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE); + setFnConstantB(m_fn_constants, pssel.tex_is_fb, GSMTLConstantIndex_PS_TEX_IS_FB); + setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD); + setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD); + setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER); + setFnConstantB(m_fn_constants, pssel.region_rect, GSMTLConstantIndex_PS_REGION_RECT); + setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK); auto newps = LoadShader(@"ps_main"); ps = newps; m_hw_ps.insert(std::make_pair(pssel, std::move(newps))); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index ec96d08e21..6907c325dd 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -183,6 +183,7 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_A_MASKED, GSMTLConstantIndex_PS_HDR, GSMTLConstantIndex_PS_RTA_CORRECTION, + GSMTLConstantIndex_PS_RTA_SRC_CORRECTION, GSMTLConstantIndex_PS_COLCLIP, GSMTLConstantIndex_PS_BLEND_MIX, GSMTLConstantIndex_PS_ROUND_INV, diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal index 25bfa98834..617f14410c 100644 --- a/pcsx2/GS/Renderers/Metal/convert.metal +++ b/pcsx2/GS/Renderers/Metal/convert.metal @@ -129,13 +129,13 @@ fragment float4 ps_primid_rta_init_datm0(float4 p [[position]], DirectReadTextur fragment float4 ps_rta_correction(float4 p [[position]], DirectReadTextureIn tex) { float4 in = tex.read(p); - return float4(in.rgb, (in.a * 255.f + 0.1f) / 127.5f); + return float4(in.rgb, in.a / (128.5f / 255.0f)); } fragment float4 ps_rta_decorrection(float4 p [[position]], DirectReadTextureIn tex) { float4 in = tex.read(p); - return float4(in.rgb, (in.a * 127.5f + 0.1f) / 255.f); + return float4(in.rgb, in.a * (128.5f / 255.0f)); } fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn tex) diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index b1280f677e..ebe7d23bf7 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -42,6 +42,7 @@ constant uint PS_BLEND_HW [[function_constant(GSMTLConstantIndex_PS_BL constant bool PS_A_MASKED [[function_constant(GSMTLConstantIndex_PS_A_MASKED)]]; constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]]; constant bool PS_RTA_CORRECTION [[function_constant(GSMTLConstantIndex_PS_RTA_CORRECTION)]]; +constant bool PS_RTA_SRC_CORRECTION [[function_constant(GSMTLConstantIndex_PS_RTA_SRC_CORRECTION)]]; constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]]; constant uint PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]]; constant bool PS_ROUND_INV [[function_constant(GSMTLConstantIndex_PS_ROUND_INV)]]; @@ -480,9 +481,18 @@ struct PSMain c.y = sample_c(uv.zy).a; c.z = sample_c(uv.xw).a; c.w = sample_c(uv.zw).a; - - uint4 i = uint4(c * 255.5f); // Denormalize value - + + uint4 i; + + if (PS_RTA_SRC_CORRECTION) + { + i = uint4(c * 128.25f); // Denormalize value + } + else + { + i = uint4(c * 255.5f); // Denormalize value + } + if (PS_PAL_FMT == 1) return i & 0xF; if (PS_PAL_FMT == 2) @@ -714,9 +724,13 @@ struct PSMain else t = c[0]; + if (PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION) + t.a = t.a * (128.5f / 255.0f); + // The 0.05f helps to fix the overbloom of sotc // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit) // interpolation could be slightly below the correct one. + return trunc(t * 255.f + 0.05f); } @@ -915,7 +929,7 @@ struct PSMain return; } - float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 127.75f) / 128.f : trunc(current_color.a * 255.5f) / 128.f; + float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 128.1f) / 128.f : trunc(current_color.a * 255.1f) / 128.f; float3 Cd = trunc(current_color.rgb * 255.5f); float3 Cs = Color.rgb; @@ -1047,7 +1061,7 @@ struct PSMain float4 alpha_blend = float4(0.f); if (SW_AD_TO_HW) { - alpha_blend = PS_RTA_CORRECTION ? float4(trunc(current_color.a * 127.75f) / 128.f) : float4(trunc(current_color.a * 255.5f) / 128.f); + alpha_blend = PS_RTA_CORRECTION ? float4(trunc(current_color.a * 128.f) / 128.f) : float4(trunc(current_color.a * 255.5f) / 128.f); } else { diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index dc2ff65654..06ee84a16c 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1373,6 +1373,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) + fmt::format("#define PS_FBMASK {}\n", sel.fbmask) + fmt::format("#define PS_HDR {}\n", sel.hdr) + fmt::format("#define PS_RTA_CORRECTION {}\n", sel.rta_correction) + + fmt::format("#define PS_RTA_SRC_CORRECTION {}\n", sel.rta_source_correction) + fmt::format("#define PS_DITHER {}\n", sel.dither) + fmt::format("#define PS_DITHER_ADJUST {}\n", sel.dither_adjust) + fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index f47115e36f..736b94a57e 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -4791,6 +4791,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_FBMASK", sel.fbmask); AddMacro(ss, "PS_HDR", sel.hdr); AddMacro(ss, "PS_RTA_CORRECTION", sel.rta_correction); + AddMacro(ss, "PS_RTA_SRC_CORRECTION", sel.rta_source_correction); AddMacro(ss, "PS_DITHER", sel.dither); AddMacro(ss, "PS_DITHER_ADJUST", sel.dither_adjust); AddMacro(ss, "PS_ZCLAMP", sel.zclamp);