From 6c9f132093a60e5bb891d35e9fda4d29e717e074 Mon Sep 17 00:00:00 2001 From: lightningterror <18107717+lightningterror@users.noreply.github.com> Date: Fri, 9 Feb 2024 06:49:03 +0100 Subject: [PATCH] GS/HW: Add support for Ad (RTA) correction. The idea is to adjust the alpha destination for more accurate hw blending which will work on all renderers. Old behavior has Ad in range within 0-1 whereas for blending 0-2 is needed. copy rt -> adjust the alpha -> copy back the adjusted alpha-> restore old alpha after blending is done ---
Review notes (placed after the three-dash separator, so they are not part of the commit message):
 * Convert shaders: ps_rta_correction passes RGB through and multiplies the sampled alpha by 255/127.5; ps_rta_decorrection multiplies it by 127.5/255.
 * tfx pixel shaders: when PS_RTA_CORRECTION is set, output alpha is C.a / 128 instead of C.a / 255; RGB output is unchanged (still /65535 under PS_HDR, /255 otherwise).
 * GSHWDrawConfig: the pixel-shader selector bitfield gains a 1-bit rta_correction field, forwarded to the shaders as PS_RTA_CORRECTION by the DX11 and DX12 devices in this part of the patch.
 * GSTextureCache::Target::m_rt_alpha_scale (default false) records whether the target texture currently holds corrected alpha. RTACorrect only acts when the flag is false and m_type == RenderTarget; RTADecorrect only when the flag is true and m_type == RenderTarget. Each copies the texture into a new render target through the matching convert shader, recycles the old texture and flips the flag. If CreateRenderTarget returns null, the target and the flag are left untouched.
 * GSTextureCache::LookupSource decorrects dst (when its flag is set) before calling CreateSource.
 * GSRendererHW::DrawPrims decorrects rt when TEST.DATE, channel shuffle, texture shuffle or blend_alpha_max > 128 applies; otherwise it sets ps.rta_correction from rt->m_rt_alpha_scale && colormask.wa. EmulateBlending corrects rt when none of channel shuffle / texture shuffle / rt_alpha_max > 128 / TEST.DATE / blend_ad_alpha_masked applies and blend_c == 1.
 * NOTE(review): the convert passes scale by 255/127.5 while the tfx shaders divide by 128 - confirm the two factors are meant to differ.
 * NOTE(review): EmulateBlending skips the BLEND_HW_CLR3 branch based on the local rta_correction flag even when RTACorrect could not allocate a texture (m_rt_alpha_scale stays false) - confirm this fallback is acceptable.
 bin/resources/shaders/dx11/convert.fx | 16 ++++++++ bin/resources/shaders/dx11/tfx.fx | 4 +- bin/resources/shaders/opengl/convert.glsl | 16 ++++++++ bin/resources/shaders/opengl/tfx_fs.glsl | 11 ++++-- bin/resources/shaders/vulkan/convert.glsl | 16 ++++++++ bin/resources/shaders/vulkan/tfx.glsl | 11 ++++-- pcsx2/GS/Renderers/Common/GSDevice.cpp | 2 + pcsx2/GS/Renderers/Common/GSDevice.h | 35 +++++++++-------- pcsx2/GS/Renderers/DX11/GSDevice11.cpp | 1 + pcsx2/GS/Renderers/DX12/GSDevice12.cpp | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 30 +++++++++++++-- pcsx2/GS/Renderers/HW/GSRendererHW.h | 2 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 40 ++++++++++++++++++++ pcsx2/GS/Renderers/HW/GSTextureCache.h | 4 ++ pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm | 3 ++ pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h | 1 + pcsx2/GS/Renderers/Metal/convert.metal | 12 ++++++ pcsx2/GS/Renderers/Metal/tfx.metal | 4 +- pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp | 1 + pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 1 + pcsx2/ShaderCacheVersion.h | 2 +- 21 files changed, 183 insertions(+), 30 deletions(-) diff --git a/bin/resources/shaders/dx11/convert.fx b/bin/resources/shaders/dx11/convert.fx index 8f8f751cb5..2681f65eb2 100644 --- a/bin/resources/shaders/dx11/convert.fx +++ b/bin/resources/shaders/dx11/convert.fx @@ -113,6 +113,22 @@ PS_OUTPUT ps_datm0(PS_INPUT input) return output; } +PS_OUTPUT ps_rta_correction(PS_INPUT input) +{ + PS_OUTPUT output; + float4 value = 
sample_c(input.t); + output.c = float4(value.rgb, (value.a * 255.0f) / 127.5f); + return output; +} + +PS_OUTPUT ps_rta_decorrection(PS_INPUT input) +{ + PS_OUTPUT output; + float4 value = sample_c(input.t); + output.c = float4(value.rgb, (value.a * 127.5f) / 255.0f); + return output; +} + PS_OUTPUT ps_hdr_init(PS_INPUT input) { PS_OUTPUT output; diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 75597446f1..9e55ef6c78 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -50,6 +50,7 @@ #define PS_TALES_OF_ABYSS_HLE 0 #define PS_URBAN_CHAOS_HLE 0 #define PS_HDR 0 +#define PS_RTA_CORRECTION 0 #define PS_COLCLIP 0 #define PS_BLEND_A 0 #define PS_BLEND_B 0 @@ -1078,7 +1079,8 @@ PS_OUTPUT ps_main(PS_INPUT input) ps_fbmask(C, input.p.xy); #if !PS_NO_COLOR - output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f; + output.c0.a = PS_RTA_CORRECTION ? C.a / 128.0f : C.a / 255.0f; + output.c0.rgb = PS_HDR ? 
float3(C.rgb / 65535.0f) : C.rgb / 255.0f; #if !PS_NO_COLOR1 output.c1 = alpha_blend; #endif diff --git a/bin/resources/shaders/opengl/convert.glsl b/bin/resources/shaders/opengl/convert.glsl index 9ff42b99b4..c3d9a40be8 100644 --- a/bin/resources/shaders/opengl/convert.glsl +++ b/bin/resources/shaders/opengl/convert.glsl @@ -313,6 +313,22 @@ void ps_datm0() } #endif +#ifdef ps_rta_correction +void ps_rta_correction() +{ + vec4 value = sample_c(); + SV_Target0 = vec4(value.rgb, (value.a * 255.0f) / 127.5f); +} +#endif + +#ifdef ps_rta_decorrection +void ps_rta_decorrection() +{ + vec4 value = sample_c(); + SV_Target0 = vec4(value.rgb, (value.a * 127.5f) / 255.0f); +} +#endif + #ifdef ps_hdr_init void ps_hdr_init() { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index a19abe058c..b5467e3679 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1075,10 +1075,15 @@ void ps_main() ps_fbmask(C); #if !PS_NO_COLOR - #if PS_HDR == 1 - SV_Target0 = vec4(C.rgb / 65535.0f, C.a / 255.0f); + #if PS_RTA_CORRECTION + SV_Target0.a = C.a / 128.0f; #else - SV_Target0 = C / 255.0f; + SV_Target0.a = C.a / 255.0f; + #endif + #if PS_HDR == 1 + SV_Target0.rgb = vec3(C.rgb / 65535.0f); + #else + SV_Target0.rgb = C.rgb / 255.0f; #endif #if !defined(DISABLE_DUAL_SOURCE) && !PS_NO_COLOR1 SV_Target1 = alpha_blend; diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl index fc742b6b5f..b85f319865 100644 --- a/bin/resources/shaders/vulkan/convert.glsl +++ b/bin/resources/shaders/vulkan/convert.glsl @@ -92,6 +92,22 @@ void ps_datm0() } #endif +#ifdef ps_rta_correction +void ps_rta_correction() +{ + vec4 value = sample_c(v_tex); + o_col0 = vec4(value.rgb, (value.a * 255.0f) / 127.5f); +} +#endif + +#ifdef ps_rta_decorrection +void ps_rta_decorrection() +{ + vec4 value = sample_c(v_tex); + o_col0 = vec4(value.rgb, (value.a * 127.5f) / 255.0f); +} 
+#endif + #ifdef ps_hdr_init void ps_hdr_init() { diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index db28fdcc17..a8463d0c14 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1307,10 +1307,15 @@ void main() ps_fbmask(C); #if !PS_NO_COLOR - #if PS_HDR == 1 - o_col0 = vec4(C.rgb / 65535.0f, C.a / 255.0f); + #if PS_RTA_CORRECTION + o_col0.a = C.a / 128.0f; #else - o_col0 = C / 255.0f; + o_col0.a = C.a / 255.0f; + #endif + #if PS_HDR == 1 + o_col0.rgb = vec3(C.rgb / 65535.0f); + #else + o_col0.rgb = C.rgb / 255.0f; #endif #if !defined(DISABLE_DUAL_SOURCE) && !PS_NO_COLOR1 o_col1 = alpha_blend; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp index 7fc883bafc..8e31b220a8 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.cpp +++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp @@ -28,6 +28,8 @@ const char* shaderName(ShaderConvert value) case ShaderConvert::DATM_0: return "ps_datm0"; case ShaderConvert::HDR_INIT: return "ps_hdr_init"; case ShaderConvert::HDR_RESOLVE: return "ps_hdr_resolve"; + case ShaderConvert::RTA_CORRECTION: return "ps_rta_correction"; + case ShaderConvert::RTA_DECORRECTION: return "ps_rta_decorrection"; case ShaderConvert::TRANSPARENCY_FILTER: return "ps_filter_transparency"; case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits"; case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits"; diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 576a17a28d..adbcbb0f60 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -21,6 +21,8 @@ enum class ShaderConvert DATM_0, HDR_INIT, HDR_RESOLVE, + RTA_CORRECTION, + RTA_DECORRECTION, TRANSPARENCY_FILTER, FLOAT32_TO_16_BITS, FLOAT32_TO_32_BITS, @@ -307,22 +309,23 @@ struct alignas(16) GSHWDrawConfig u32 fbmask : 1; // Blend and Colclip - u32 blend_a : 2; - u32 
blend_b : 2; - u32 blend_c : 2; - u32 blend_d : 2; - u32 fixed_one_a : 1; - u32 blend_hw : 2; - u32 a_masked : 1; - u32 hdr : 1; - u32 colclip : 1; - u32 blend_mix : 2; - u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way - u32 pabe : 1; - u32 no_color : 1; // disables color output entirely (depth only) - u32 no_color1 : 1; // disables second color output (when unnecessary) - u32 no_ablend : 1; // output alpha blend in col0 (for no-DSB) - u32 only_alpha : 1; // don't bother computing RGB + u32 blend_a : 2; + u32 blend_b : 2; + u32 blend_c : 2; + u32 blend_d : 2; + u32 fixed_one_a : 1; + u32 blend_hw : 2; + u32 a_masked : 1; + u32 hdr : 1; + u32 rta_correction : 1; + u32 colclip : 1; + u32 blend_mix : 2; + u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way + u32 pabe : 1; + u32 no_color : 1; // disables color output entirely (depth only) + u32 no_color1 : 1; // disables second color output (when unnecessary) + u32 no_ablend : 1; // output alpha blend in col0 (for no-DSB) + u32 only_alpha : 1; // don't bother computing RGB // Others ways to fetch the texture u32 channel : 3; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index 8c8d62045c..dfda3905cf 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1688,6 +1688,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt); sm.AddMacro("PS_PAL_FMT", sel.pal_fmt); sm.AddMacro("PS_HDR", sel.hdr); + sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction); sm.AddMacro("PS_COLCLIP", sel.colclip); sm.AddMacro("PS_BLEND_A", sel.blend_a); sm.AddMacro("PS_BLEND_B", sel.blend_b); diff --git a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp index 75cc3883f9..e07ea69eea 100644 --- a/pcsx2/GS/Renderers/DX12/GSDevice12.cpp +++ 
b/pcsx2/GS/Renderers/DX12/GSDevice12.cpp @@ -2814,6 +2814,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector& sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt); sm.AddMacro("PS_PAL_FMT", sel.pal_fmt); sm.AddMacro("PS_HDR", sel.hdr); + sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction); sm.AddMacro("PS_COLCLIP", sel.colclip); sm.AddMacro("PS_BLEND_A", sel.blend_a); sm.AddMacro("PS_BLEND_B", sel.blend_b); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 6c013cae0e..2466510b96 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -3729,7 +3729,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool return true; } -void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass) +void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass, GSTextureCache::Target* rt) { { // AA1: Blending needs to be enabled on draw. @@ -3895,7 +3895,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked; // Blend can be done on hw. As and F cases should be accurate. - // BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 Cs > 0.5f will require sw blend. + // BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 might require sw blend. // BLEND_HW_CLR1 with As/F and BLEND_HW_CLR2 can be done in hw. 
const bool clr_blend = !!(blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2 | BLEND_HW_CLR3)); bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // Make sure it isn't an Ad case @@ -4137,6 +4137,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.blend_a, m_conf.ps.blend_b, m_conf.ps.blend_c, m_conf.ps.blend_d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending); #endif + if (color_dest_blend) { // Blend output will be Cd, disable hw/sw blending. @@ -4300,6 +4301,16 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.blend_b = 0; m_conf.ps.blend_d = 0; + // TODO: Make it work on DATE, switch to new shaders with Ad doubled. + const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || rt_alpha_max > 128; + const bool rta_correction = !rta_decorrection && !m_cached_ctx.TEST.DATE && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1; + if (rta_correction) + { + rt->RTACorrect(rt); + m_conf.ps.rta_correction = rt->m_rt_alpha_scale && m_conf.colormask.wa; + m_conf.rt = rt->m_texture; + } + // Care for hw blend value, 6 is for hw/sw, sw blending used. 
if (blend_flag & BLEND_HW_CLR1) { @@ -4312,7 +4323,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT m_conf.ps.blend_hw = 2; } - else if (blend_flag & BLEND_HW_CLR3) + else if (!rta_correction && (blend_flag & BLEND_HW_CLR3)) { m_conf.ps.blend_hw = 3; } @@ -5291,11 +5302,22 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta ds->m_alpha_min &= 128; } } + + { + const bool rta_decorrection = m_cached_ctx.TEST.DATE || m_channel_shuffle || m_texture_shuffle || blend_alpha_max > 128; + if (rt && rta_decorrection) + { + rt->RTADecorrect(rt); + m_conf.rt = rt->m_texture; + } + else if (rt) + m_conf.ps.rta_correction = rt->m_rt_alpha_scale && m_conf.colormask.wa; + } bool blending_alpha_pass = false; if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle))) { - EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass); + EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass, rt); } else { diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index f31c7e5400..109a510998 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -81,7 +81,7 @@ private: void SetupIA(float target_scale, float sx, float sy); void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex); bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only); - void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass); + void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass, GSTextureCache::Target* rt); void CleanupDraw(bool invalidate_temp_src); void EmulateTextureSampler(const 
GSTextureCache::Target* rt, const GSTextureCache::Target* ds, diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index cdbe6aea8b..fc170872d3 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1689,6 +1689,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM)); } #endif + + if (dst && dst->m_rt_alpha_scale) + dst->RTADecorrect(dst); + src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region); if (!src) [[unlikely]] return nullptr; @@ -2645,6 +2649,42 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con return can_create ? CreateTarget(TEX0, size, size, scale, RenderTarget, true, 0, true) : nullptr; } +void GSTextureCache::Target::RTACorrect(Target* rt) +{ + if (!rt->m_rt_alpha_scale && rt->m_type == RenderTarget) + { + const GSVector2i rtsize(rt->m_texture->GetSize()); + if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, false)) + { + const GSVector4 dRect(rt->m_texture->GetRect()); + const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); + g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_CORRECTION, false); + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + g_gs_device->Recycle(rt->m_texture); + rt->m_texture = temp_rt; + rt->m_rt_alpha_scale = true; + } + } +} + +void GSTextureCache::Target::RTADecorrect(Target* rt) +{ + if (rt->m_rt_alpha_scale && rt->m_type == RenderTarget) + { + const GSVector2i rtsize(rt->m_texture->GetSize()); + if (GSTexture* temp_rt = g_gs_device->CreateRenderTarget(rtsize.x, rtsize.y, GSTexture::Format::Color, false)) + { + const GSVector4 dRect(rt->m_texture->GetRect()); + const GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); + 
g_gs_device->StretchRect(rt->m_texture, sRect, temp_rt, dRect, ShaderConvert::RTA_DECORRECTION, false); + g_perfmon.Put(GSPerfMon::TextureCopies, 1); + g_gs_device->Recycle(rt->m_texture); + rt->m_texture = temp_rt; + rt->m_rt_alpha_scale = false; + } + } +} + void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, int real_w, int real_h) { // This handles a case where you have two images stacked on top of one another (usually FMVs), and diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index fd60b9cb35..1012a235c3 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -216,6 +216,7 @@ public: bool m_valid_alpha_low = false; bool m_valid_alpha_high = false; bool m_valid_rgb = false; + bool m_rt_alpha_scale = false; bool m_is_frame = false; bool m_used = false; @@ -239,6 +240,9 @@ public: void ResizeValidity(const GSVector4i& rect); void UpdateValidity(const GSVector4i& rect, bool can_resize = true); + void RTACorrect(Target* rt); + void RTADecorrect(Target* rt); + void Update(); /// Updates the target, if the dirty area intersects with the specified rectangle. 
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm index bea5902754..d4a4430308 100644 --- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm +++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm @@ -1105,6 +1105,8 @@ bool GSDeviceMTL::Create() break; case ShaderConvert::COPY: case ShaderConvert::RGBA_TO_8I: // Yes really + case ShaderConvert::RTA_CORRECTION: + case ShaderConvert::RTA_DECORRECTION: case ShaderConvert::TRANSPARENCY_FILTER: case ShaderConvert::FLOAT32_TO_RGBA8: case ShaderConvert::FLOAT32_TO_RGB8: @@ -1810,6 +1812,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW); setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED); setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR); + setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION); setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP); setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX); setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV); diff --git a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h index f763c0e4b7..ec96d08e21 100644 --- a/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h +++ b/pcsx2/GS/Renderers/Metal/GSMTLSharedHeader.h @@ -182,6 +182,7 @@ enum GSMTLFnConstants GSMTLConstantIndex_PS_BLEND_HW, GSMTLConstantIndex_PS_A_MASKED, GSMTLConstantIndex_PS_HDR, + GSMTLConstantIndex_PS_RTA_CORRECTION, GSMTLConstantIndex_PS_COLCLIP, GSMTLConstantIndex_PS_BLEND_MIX, GSMTLConstantIndex_PS_ROUND_INV, diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal index 470e47bb81..dd4c28e896 100644 --- a/pcsx2/GS/Renderers/Metal/convert.metal +++ b/pcsx2/GS/Renderers/Metal/convert.metal @@ -104,6 +104,18 @@ fragment float4 
ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn< return tex.read(p).a < (127.5f / 255.f) ? -1 : FLT_MAX; } +fragment float4 ps_rta_correction(float4 p [[position]], DirectReadTextureIn<float> tex) +{ + float4 in = tex.read(p); + return float4(in.rgb, (in.a * 255.f) / 127.5f); +} + +fragment float4 ps_rta_decorrection(float4 p [[position]], DirectReadTextureIn<float> tex) +{ + float4 in = tex.read(p); + return float4(in.rgb, (in.a * 127.5f) / 255.f); +} + fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex) { float4 in = tex.read(p); diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 1da2c753ba..2e7041aded 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -41,6 +41,7 @@ constant uint PS_BLEND_D [[function_constant(GSMTLConstantIndex_PS_BL constant uint PS_BLEND_HW [[function_constant(GSMTLConstantIndex_PS_BLEND_HW)]]; constant bool PS_A_MASKED [[function_constant(GSMTLConstantIndex_PS_A_MASKED)]]; constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]]; +constant bool PS_RTA_CORRECTION [[function_constant(GSMTLConstantIndex_PS_RTA_CORRECTION)]]; constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]]; constant uint PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]]; constant bool PS_ROUND_INV [[function_constant(GSMTLConstantIndex_PS_ROUND_INV)]]; @@ -1130,7 +1131,11 @@ struct PSMain ps_fbmask(C); if (PS_COLOR0) - out.c0 = PS_HDR ? float4(C.rgb / 65535.f, C.a / 255.f) : C / 255.f; + { + // Braces keep both component writes under the PS_COLOR0 guard, like the single statement they replace. + out.c0.a = PS_RTA_CORRECTION ? C.a / 128.f : C.a / 255.f; + out.c0.rgb = PS_HDR ? 
float3(C.rgb / 65535.f) : C.rgb / 255.f; + } if (PS_COLOR0 && PS_ONLY_ALPHA) out.c0.rgb = 0; if (PS_COLOR1) diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 1bb4fd2575..3f3e6f5ea3 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -1372,6 +1372,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel) + fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg) + fmt::format("#define PS_FBMASK {}\n", sel.fbmask) + fmt::format("#define PS_HDR {}\n", sel.hdr) + + fmt::format("#define PS_RTA_CORRECTION {}\n", sel.rta_correction) + fmt::format("#define PS_DITHER {}\n", sel.dither) + fmt::format("#define PS_DITHER_ADJUST {}\n", sel.dither_adjust) + fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index a01607be92..1160b355c8 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -4787,6 +4787,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_WRITE_RG", sel.write_rg); AddMacro(ss, "PS_FBMASK", sel.fbmask); AddMacro(ss, "PS_HDR", sel.hdr); + AddMacro(ss, "PS_RTA_CORRECTION", sel.rta_correction); AddMacro(ss, "PS_DITHER", sel.dither); AddMacro(ss, "PS_DITHER_ADJUST", sel.dither_adjust); AddMacro(ss, "PS_ZCLAMP", sel.zclamp); diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index 582eac275c..249eb690d0 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 41; +static constexpr u32 SHADER_CACHE_VERSION = 42;