GS/HW: Add support for Ad (RTA) correction.

The idea is to adjust the destination alpha (Ad) so that hardware blending
is more accurate, in a way that works on all renderers.

Previously Ad was stored in the 0-1 range, whereas blending needs it in the 0-2 range.

Process: copy the rt -> adjust the alpha -> copy back the adjusted alpha -> restore the old alpha after blending is done.
This commit is contained in:
lightningterror 2024-02-09 06:49:03 +01:00
parent 3b7ad788bf
commit 6c9f132093
21 changed files with 183 additions and 30 deletions

View File

@ -113,6 +113,22 @@ PS_OUTPUT ps_datm0(PS_INPUT input)
return output;
}
// RTA correction pass: copies the sampled texel, leaving RGB untouched and
// scaling alpha by 255 / 127.5 (i.e. doubling the stored value).
// NOTE(review): the main pixel shader in this change writes corrected alpha as
// C.a / 128 rather than using 127.5 - confirm the different divisors are intended.
PS_OUTPUT ps_rta_correction(PS_INPUT input)
{
	float4 texel = sample_c(input.t);
	texel.a = (texel.a * 255.0f) / 127.5f;

	PS_OUTPUT output;
	output.c = texel;
	return output;
}
// RTA decorrection pass: the inverse of ps_rta_correction. RGB is copied
// unchanged and alpha is scaled by 127.5 / 255 (i.e. halved).
PS_OUTPUT ps_rta_decorrection(PS_INPUT input)
{
	float4 texel = sample_c(input.t);
	texel.a = (texel.a * 127.5f) / 255.0f;

	PS_OUTPUT output;
	output.c = texel;
	return output;
}
PS_OUTPUT ps_hdr_init(PS_INPUT input)
{
PS_OUTPUT output;

View File

@ -50,6 +50,7 @@
#define PS_TALES_OF_ABYSS_HLE 0
#define PS_URBAN_CHAOS_HLE 0
#define PS_HDR 0
#define PS_RTA_CORRECTION 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
#define PS_BLEND_B 0
@ -1078,7 +1079,8 @@ PS_OUTPUT ps_main(PS_INPUT input)
ps_fbmask(C, input.p.xy);
#if !PS_NO_COLOR
output.c0 = PS_HDR ? float4(C.rgb / 65535.0f, C.a / 255.0f) : C / 255.0f;
output.c0.a = PS_RTA_CORRECTION ? C.a / 128.0f : C.a / 255.0f;
output.c0.rgb = PS_HDR ? float3(C.rgb / 65535.0f) : C.rgb / 255.0f;
#if !PS_NO_COLOR1
output.c1 = alpha_blend;
#endif

View File

@ -313,6 +313,22 @@ void ps_datm0()
}
#endif
#ifdef ps_rta_correction
// RTA correction pass: writes the sampled texel to SV_Target0 with RGB
// untouched and alpha scaled by 255 / 127.5 (i.e. doubled).
// NOTE(review): the main pixel shader in this change writes corrected alpha as
// C.a / 128 rather than using 127.5 - confirm the different divisors are intended.
void ps_rta_correction()
{
	vec4 texel = sample_c();
	texel.a = (texel.a * 255.0f) / 127.5f;
	SV_Target0 = texel;
}
#endif
#ifdef ps_rta_decorrection
// RTA decorrection pass: the inverse of ps_rta_correction. Writes the sampled
// texel to SV_Target0 with RGB untouched and alpha scaled by 127.5 / 255 (i.e. halved).
void ps_rta_decorrection()
{
	vec4 texel = sample_c();
	texel.a = (texel.a * 127.5f) / 255.0f;
	SV_Target0 = texel;
}
#endif
#ifdef ps_hdr_init
void ps_hdr_init()
{

View File

@ -1075,10 +1075,15 @@ void ps_main()
ps_fbmask(C);
#if !PS_NO_COLOR
#if PS_HDR == 1
SV_Target0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
#if PS_RTA_CORRECTION
SV_Target0.a = C.a / 128.0f;
#else
SV_Target0 = C / 255.0f;
SV_Target0.a = C.a / 255.0f;
#endif
#if PS_HDR == 1
SV_Target0.rgb = vec3(C.rgb / 65535.0f);
#else
SV_Target0.rgb = C.rgb / 255.0f;
#endif
#if !defined(DISABLE_DUAL_SOURCE) && !PS_NO_COLOR1
SV_Target1 = alpha_blend;

View File

@ -92,6 +92,22 @@ void ps_datm0()
}
#endif
#ifdef ps_rta_correction
// RTA correction pass: writes the texel sampled at v_tex to o_col0 with RGB
// untouched and alpha scaled by 255 / 127.5 (i.e. doubled).
// NOTE(review): the main pixel shader in this change writes corrected alpha as
// C.a / 128 rather than using 127.5 - confirm the different divisors are intended.
void ps_rta_correction()
{
	vec4 texel = sample_c(v_tex);
	texel.a = (texel.a * 255.0f) / 127.5f;
	o_col0 = texel;
}
#endif
#ifdef ps_rta_decorrection
// RTA decorrection pass: the inverse of ps_rta_correction. Writes the texel
// sampled at v_tex to o_col0 with RGB untouched and alpha scaled by
// 127.5 / 255 (i.e. halved).
void ps_rta_decorrection()
{
	vec4 texel = sample_c(v_tex);
	texel.a = (texel.a * 127.5f) / 255.0f;
	o_col0 = texel;
}
#endif
#ifdef ps_hdr_init
void ps_hdr_init()
{

View File

@ -1307,10 +1307,15 @@ void main()
ps_fbmask(C);
#if !PS_NO_COLOR
#if PS_HDR == 1
o_col0 = vec4(C.rgb / 65535.0f, C.a / 255.0f);
#if PS_RTA_CORRECTION
o_col0.a = C.a / 128.0f;
#else
o_col0 = C / 255.0f;
o_col0.a = C.a / 255.0f;
#endif
#if PS_HDR == 1
o_col0.rgb = vec3(C.rgb / 65535.0f);
#else
o_col0.rgb = C.rgb / 255.0f;
#endif
#if !defined(DISABLE_DUAL_SOURCE) && !PS_NO_COLOR1
o_col1 = alpha_blend;

View File

@ -28,6 +28,8 @@ const char* shaderName(ShaderConvert value)
case ShaderConvert::DATM_0: return "ps_datm0";
case ShaderConvert::HDR_INIT: return "ps_hdr_init";
case ShaderConvert::HDR_RESOLVE: return "ps_hdr_resolve";
case ShaderConvert::RTA_CORRECTION: return "ps_rta_correction";
case ShaderConvert::RTA_DECORRECTION: return "ps_rta_decorrection";
case ShaderConvert::TRANSPARENCY_FILTER: return "ps_filter_transparency";
case ShaderConvert::FLOAT32_TO_16_BITS: return "ps_convert_float32_32bits";
case ShaderConvert::FLOAT32_TO_32_BITS: return "ps_convert_float32_32bits";

View File

@ -21,6 +21,8 @@ enum class ShaderConvert
DATM_0,
HDR_INIT,
HDR_RESOLVE,
RTA_CORRECTION,
RTA_DECORRECTION,
TRANSPARENCY_FILTER,
FLOAT32_TO_16_BITS,
FLOAT32_TO_32_BITS,
@ -307,22 +309,23 @@ struct alignas(16) GSHWDrawConfig
u32 fbmask : 1;
// Blend and Colclip
u32 blend_a : 2;
u32 blend_b : 2;
u32 blend_c : 2;
u32 blend_d : 2;
u32 fixed_one_a : 1;
u32 blend_hw : 2;
u32 a_masked : 1;
u32 hdr : 1;
u32 colclip : 1;
u32 blend_mix : 2;
u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way
u32 pabe : 1;
u32 no_color : 1; // disables color output entirely (depth only)
u32 no_color1 : 1; // disables second color output (when unnecessary)
u32 no_ablend : 1; // output alpha blend in col0 (for no-DSB)
u32 only_alpha : 1; // don't bother computing RGB
u32 blend_a : 2;
u32 blend_b : 2;
u32 blend_c : 2;
u32 blend_d : 2;
u32 fixed_one_a : 1;
u32 blend_hw : 2;
u32 a_masked : 1;
u32 hdr : 1;
u32 rta_correction : 1;
u32 colclip : 1;
u32 blend_mix : 2;
u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way
u32 pabe : 1;
u32 no_color : 1; // disables color output entirely (depth only)
u32 no_color1 : 1; // disables second color output (when unnecessary)
u32 no_ablend : 1; // output alpha blend in col0 (for no-DSB)
u32 only_alpha : 1; // don't bother computing RGB
// Other ways to fetch the texture
u32 channel : 3;

View File

@ -1688,6 +1688,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
sm.AddMacro("PS_HDR", sel.hdr);
sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
sm.AddMacro("PS_COLCLIP", sel.colclip);
sm.AddMacro("PS_BLEND_A", sel.blend_a);
sm.AddMacro("PS_BLEND_B", sel.blend_b);

View File

@ -2814,6 +2814,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_DEPTH_FMT", sel.depth_fmt);
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
sm.AddMacro("PS_HDR", sel.hdr);
sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
sm.AddMacro("PS_COLCLIP", sel.colclip);
sm.AddMacro("PS_BLEND_A", sel.blend_a);
sm.AddMacro("PS_BLEND_B", sel.blend_b);

View File

@ -3729,7 +3729,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool
return true;
}
void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass)
void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass, GSTextureCache::Target* rt)
{
{
// AA1: Blending needs to be enabled on draw.
@ -3895,7 +3895,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
const bool one_barrier = m_conf.require_one_barrier || blend_ad_alpha_masked;
// Blend can be done on hw. As and F cases should be accurate.
// BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 Cs > 0.5f will require sw blend.
// BLEND_HW_CLR1 with Ad, BLEND_HW_CLR3 might require sw blend.
// BLEND_HW_CLR1 with As/F and BLEND_HW_CLR2 can be done in hw.
const bool clr_blend = !!(blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2 | BLEND_HW_CLR3));
bool clr_blend1_2 = (blend_flag & (BLEND_HW_CLR1 | BLEND_HW_CLR2)) && (m_conf.ps.blend_c != 1) // Make sure it isn't an Ad case
@ -4137,6 +4137,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_a, m_conf.ps.blend_b, m_conf.ps.blend_c, m_conf.ps.blend_d,
m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, m_drawlist.size(), sw_blending);
#endif
if (color_dest_blend)
{
// Blend output will be Cd, disable hw/sw blending.
@ -4300,6 +4301,16 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_b = 0;
m_conf.ps.blend_d = 0;
// TODO: Make it work on DATE, switch to new shaders with Ad doubled.
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || rt_alpha_max > 128;
const bool rta_correction = !rta_decorrection && !m_cached_ctx.TEST.DATE && !blend_ad_alpha_masked && m_conf.ps.blend_c == 1;
if (rta_correction)
{
rt->RTACorrect(rt);
m_conf.ps.rta_correction = rt->m_rt_alpha_scale && m_conf.colormask.wa;
m_conf.rt = rt->m_texture;
}
// Care for hw blend value, 6 is for hw/sw, sw blending used.
if (blend_flag & BLEND_HW_CLR1)
{
@ -4312,7 +4323,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
m_conf.ps.blend_hw = 2;
}
else if (blend_flag & BLEND_HW_CLR3)
else if (!rta_correction && (blend_flag & BLEND_HW_CLR3))
{
m_conf.ps.blend_hw = 3;
}
@ -5291,11 +5302,22 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
ds->m_alpha_min &= 128;
}
}
{
const bool rta_decorrection = m_cached_ctx.TEST.DATE || m_channel_shuffle || m_texture_shuffle || blend_alpha_max > 128;
if (rt && rta_decorrection)
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
}
else if (rt)
m_conf.ps.rta_correction = rt->m_rt_alpha_scale && m_conf.colormask.wa;
}
bool blending_alpha_pass = false;
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
{
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass);
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass, rt);
}
else
{

View File

@ -81,7 +81,7 @@ private:
void SetupIA(float target_scale, float sx, float sy);
void EmulateTextureShuffleAndFbmask(GSTextureCache::Target* rt, GSTextureCache::Source* tex);
bool EmulateChannelShuffle(GSTextureCache::Target* src, bool test_only);
void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass);
void EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DATE_PRIMID, bool& DATE_BARRIER, bool& blending_alpha_pass, GSTextureCache::Target* rt);
void CleanupDraw(bool invalidate_temp_src);
void EmulateTextureSampler(const GSTextureCache::Target* rt, const GSTextureCache::Target* ds,

View File

@ -1689,6 +1689,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
}
#endif
if (dst && dst->m_rt_alpha_scale)
dst->RTADecorrect(dst);
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region);
if (!src) [[unlikely]]
return nullptr;
@ -2645,6 +2649,42 @@ GSTextureCache::Target* GSTextureCache::LookupDisplayTarget(GIFRegTEX0 TEX0, con
return can_create ? CreateTarget(TEX0, size, size, scale, RenderTarget, true, 0, true) : nullptr;
}
/// Replaces rt's texture with a copy produced by the RTA_CORRECTION convert
/// shader and marks the target as alpha-scaled.
///
/// Does nothing when rt is already flagged as alpha-scaled, when it is not a
/// RenderTarget, or when the replacement texture cannot be created; in those
/// cases rt->m_texture and rt->m_rt_alpha_scale are left untouched.
///
/// NOTE(review): the callers visible in this change invoke this as
/// rt->RTACorrect(rt), so `this` and `rt` appear to be the same object.
void GSTextureCache::Target::RTACorrect(Target* rt)
{
	// Only an uncorrected render target needs converting.
	if (rt->m_rt_alpha_scale || rt->m_type != RenderTarget)
		return;

	const GSVector2i tex_size(rt->m_texture->GetSize());

	GSTexture* const corrected = g_gs_device->CreateRenderTarget(tex_size.x, tex_size.y, GSTexture::Format::Color, false);
	if (!corrected)
		return;

	// Destination is the texture's rect; source is that rect divided by the texture size.
	const GSVector4 dst_rect(rt->m_texture->GetRect());
	const GSVector4 src_rect = dst_rect / GSVector4(tex_size.x, tex_size.y).xyxy();

	g_gs_device->StretchRect(rt->m_texture, src_rect, corrected, dst_rect, ShaderConvert::RTA_CORRECTION, false);
	g_perfmon.Put(GSPerfMon::TextureCopies, 1);

	// Hand the old texture back to the device, then adopt the converted copy.
	g_gs_device->Recycle(rt->m_texture);
	rt->m_texture = corrected;
	rt->m_rt_alpha_scale = true;
}
/// Replaces rt's texture with a copy produced by the RTA_DECORRECTION convert
/// shader and clears the target's alpha-scaled flag.
///
/// Does nothing when rt is not flagged as alpha-scaled, when it is not a
/// RenderTarget, or when the replacement texture cannot be created; in those
/// cases rt->m_texture and rt->m_rt_alpha_scale are left untouched.
///
/// NOTE(review): the callers visible in this change invoke this as
/// rt->RTADecorrect(rt), so `this` and `rt` appear to be the same object.
void GSTextureCache::Target::RTADecorrect(Target* rt)
{
	// Only a previously corrected render target needs converting back.
	if (!rt->m_rt_alpha_scale || rt->m_type != RenderTarget)
		return;

	const GSVector2i tex_size(rt->m_texture->GetSize());

	GSTexture* const restored = g_gs_device->CreateRenderTarget(tex_size.x, tex_size.y, GSTexture::Format::Color, false);
	if (!restored)
		return;

	// Destination is the texture's rect; source is that rect divided by the texture size.
	const GSVector4 dst_rect(rt->m_texture->GetRect());
	const GSVector4 src_rect = dst_rect / GSVector4(tex_size.x, tex_size.y).xyxy();

	g_gs_device->StretchRect(rt->m_texture, src_rect, restored, dst_rect, ShaderConvert::RTA_DECORRECTION, false);
	g_perfmon.Put(GSPerfMon::TextureCopies, 1);

	// Hand the old texture back to the device, then adopt the converted copy.
	g_gs_device->Recycle(rt->m_texture);
	rt->m_texture = restored;
	rt->m_rt_alpha_scale = false;
}
void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, int real_w, int real_h)
{
// This handles a case where you have two images stacked on top of one another (usually FMVs), and

View File

@ -216,6 +216,7 @@ public:
bool m_valid_alpha_low = false;
bool m_valid_alpha_high = false;
bool m_valid_rgb = false;
bool m_rt_alpha_scale = false;
bool m_is_frame = false;
bool m_used = false;
@ -239,6 +240,9 @@ public:
void ResizeValidity(const GSVector4i& rect);
void UpdateValidity(const GSVector4i& rect, bool can_resize = true);
void RTACorrect(Target* rt);
void RTADecorrect(Target* rt);
void Update();
/// Updates the target, if the dirty area intersects with the specified rectangle.

View File

@ -1105,6 +1105,8 @@ bool GSDeviceMTL::Create()
break;
case ShaderConvert::COPY:
case ShaderConvert::RGBA_TO_8I: // Yes really
case ShaderConvert::RTA_CORRECTION:
case ShaderConvert::RTA_DECORRECTION:
case ShaderConvert::TRANSPARENCY_FILTER:
case ShaderConvert::FLOAT32_TO_RGBA8:
case ShaderConvert::FLOAT32_TO_RGB8:
@ -1810,6 +1812,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW);
setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED);
setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR);
setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION);
setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP);
setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);
setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV);

View File

@ -182,6 +182,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_BLEND_HW,
GSMTLConstantIndex_PS_A_MASKED,
GSMTLConstantIndex_PS_HDR,
GSMTLConstantIndex_PS_RTA_CORRECTION,
GSMTLConstantIndex_PS_COLCLIP,
GSMTLConstantIndex_PS_BLEND_MIX,
GSMTLConstantIndex_PS_ROUND_INV,

View File

@ -104,6 +104,18 @@ fragment float4 ps_primid_init_datm1(float4 p [[position]], DirectReadTextureIn<
return tex.read(p).a < (127.5f / 255.f) ? -1 : FLT_MAX;
}
// RTA correction pass: reads the texel at the fragment position and returns it
// with RGB untouched and alpha scaled by 255 / 127.5 (i.e. doubled).
// NOTE(review): the main pixel shader in this change writes corrected alpha as
// C.a / 128 rather than using 127.5 - confirm the different divisors are intended.
fragment float4 ps_rta_correction(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	float4 texel = tex.read(p);
	texel.a = (texel.a * 255.f) / 127.5f;
	return texel;
}
// RTA decorrection pass: the inverse of ps_rta_correction. Reads the texel at
// the fragment position and returns it with RGB untouched and alpha scaled by
// 127.5 / 255 (i.e. halved).
fragment float4 ps_rta_decorrection(float4 p [[position]], DirectReadTextureIn<float> tex)
{
	float4 texel = tex.read(p);
	texel.a = (texel.a * 127.5f) / 255.f;
	return texel;
}
fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex)
{
float4 in = tex.read(p);

View File

@ -41,6 +41,7 @@ constant uint PS_BLEND_D [[function_constant(GSMTLConstantIndex_PS_BL
constant uint PS_BLEND_HW [[function_constant(GSMTLConstantIndex_PS_BLEND_HW)]];
constant bool PS_A_MASKED [[function_constant(GSMTLConstantIndex_PS_A_MASKED)]];
constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]];
constant bool PS_RTA_CORRECTION [[function_constant(GSMTLConstantIndex_PS_RTA_CORRECTION)]];
constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]];
constant uint PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]];
constant bool PS_ROUND_INV [[function_constant(GSMTLConstantIndex_PS_ROUND_INV)]];
@ -1130,7 +1131,8 @@ struct PSMain
ps_fbmask(C);
if (PS_COLOR0)
out.c0 = PS_HDR ? float4(C.rgb / 65535.f, C.a / 255.f) : C / 255.f;
out.c0.a = PS_RTA_CORRECTION ? C.a / 128.f : C.a / 255.f;
out.c0.rgb = PS_HDR ? float3(C.rgb / 65535.f) : C.rgb / 255.f;
if (PS_COLOR0 && PS_ONLY_ALPHA)
out.c0.rgb = 0;
if (PS_COLOR1)

View File

@ -1372,6 +1372,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg)
+ fmt::format("#define PS_FBMASK {}\n", sel.fbmask)
+ fmt::format("#define PS_HDR {}\n", sel.hdr)
+ fmt::format("#define PS_RTA_CORRECTION {}\n", sel.rta_correction)
+ fmt::format("#define PS_DITHER {}\n", sel.dither)
+ fmt::format("#define PS_DITHER_ADJUST {}\n", sel.dither_adjust)
+ fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp)

View File

@ -4787,6 +4787,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_WRITE_RG", sel.write_rg);
AddMacro(ss, "PS_FBMASK", sel.fbmask);
AddMacro(ss, "PS_HDR", sel.hdr);
AddMacro(ss, "PS_RTA_CORRECTION", sel.rta_correction);
AddMacro(ss, "PS_DITHER", sel.dither);
AddMacro(ss, "PS_DITHER_ADJUST", sel.dither_adjust);
AddMacro(ss, "PS_ZCLAMP", sel.zclamp);

View File

@ -3,4 +3,4 @@
/// Version number for GS and other shaders. Increment whenever any of the contents of the
/// shaders change, to invalidate the cache.
static constexpr u32 SHADER_CACHE_VERSION = 41;
static constexpr u32 SHADER_CACHE_VERSION = 42;