GS/HW: Allow a source that is an RT to use corrected alpha in most cases.

This commit is contained in:
refractionpcsx2 2024-03-03 12:43:05 +00:00 committed by lightningterror
parent 886a368297
commit bfef8397d6
18 changed files with 177 additions and 81 deletions

View File

@ -139,7 +139,7 @@ PS_OUTPUT ps_rta_correction(PS_INPUT input)
{
// Samples the source texel and rescales only its alpha channel; RGB is passed through unchanged.
PS_OUTPUT output;
float4 value = sample_c(input.t);
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// assignment below is presumably the removed formula ((a * 255 + 0.1) / 127.5) - confirm.
output.c = float4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f);
// This later assignment overwrites the one above: alpha is divided by 128.5/255,
// the same factor that ps_rta_decorrection multiplies by.
output.c = float4(value.rgb, value.a / (128.5f / 255.0f));
return output;
}
@ -147,7 +147,7 @@ PS_OUTPUT ps_rta_decorrection(PS_INPUT input)
{
// Samples the source texel and scales only its alpha channel back down; RGB is passed through unchanged.
PS_OUTPUT output;
float4 value = sample_c(input.t);
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// assignment below is presumably the removed formula ((a * 127.5 + 0.1) / 255) - confirm.
output.c = float4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f);
// This later assignment overwrites the one above: alpha is multiplied by 128.5/255,
// the same factor that ps_rta_correction divides by.
output.c = float4(value.rgb, value.a * (128.5f / 255.0f));
return output;
}

View File

@ -51,6 +51,7 @@
#define PS_URBAN_CHAOS_HLE 0
#define PS_HDR 0
#define PS_RTA_CORRECTION 0
#define PS_RTA_SRC_CORRECTION 0
#define PS_COLCLIP 0
#define PS_BLEND_A 0
#define PS_BLEND_B 0
@ -330,7 +331,16 @@ uint4 sample_4_index(float4 uv, float uv_w)
c.w = sample_c(uv.zw, uv_w).a;
// Denormalize value
uint4 i = uint4(c * 255.5f);
uint4 i;
if (PS_RTA_SRC_CORRECTION)
{
i = uint4(c * 128.25f); // Denormalize value
}
else
{
i = uint4(c * 255.5f); // Denormalize value
}
if (PS_PAL_FMT == 1)
{
@ -650,6 +660,9 @@ float4 sample_color(float2 st, float uv_w)
t = c[0];
}
if (PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION)
t.a = t.a * (128.5f / 255.0f);
return trunc(t * 255.0f + 0.05f);
}
@ -850,7 +863,7 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f;
float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 127.5f + 0.05f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f;
float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f;
float3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
float3 Cs = Color.rgb;
@ -968,7 +981,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
float4 alpha_blend = (float4)0.0f;
if (SW_AD_TO_HW)
{
float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 127.5f + 0.05f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
float4 RT = PS_RTA_CORRECTION ? trunc(RtTexture.Load(int3(input.p.xy, 0)) * 128.0f + 0.1f) : trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f);
alpha_blend = (float4)(RT.a / 128.0f);
}
else

View File

@ -337,7 +337,7 @@ void ps_datm0_rta_correction()
// Samples the source texel and rescales only its alpha channel; RGB is written through unchanged.
void ps_rta_correction()
{
vec4 value = sample_c();
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// write below is presumably the removed formula ((a * 255 + 0.1) / 127.5) - confirm.
SV_Target0 = vec4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f);
// This later write overwrites the one above: alpha is divided by 128.5/255,
// the same factor that ps_rta_decorrection multiplies by.
SV_Target0 = vec4(value.rgb, value.a / (128.5f / 255.0f));
}
#endif
@ -345,7 +345,7 @@ void ps_rta_correction()
// Samples the source texel and scales only its alpha channel back down; RGB is written through unchanged.
void ps_rta_decorrection()
{
vec4 value = sample_c();
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// write below is presumably the removed formula ((a * 127.5 + 0.1) / 255) - confirm.
SV_Target0 = vec4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f);
// This later write overwrites the one above: alpha is multiplied by 128.5/255,
// the same factor that ps_rta_correction divides by.
SV_Target0 = vec4(value.rgb, value.a * (128.5f / 255.0f));
}
#endif

View File

@ -281,8 +281,12 @@ uvec4 sample_4_index(vec4 uv)
c.y = sample_c(uv.zy).a;
c.z = sample_c(uv.xw).a;
c.w = sample_c(uv.zw).a;
#if PS_RTA_SRC_CORRECTION
uvec4 i = uvec4(c * 128.25f); // Denormalize value
#else
uvec4 i = uvec4(c * 255.5f); // Denormalize value
#endif
#if PS_PAL_FMT == 1
// 4HL
@ -591,6 +595,10 @@ vec4 sample_color(vec2 st)
t = c[0];
#endif
#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION
t.a = t.a * (128.5f / 255.0f);
#endif
// The 0.05f helps to fix the overbloom of sotc
// I think the issue is related to the rounding of texture coordinate. The linear (from fixed unit)
// interpolation could be slightly below the correct one.
@ -803,7 +811,7 @@ float As = As_rgba.a;
#endif
#if PS_RTA_CORRECTION
float Ad = trunc(RT.a * 127.5f + 0.05f) / 128.0f;
float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f;
#else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif
@ -984,7 +992,7 @@ void ps_main()
#if SW_AD_TO_HW
#if PS_RTA_CORRECTION
vec4 RT = trunc(fetch_rt() * 127.5f + 0.05f);
vec4 RT = trunc(fetch_rt() * 128.0f + 0.1f);
#else
vec4 RT = trunc(fetch_rt() * 255.0f + 0.1f);
#endif

View File

@ -114,7 +114,7 @@ void ps_datm0_rta_correction()
// Samples the texel at v_tex and rescales only its alpha channel; RGB is written through unchanged.
void ps_rta_correction()
{
vec4 value = sample_c(v_tex);
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// write below is presumably the removed formula ((a * 255 + 0.1) / 127.5) - confirm.
o_col0 = vec4(value.rgb, (value.a * 255.0f + 0.1f) / 127.5f);
// This later write overwrites the one above: alpha is divided by 128.5/255,
// the same factor that ps_rta_decorrection multiplies by.
o_col0 = vec4(value.rgb, value.a / (128.5f / 255.0f));
}
#endif
@ -122,7 +122,7 @@ void ps_rta_correction()
// Samples the texel at v_tex and scales only its alpha channel back down; RGB is written through unchanged.
void ps_rta_decorrection()
{
vec4 value = sample_c(v_tex);
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// write below is presumably the removed formula ((a * 127.5 + 0.1) / 255) - confirm.
o_col0 = vec4(value.rgb, (value.a * 127.5f + 0.1f) / 255.0f);
// This later write overwrites the one above: alpha is multiplied by 128.5/255,
// the same factor that ps_rta_correction divides by.
o_col0 = vec4(value.rgb, value.a * (128.5f / 255.0f));
}
#endif

View File

@ -523,7 +523,12 @@ uvec4 sample_4_index(vec4 uv)
c.w = sample_c(uv.zw).a;
// Denormalize value
#if PS_RTA_SRC_CORRECTION
uvec4 i = uvec4(c * 128.25f);
#else
uvec4 i = uvec4(c * 255.5f);
#endif
#if PS_PAL_FMT == 1
// 4HL
@ -835,7 +840,9 @@ vec4 sample_color(vec2 st)
t = c[0];
}
#endif
#if PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION
t.a = t.a * (128.5f / 255.0f);
#endif
return trunc(t * 255.0f + 0.05f);
}
@ -1056,7 +1063,7 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
#endif
#if PS_RTA_CORRECTION
float Ad = trunc(RT.a * 127.5f + 0.05f) / 128.0f;
float Ad = trunc(RT.a * 128.0f + 0.1f) / 128.0f;
#else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif
@ -1235,7 +1242,7 @@ void main()
#if SW_AD_TO_HW
#if PS_RTA_CORRECTION
vec4 RT = trunc(sample_from_rt() * 127.5f + 0.05f);
vec4 RT = trunc(sample_from_rt() * 128.0f + 0.1f);
#else
vec4 RT = trunc(sample_from_rt() * 255.0f + 0.1f);
#endif

View File

@ -331,6 +331,7 @@ struct alignas(16) GSHWDrawConfig
u32 a_masked : 1;
u32 hdr : 1;
u32 rta_correction : 1;
u32 rta_source_correction : 1;
u32 colclip : 1;
u32 blend_mix : 2;
u32 round_inv : 1; // Blending will invert the value, so rounding needs to go the other way

View File

@ -1689,6 +1689,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
sm.AddMacro("PS_HDR", sel.hdr);
sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
sm.AddMacro("PS_COLCLIP", sel.colclip);
sm.AddMacro("PS_BLEND_A", sel.blend_a);
sm.AddMacro("PS_BLEND_B", sel.blend_b);

View File

@ -2817,6 +2817,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_PAL_FMT", sel.pal_fmt);
sm.AddMacro("PS_HDR", sel.hdr);
sm.AddMacro("PS_RTA_CORRECTION", sel.rta_correction);
sm.AddMacro("PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
sm.AddMacro("PS_COLCLIP", sel.colclip);
sm.AddMacro("PS_BLEND_A", sel.blend_a);
sm.AddMacro("PS_BLEND_B", sel.blend_b);

View File

@ -4397,6 +4397,9 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
float scale = tex->GetScale();
HandleTextureHazards(rt, ds, tex, tmm, source_region, target_region, unscaled_size, scale, src_copy);
if (tex->m_target && tex->m_from_target && tex->m_target_direct && tex->m_from_target->m_rt_alpha_scale)
m_conf.ps.rta_source_correction = 1;
// Warning: fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
//const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM];
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
@ -5304,7 +5307,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
ds->m_alpha_min &= 128;
}
}
// If we Correct/Decorrect and tex is rt, we will need to update the texture reference
const bool req_src_update = tex && rt && tex->m_target && tex->m_target_direct && tex->m_texture == rt->m_texture;
if (rt)
{
const bool rta_decorrection = m_channel_shuffle || m_texture_shuffle || std::max(blend_alpha_max, rt->m_alpha_max) > 128 || m_conf.ps.fbmask || m_conf.ps.tex_is_fb;
@ -5317,6 +5323,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
if (req_src_update)
tex->m_texture = rt->m_texture;
}
else if (m_conf.colormask.wa)
{
@ -5328,6 +5337,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
if (req_src_update)
tex->m_texture = rt->m_texture;
}
}
}
@ -5337,6 +5349,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
if (req_src_update)
tex->m_texture = rt->m_texture;
}
}
else if (rt->m_last_draw == s_n)
@ -5347,6 +5362,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
{
rt->RTADecorrect(rt);
m_conf.rt = rt->m_texture;
if (req_src_update)
tex->m_texture = rt->m_texture;
}
}
@ -5357,6 +5375,9 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
if ((!IsOpaque() || m_context->ALPHA.IsBlack()) && rt && ((m_conf.colormask.wrgba & 0x7) || (m_texture_shuffle && !m_copy_16bit_to_target_shuffle && !m_same_group_texture_shuffle)))
{
EmulateBlending(blend_alpha_min, blend_alpha_max, DATE_PRIMID, DATE_BARRIER, blending_alpha_pass, rt);
if (req_src_update && tex->m_texture != rt->m_texture)
tex->m_texture = rt->m_texture;
}
else
{

View File

@ -1690,9 +1690,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const
}
#endif
if (dst && (GSUtil::GetChannelMask(TEX0.PSM) & 0x8))
dst->RTADecorrect(dst);
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region);
if (!src) [[unlikely]]
return nullptr;
@ -4299,7 +4296,16 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// copy the rt in
const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy()));
if (!area.rempty())
g_gs_device->CopyRect(sTex, dTex, area, 0, 0);
{
if (dst->m_rt_alpha_scale)
{
const GSVector4 sRectF = GSVector4(area) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(area), ShaderConvert::RTA_DECORRECTION, false);
}
else
g_gs_device->CopyRect(sTex, dTex, area, 0, 0);
}
src->m_texture = dTex;
src->m_unscaled_size = GSVector2i(tw, th);
@ -4317,6 +4323,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_region.SetY(y_offset, region.GetMaxY() + y_offset);
else
src->m_region.SetY(y_offset, y_offset + th);
src->m_target_direct = true;
src->m_texture = dst->m_texture;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_shared_texture = true;
@ -4554,6 +4562,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
{
// sample the target directly
src->m_texture = dst->m_texture;
src->m_target_direct = true;
src->m_scale = dst->m_scale;
src->m_unscaled_size = dst->m_unscaled_size;
src->m_shared_texture = true;
@ -4600,7 +4609,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
if (use_texture)
{
g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY);
if (dst->m_rt_alpha_scale)
{
const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(destX, destY, sRect.width(), sRect.height()), ShaderConvert::RTA_DECORRECTION, false);
}
else
g_gs_device->CopyRect(sTex, dTex, sRect, destX, destY);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
#ifdef PCSX2_DEVBUILD
@ -4615,12 +4631,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
{
if (is_8bits)
{
if (dst->m_rt_alpha_scale)
{
dst->RTADecorrect(dst);
sTex = dst->m_texture;
}
g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset,
std::max<u32>(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex,
std::max<u32>(TEX0.TBW, 1u) * 64, TEX0.PSM);
}
else
{
if (dst->m_rt_alpha_scale && shader == ShaderConvert::COPY)
shader = ShaderConvert::RTA_DECORRECTION;
const GSVector4 sRectF = GSVector4(sRect) / GSVector4(1, 1, sTex->GetWidth(), sTex->GetHeight());
g_gs_device->StretchRect(
sTex, sRectF, dTex, GSVector4(destX, destY, new_size.x, new_size.y), shader, false);

View File

@ -283,6 +283,7 @@ public:
u8 m_valid_hashes = 0;
u8 m_complete_layers = 0;
bool m_target = false;
bool m_target_direct = false;
bool m_repeating = false;
std::pair<u8, u8> m_alpha_minmax = {0u, 255u};
std::vector<GSVector2i>* m_p2t = nullptr;

View File

@ -1789,60 +1789,61 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
}
else
{
setFnConstantB(m_fn_constants, pssel.fst, GSMTLConstantIndex_FST);
setFnConstantB(m_fn_constants, pssel.iip, GSMTLConstantIndex_IIP);
setFnConstantI(m_fn_constants, pssel.aem_fmt, GSMTLConstantIndex_PS_AEM_FMT);
setFnConstantI(m_fn_constants, pssel.pal_fmt, GSMTLConstantIndex_PS_PAL_FMT);
setFnConstantI(m_fn_constants, pssel.dst_fmt, GSMTLConstantIndex_PS_DST_FMT);
setFnConstantI(m_fn_constants, pssel.depth_fmt, GSMTLConstantIndex_PS_DEPTH_FMT);
setFnConstantB(m_fn_constants, pssel.aem, GSMTLConstantIndex_PS_AEM);
setFnConstantB(m_fn_constants, pssel.fba, GSMTLConstantIndex_PS_FBA);
setFnConstantB(m_fn_constants, pssel.fog, GSMTLConstantIndex_PS_FOG);
setFnConstantI(m_fn_constants, pssel.date, GSMTLConstantIndex_PS_DATE);
setFnConstantI(m_fn_constants, pssel.atst, GSMTLConstantIndex_PS_ATST);
setFnConstantI(m_fn_constants, pssel.tfx, GSMTLConstantIndex_PS_TFX);
setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC);
setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS);
setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT);
setFnConstantB(m_fn_constants, pssel.adjs, GSMTLConstantIndex_PS_ADJS);
setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT);
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME);
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);
setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK);
setFnConstantI(m_fn_constants, pssel.blend_a, GSMTLConstantIndex_PS_BLEND_A);
setFnConstantI(m_fn_constants, pssel.blend_b, GSMTLConstantIndex_PS_BLEND_B);
setFnConstantI(m_fn_constants, pssel.blend_c, GSMTLConstantIndex_PS_BLEND_C);
setFnConstantI(m_fn_constants, pssel.blend_d, GSMTLConstantIndex_PS_BLEND_D);
setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW);
setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED);
setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR);
setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION);
setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP);
setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);
setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV);
setFnConstantB(m_fn_constants, pssel.fixed_one_a, GSMTLConstantIndex_PS_FIXED_ONE_A);
setFnConstantB(m_fn_constants, pssel.pabe, GSMTLConstantIndex_PS_PABE);
setFnConstantB(m_fn_constants, pssel.no_color, GSMTLConstantIndex_PS_NO_COLOR);
setFnConstantB(m_fn_constants, pssel.no_color1, GSMTLConstantIndex_PS_NO_COLOR1);
setFnConstantB(m_fn_constants, pssel.fst, GSMTLConstantIndex_FST);
setFnConstantB(m_fn_constants, pssel.iip, GSMTLConstantIndex_IIP);
setFnConstantI(m_fn_constants, pssel.aem_fmt, GSMTLConstantIndex_PS_AEM_FMT);
setFnConstantI(m_fn_constants, pssel.pal_fmt, GSMTLConstantIndex_PS_PAL_FMT);
setFnConstantI(m_fn_constants, pssel.dst_fmt, GSMTLConstantIndex_PS_DST_FMT);
setFnConstantI(m_fn_constants, pssel.depth_fmt, GSMTLConstantIndex_PS_DEPTH_FMT);
setFnConstantB(m_fn_constants, pssel.aem, GSMTLConstantIndex_PS_AEM);
setFnConstantB(m_fn_constants, pssel.fba, GSMTLConstantIndex_PS_FBA);
setFnConstantB(m_fn_constants, pssel.fog, GSMTLConstantIndex_PS_FOG);
setFnConstantI(m_fn_constants, pssel.date, GSMTLConstantIndex_PS_DATE);
setFnConstantI(m_fn_constants, pssel.atst, GSMTLConstantIndex_PS_ATST);
setFnConstantI(m_fn_constants, pssel.tfx, GSMTLConstantIndex_PS_TFX);
setFnConstantB(m_fn_constants, pssel.tcc, GSMTLConstantIndex_PS_TCC);
setFnConstantI(m_fn_constants, pssel.wms, GSMTLConstantIndex_PS_WMS);
setFnConstantI(m_fn_constants, pssel.wmt, GSMTLConstantIndex_PS_WMT);
setFnConstantB(m_fn_constants, pssel.adjs, GSMTLConstantIndex_PS_ADJS);
setFnConstantB(m_fn_constants, pssel.adjt, GSMTLConstantIndex_PS_ADJT);
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
setFnConstantB(m_fn_constants, pssel.shuffle_same, GSMTLConstantIndex_PS_SHUFFLE_SAME);
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);
setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK);
setFnConstantI(m_fn_constants, pssel.blend_a, GSMTLConstantIndex_PS_BLEND_A);
setFnConstantI(m_fn_constants, pssel.blend_b, GSMTLConstantIndex_PS_BLEND_B);
setFnConstantI(m_fn_constants, pssel.blend_c, GSMTLConstantIndex_PS_BLEND_C);
setFnConstantI(m_fn_constants, pssel.blend_d, GSMTLConstantIndex_PS_BLEND_D);
setFnConstantI(m_fn_constants, pssel.blend_hw, GSMTLConstantIndex_PS_BLEND_HW);
setFnConstantB(m_fn_constants, pssel.a_masked, GSMTLConstantIndex_PS_A_MASKED);
setFnConstantB(m_fn_constants, pssel.hdr, GSMTLConstantIndex_PS_HDR);
setFnConstantB(m_fn_constants, pssel.rta_correction, GSMTLConstantIndex_PS_RTA_CORRECTION);
setFnConstantB(m_fn_constants, pssel.rta_source_correction, GSMTLConstantIndex_PS_RTA_SRC_CORRECTION);
setFnConstantB(m_fn_constants, pssel.colclip, GSMTLConstantIndex_PS_COLCLIP);
setFnConstantI(m_fn_constants, pssel.blend_mix, GSMTLConstantIndex_PS_BLEND_MIX);
setFnConstantB(m_fn_constants, pssel.round_inv, GSMTLConstantIndex_PS_ROUND_INV);
setFnConstantB(m_fn_constants, pssel.fixed_one_a, GSMTLConstantIndex_PS_FIXED_ONE_A);
setFnConstantB(m_fn_constants, pssel.pabe, GSMTLConstantIndex_PS_PABE);
setFnConstantB(m_fn_constants, pssel.no_color, GSMTLConstantIndex_PS_NO_COLOR);
setFnConstantB(m_fn_constants, pssel.no_color1, GSMTLConstantIndex_PS_NO_COLOR1);
// no_ablend ignored for now (No Metal driver has had DSB so broken that it's needed to be disabled, though Intel's was pretty close)
setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA);
setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL);
setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER);
setFnConstantI(m_fn_constants, pssel.dither_adjust, GSMTLConstantIndex_PS_DITHER_ADJUST);
setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP);
setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK);
setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE);
setFnConstantB(m_fn_constants, pssel.tales_of_abyss_hle, GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE);
setFnConstantB(m_fn_constants, pssel.tex_is_fb, GSMTLConstantIndex_PS_TEX_IS_FB);
setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD);
setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD);
setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER);
setFnConstantB(m_fn_constants, pssel.region_rect, GSMTLConstantIndex_PS_REGION_RECT);
setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK);
setFnConstantB(m_fn_constants, pssel.only_alpha, GSMTLConstantIndex_PS_ONLY_ALPHA);
setFnConstantI(m_fn_constants, pssel.channel, GSMTLConstantIndex_PS_CHANNEL);
setFnConstantI(m_fn_constants, pssel.dither, GSMTLConstantIndex_PS_DITHER);
setFnConstantI(m_fn_constants, pssel.dither_adjust, GSMTLConstantIndex_PS_DITHER_ADJUST);
setFnConstantB(m_fn_constants, pssel.zclamp, GSMTLConstantIndex_PS_ZCLAMP);
setFnConstantB(m_fn_constants, pssel.tcoffsethack, GSMTLConstantIndex_PS_TCOFFSETHACK);
setFnConstantB(m_fn_constants, pssel.urban_chaos_hle, GSMTLConstantIndex_PS_URBAN_CHAOS_HLE);
setFnConstantB(m_fn_constants, pssel.tales_of_abyss_hle, GSMTLConstantIndex_PS_TALES_OF_ABYSS_HLE);
setFnConstantB(m_fn_constants, pssel.tex_is_fb, GSMTLConstantIndex_PS_TEX_IS_FB);
setFnConstantB(m_fn_constants, pssel.automatic_lod, GSMTLConstantIndex_PS_AUTOMATIC_LOD);
setFnConstantB(m_fn_constants, pssel.manual_lod, GSMTLConstantIndex_PS_MANUAL_LOD);
setFnConstantB(m_fn_constants, pssel.point_sampler, GSMTLConstantIndex_PS_POINT_SAMPLER);
setFnConstantB(m_fn_constants, pssel.region_rect, GSMTLConstantIndex_PS_REGION_RECT);
setFnConstantI(m_fn_constants, pssel.scanmsk, GSMTLConstantIndex_PS_SCANMSK);
auto newps = LoadShader(@"ps_main");
ps = newps;
m_hw_ps.insert(std::make_pair(pssel, std::move(newps)));

View File

@ -183,6 +183,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_A_MASKED,
GSMTLConstantIndex_PS_HDR,
GSMTLConstantIndex_PS_RTA_CORRECTION,
GSMTLConstantIndex_PS_RTA_SRC_CORRECTION,
GSMTLConstantIndex_PS_COLCLIP,
GSMTLConstantIndex_PS_BLEND_MIX,
GSMTLConstantIndex_PS_ROUND_INV,

View File

@ -129,13 +129,13 @@ fragment float4 ps_primid_rta_init_datm0(float4 p [[position]], DirectReadTextur
// Reads the texel at the fragment position and rescales only its alpha channel; RGB is returned unchanged.
fragment float4 ps_rta_correction(float4 p [[position]], DirectReadTextureIn<float> tex)
{
float4 in = tex.read(p);
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// return below is presumably the removed formula ((a * 255 + 0.1) / 127.5) - confirm.
return float4(in.rgb, (in.a * 255.f + 0.1f) / 127.5f);
// Presumably the replacement line: divides alpha by 128.5/255, the same factor that
// ps_rta_decorrection multiplies by. As listed, it sits after the return above.
return float4(in.rgb, in.a / (128.5f / 255.0f));
}
// Reads the texel at the fragment position and scales only its alpha channel back down; RGB is returned unchanged.
fragment float4 ps_rta_decorrection(float4 p [[position]], DirectReadTextureIn<float> tex)
{
float4 in = tex.read(p);
// NOTE(review): this listing appears to be a diff with the +/- markers stripped, so the
// return below is presumably the removed formula ((a * 127.5 + 0.1) / 255) - confirm.
return float4(in.rgb, (in.a * 127.5f + 0.1f) / 255.f);
// Presumably the replacement line: multiplies alpha by 128.5/255, the same factor that
// ps_rta_correction divides by. As listed, it sits after the return above.
return float4(in.rgb, in.a * (128.5f / 255.0f));
}
fragment float4 ps_hdr_init(float4 p [[position]], DirectReadTextureIn<float> tex)

View File

@ -42,6 +42,7 @@ constant uint PS_BLEND_HW [[function_constant(GSMTLConstantIndex_PS_BL
constant bool PS_A_MASKED [[function_constant(GSMTLConstantIndex_PS_A_MASKED)]];
constant bool PS_HDR [[function_constant(GSMTLConstantIndex_PS_HDR)]];
constant bool PS_RTA_CORRECTION [[function_constant(GSMTLConstantIndex_PS_RTA_CORRECTION)]];
constant bool PS_RTA_SRC_CORRECTION [[function_constant(GSMTLConstantIndex_PS_RTA_SRC_CORRECTION)]];
constant bool PS_COLCLIP [[function_constant(GSMTLConstantIndex_PS_COLCLIP)]];
constant uint PS_BLEND_MIX [[function_constant(GSMTLConstantIndex_PS_BLEND_MIX)]];
constant bool PS_ROUND_INV [[function_constant(GSMTLConstantIndex_PS_ROUND_INV)]];
@ -480,9 +481,18 @@ struct PSMain
c.y = sample_c(uv.zy).a;
c.z = sample_c(uv.xw).a;
c.w = sample_c(uv.zw).a;
uint4 i = uint4(c * 255.5f); // Denormalize value
uint4 i;
if (PS_RTA_SRC_CORRECTION)
{
i = uint4(c * 128.25f); // Denormalize value
}
else
{
i = uint4(c * 255.5f); // Denormalize value
}
if (PS_PAL_FMT == 1)
return i & 0xF;
if (PS_PAL_FMT == 2)
@ -714,9 +724,13 @@ struct PSMain
else
t = c[0];
if (PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_RTA_SRC_CORRECTION)
t.a = t.a * (128.5f / 255.0f);
// The 0.05f helps to fix the overbloom of sotc
// I think the issue is related to the rounding of texture coordinate. The linear (from fixed unit)
// interpolation could be slightly below the correct one.
return trunc(t * 255.f + 0.05f);
}
@ -915,7 +929,7 @@ struct PSMain
return;
}
float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 127.75f) / 128.f : trunc(current_color.a * 255.5f) / 128.f;
float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 128.1f) / 128.f : trunc(current_color.a * 255.1f) / 128.f;
float3 Cd = trunc(current_color.rgb * 255.5f);
float3 Cs = Color.rgb;
@ -1047,7 +1061,7 @@ struct PSMain
float4 alpha_blend = float4(0.f);
if (SW_AD_TO_HW)
{
alpha_blend = PS_RTA_CORRECTION ? float4(trunc(current_color.a * 127.75f) / 128.f) : float4(trunc(current_color.a * 255.5f) / 128.f);
alpha_blend = PS_RTA_CORRECTION ? float4(trunc(current_color.a * 128.f) / 128.f) : float4(trunc(current_color.a * 255.5f) / 128.f);
}
else
{

View File

@ -1373,6 +1373,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_FBMASK {}\n", sel.fbmask)
+ fmt::format("#define PS_HDR {}\n", sel.hdr)
+ fmt::format("#define PS_RTA_CORRECTION {}\n", sel.rta_correction)
+ fmt::format("#define PS_RTA_SRC_CORRECTION {}\n", sel.rta_source_correction)
+ fmt::format("#define PS_DITHER {}\n", sel.dither)
+ fmt::format("#define PS_DITHER_ADJUST {}\n", sel.dither_adjust)
+ fmt::format("#define PS_ZCLAMP {}\n", sel.zclamp)

View File

@ -4791,6 +4791,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_FBMASK", sel.fbmask);
AddMacro(ss, "PS_HDR", sel.hdr);
AddMacro(ss, "PS_RTA_CORRECTION", sel.rta_correction);
AddMacro(ss, "PS_RTA_SRC_CORRECTION", sel.rta_source_correction);
AddMacro(ss, "PS_DITHER", sel.dither);
AddMacro(ss, "PS_DITHER_ADJUST", sel.dither_adjust);
AddMacro(ss, "PS_ZCLAMP", sel.zclamp);