GS-HW: Fix real 16bit value shuffles

This commit is contained in:
refractionpcsx2 2023-02-16 10:25:46 +00:00
parent 03f0f2f803
commit 925e874ada
13 changed files with 87 additions and 40 deletions

View File

@ -38,6 +38,7 @@
#define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0
#define PS_READ_BA 0
#define PS_READ16_SRC 0
#define PS_DFMT 0
#define PS_DEPTH_FMT 0
#define PS_PAL_FMT 0
@ -878,6 +879,16 @@ PS_OUTPUT ps_main(PS_INPUT input)
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f);
// PS_READ16_SRC path: repack the 8-bit channels in denorm_c into the two bytes of a 16-bit pixel.
// NOTE(review): the bit positions match a 5:5:5:1 (RGB5A1) layout - confirm against the source format.
if (PS_READ16_SRC)
{
// Low byte -> R and B outputs: top 5 bits of red in bits 0-4, low 3 bits of the 5-bit green in bits 5-7.
C.rb = (float2)float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5));
// High byte -> G and A outputs: top 2 bits of green in bits 0-1, top 5 bits of blue in bits 2-6.
// Bit 7 comes from TA.y when the top bit of the source alpha is set, otherwise from TA.x.
if (denorm_c.a & 0x80u)
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u));
else
C.ga = (float2)float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u));
}
else
{
// Mask will take care of the correct destination
if (PS_READ_BA)
C.rb = C.bb;
@ -899,6 +910,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
}
}
}
// Must be done before alpha correction

View File

@ -871,7 +871,13 @@ void ps_main()
#if PS_SHUFFLE
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// PS_READ16_SRC path: repack the 8-bit channels in denorm_c into the two bytes of a 16-bit pixel.
// NOTE(review): the bit positions match a 5:5:5:1 (RGB5A1) layout - confirm against the source format.
#if PS_READ16_SRC
// Low byte -> R and B outputs: top 5 bits of red in bits 0-4, low 3 bits of the 5-bit green in bits 5-7.
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
// High byte -> G and A outputs: top 2 bits of green in bits 0-1, top 5 bits of blue in bits 2-6.
// Bit 7 comes from TA.y when the top bit of the source alpha is set, otherwise from TA.x.
if (bool(denorm_c.a & 0x80u))
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
#else
// Write RB part. Mask will take care of the correct destination
#if PS_READ_BA
C.rb = C.bb;
@ -907,9 +913,10 @@ void ps_main()
// float sel = step(128.0f, c.g);
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
#endif
#endif // PS_READ_BA
#endif
#endif // PS_READ16_SRC
#endif // PS_SHUFFLE
// Must be done before alpha correction

View File

@ -328,6 +328,7 @@ void main()
#define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0
#define PS_READ_BA 0
#define PS_READ16_SRC 0
#define PS_DFMT 0
#define PS_DEPTH_FMT 0
#define PS_PAL_FMT 0
@ -1175,7 +1176,13 @@ void main()
#if PS_SHUFFLE
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// PS_READ16_SRC path: repack the 8-bit channels in denorm_c into the two bytes of a 16-bit pixel.
// NOTE(review): the bit positions match a 5:5:5:1 (RGB5A1) layout - confirm against the source format.
#if PS_READ16_SRC
// Low byte -> R and B outputs: top 5 bits of red in bits 0-4, low 3 bits of the 5-bit green in bits 5-7.
C.rb = vec2(float((denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5)));
// High byte -> G and A outputs: top 2 bits of green in bits 0-1, top 5 bits of blue in bits 2-6.
// Bit 7 comes from TA.y when the top bit of the source alpha is set, otherwise from TA.x.
if ((denorm_c.a & 0x80u) != 0u)
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80u)));
else
C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u)));
#else
// Mask will take care of the correct destination
#if PS_READ_BA
C.rb = C.bb;
@ -1195,6 +1202,7 @@ void main()
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
#endif
#endif
#endif
// Must be done before alpha correction

View File

@ -314,6 +314,7 @@ struct alignas(16) GSHWDrawConfig
u32 ltf : 1;
// Shuffle and fbmask effect
u32 shuffle : 1;
u32 real16src: 1;
u32 read_ba : 1;
u32 write_rg : 1;
u32 fbmask : 1;

View File

@ -34,6 +34,7 @@ private:
protected:
GSVector2i m_real_size{0, 0};
bool m_texture_shuffle = false;
bool m_copy_16bit_to_target_shuffle = false;
virtual GSTexture* GetOutput(int i, int& y_offset) = 0;
virtual GSTexture* GetFeedbackOutput() { return nullptr; }

View File

@ -160,6 +160,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler);
sm.AddMacro("PS_SHUFFLE", sel.shuffle);
sm.AddMacro("PS_READ_BA", sel.read_ba);
sm.AddMacro("PS_READ16_SRC", sel.real16src);
sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
sm.AddMacro("PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);

View File

@ -1501,6 +1501,7 @@ const ID3DBlob* GSDevice12::GetTFXPixelShader(const GSHWDrawConfig::PSSelector&
sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler);
sm.AddMacro("PS_SHUFFLE", sel.shuffle);
sm.AddMacro("PS_READ_BA", sel.read_ba);
sm.AddMacro("PS_READ16_SRC", sel.real16src);
sm.AddMacro("PS_CHANNEL_FETCH", sel.channel);
sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle);
sm.AddMacro("PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle);

View File

@ -1395,6 +1395,7 @@ void GSRendererHW::Draw()
m_src = nullptr;
m_texture_shuffle = false;
m_copy_16bit_to_target_shuffle = false;
m_tex_is_fb = false;
// The rectangle of the draw
@ -1579,12 +1580,11 @@ void GSRendererHW::Draw()
{
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
bool copy_16bit_to_target_shuffle = false;
if (rt)
{
// Detect a copy of a 16-bit source into this target. To reduce false positives the draw must read from a
// different block than it writes (TEX0.TBP0 != FRAME.Block()), the target must be flagged as 32-bit,
// the draw must be opaque, and bit 0 of TEX1.MMIN must be clear (the "not bilinear" check).
copy_16bit_to_target_shuffle = context->TEX0.TBP0 != context->FRAME.Block() && rt->m_32_bits_fmt == true && IsOpaque() && !(context->TEX1.MMIN & 1);
// The member-flag form additionally requires that the source is not flagged as 32-bit and that
// FRAME.FBMSK is non-zero.
// NOTE(review): m_src is dereferenced here - confirm it is always non-null when rt is set.
m_copy_16bit_to_target_shuffle = context->TEX0.TBP0 != context->FRAME.Block() && rt->m_32_bits_fmt == true && IsOpaque()
&& !(context->TEX1.MMIN & 1) && !m_src->m_32_bits_fmt && context->FRAME.FBMSK;
}
// Hypothesis: texture shuffle is used as a postprocessing effect so texture will be an old target.
@ -1592,7 +1592,7 @@ void GSRendererHW::Draw()
//
// Both input and output are 16 bits and texture was initially 32 bits!
m_texture_shuffle = (GSLocalMemory::m_psm[context->FRAME.PSM].bpp == 16) && (tex_psm.bpp == 16)
&& draw_sprite_tex && (m_src->m_32_bits_fmt || copy_16bit_to_target_shuffle);
&& draw_sprite_tex && (m_src->m_32_bits_fmt || m_copy_16bit_to_target_shuffle);
// Okami mustn't call this code
if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && ((m_context->FRAME.FBMSK & fm_mask) == 0))
@ -2208,7 +2208,7 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask()
m_conf.ps.write_rg = !write_ba && features.texture_barrier && m_context->TEST.DATE;
m_conf.ps.read_ba = read_ba;
m_conf.ps.real16src = m_copy_16bit_to_target_shuffle;
// Please bang my head against the wall!
// 1/ Reduce the frame mask to a 16 bit format
const u32 m = m_context->FRAME.FBMSK & GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk;

View File

@ -1379,6 +1379,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
setFnConstantB(m_fn_constants, pssel.ltf, GSMTLConstantIndex_PS_LTF);
setFnConstantB(m_fn_constants, pssel.shuffle, GSMTLConstantIndex_PS_SHUFFLE);
setFnConstantB(m_fn_constants, pssel.read_ba, GSMTLConstantIndex_PS_READ_BA);
setFnConstantB(m_fn_constants, pssel.real16src, GSMTLConstantIndex_PS_READ16_SRC);
setFnConstantB(m_fn_constants, pssel.write_rg, GSMTLConstantIndex_PS_WRITE_RG);
setFnConstantB(m_fn_constants, pssel.fbmask, GSMTLConstantIndex_PS_FBMASK);
setFnConstantI(m_fn_constants, pssel.blend_a, GSMTLConstantIndex_PS_BLEND_A);

View File

@ -175,6 +175,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_PS_LTF,
GSMTLConstantIndex_PS_SHUFFLE,
GSMTLConstantIndex_PS_READ_BA,
GSMTLConstantIndex_PS_READ16_SRC,
GSMTLConstantIndex_PS_WRITE_RG,
GSMTLConstantIndex_PS_FBMASK,
GSMTLConstantIndex_PS_BLEND_A,

View File

@ -42,6 +42,7 @@ constant bool PS_ADJT [[function_constant(GSMTLConstantIndex_PS_AD
constant bool PS_LTF [[function_constant(GSMTLConstantIndex_PS_LTF)]];
constant bool PS_SHUFFLE [[function_constant(GSMTLConstantIndex_PS_SHUFFLE)]];
constant bool PS_READ_BA [[function_constant(GSMTLConstantIndex_PS_READ_BA)]];
constant bool PS_READ16_SRC [[function_constant(GSMTLConstantIndex_PS_READ16_SRC)]];
constant bool PS_WRITE_RG [[function_constant(GSMTLConstantIndex_PS_WRITE_RG)]];
constant bool PS_FBMASK [[function_constant(GSMTLConstantIndex_PS_FBMASK)]];
constant uint PS_BLEND_A [[function_constant(GSMTLConstantIndex_PS_BLEND_A)]];
@ -950,12 +951,23 @@ struct PSMain
uint4 denorm_c = uint4(C);
uint2 denorm_TA = uint2(cb.ta * 255.5f);
// PS_READ16_SRC path: repack the 8-bit channels in denorm_c into the two bytes of a 16-bit pixel.
// NOTE(review): the bit positions match a 5:5:5:1 (RGB5A1) layout - confirm against the source format.
if (PS_READ16_SRC)
{
// Low byte -> R and B outputs: top 5 bits of red in bits 0-4, low 3 bits of the 5-bit green in bits 5-7.
C.rb = (denorm_c.r >> 3) | (((denorm_c.g >> 3) & 0x7u) << 5);
// High byte -> G and A outputs: top 2 bits of green in bits 0-1, top 5 bits of blue in bits 2-6.
// Bit 7 comes from TA.y when the top bit of the source alpha is set, otherwise from TA.x.
if (denorm_c.a & 0x80)
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.y & 0x80);
else
C.ga = (denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80);
}
else
{
C.rb = PS_READ_BA ? C.bb : C.rr;
if (PS_READ_BA)
C.ga = (denorm_c.a & 0x7F) | (denorm_c.a & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
else
C.ga = (denorm_c.g & 0x7F) | (denorm_c.g & 0x80 ? denorm_TA.y & 0x80 : denorm_TA.x & 0x80);
}
}
// Must be done before alpha correction

View File

@ -1060,6 +1060,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_IIP {}\n", sel.iip)
+ fmt::format("#define PS_SHUFFLE {}\n", sel.shuffle)
+ fmt::format("#define PS_READ_BA {}\n", sel.read_ba)
+ fmt::format("#define PS_READ16_SRC {}\n", sel.real16src)
+ fmt::format("#define PS_WRITE_RG {}\n", sel.write_rg)
+ fmt::format("#define PS_FBMASK {}\n", sel.fbmask)
+ fmt::format("#define PS_HDR {}\n", sel.hdr)

View File

@ -1983,6 +1983,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector
AddMacro(ss, "PS_IIP", sel.iip);
AddMacro(ss, "PS_SHUFFLE", sel.shuffle);
AddMacro(ss, "PS_READ_BA", sel.read_ba);
AddMacro(ss, "PS_READ16_SRC", sel.real16src);
AddMacro(ss, "PS_WRITE_RG", sel.write_rg);
AddMacro(ss, "PS_FBMASK", sel.fbmask);
AddMacro(ss, "PS_HDR", sel.hdr);