GS/HW: Rearrange color on shuffle if SW Blend or TFX

This commit is contained in:
refractionpcsx2 2024-04-02 21:26:50 +01:00
parent b1f4f67130
commit 30f4e77b31
6 changed files with 77 additions and 10 deletions

View File

@ -766,7 +766,7 @@ float4 ps_color(PS_INPUT input)
float4 T = sample_color(st, input.t.w); float4 T = sample_color(st, input.t.w);
#endif #endif
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{ {
uint4 denorm_c_before = uint4(T); uint4 denorm_c_before = uint4(T);
if (PS_PROCESS_BA & SHUFFLE_READ) if (PS_PROCESS_BA & SHUFFLE_READ)
@ -866,6 +866,25 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f; float4 RT = SW_BLEND_NEEDS_RT ? RtTexture.Load(int3(pos_xy, 0)) : (float4)0.0f;
// Shuffle + software blend that reads the render target: rebuild RT from one pair of its
// channels before Ad and Cd are derived from RT in the statements that follow this block.
// The masks/shifts take five bits for r, five for g (three from the first channel of the
// pair, two from the second), five for b and one for a, i.e. the pair is treated as a
// single 16-bit 5:5:5:1 value and each field is placed in the top bits of an 8-bit channel.
// NOTE(review): the statements after this block multiply RT by 255, which suggests RT is
// normalized when loaded; uint4(RT) truncates, so confirm the value range expected here.
if (PS_SHUFFLE && SW_BLEND_NEEDS_RT)
{
uint4 denorm_rt = uint4(RT);
if (PS_PROCESS_BA & SHUFFLE_WRITE)
{
// Pair is (b, a): b supplies the low bits, a the high bits.
// r: bits 0-4 of b shifted up to bits 3-7.
RT.r = float((denorm_rt.b << 3) & 0xF8);
// g: bits 5-7 of b land in bits 3-5, bits 0-1 of a land in bits 6-7.
RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
// b: bits 2-6 of a shifted up to bits 3-7.
RT.b = float((denorm_rt.a << 1) & 0xF8);
// a: only the top bit of a is kept.
RT.a = float(denorm_rt.a & 0x80);
}
else
{
// Same extraction, but the pair is (r, g) instead of (b, a).
RT.r = float((denorm_rt.r << 3) & 0xF8);
RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
RT.b = float((denorm_rt.g << 1) & 0xF8);
RT.a = float(denorm_rt.g & 0x80);
}
}
float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f; float Ad = PS_RTA_CORRECTION ? trunc(RT.a * 128.0f + 0.1f) / 128.0f : trunc(RT.a * 255.0f + 0.1f) / 128.0f;
float3 Cd = trunc(RT.rgb * 255.0f + 0.1f); float3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
float3 Cs = Color.rgb; float3 Cs = Color.rgb;
@ -1037,7 +1056,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
if (PS_SHUFFLE) if (PS_SHUFFLE)
{ {
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{ {
uint4 denorm_c_after = uint4(C); uint4 denorm_c_after = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ) if (PS_PROCESS_BA & SHUFFLE_READ)

View File

@ -686,7 +686,7 @@ vec4 ps_color()
vec4 T = sample_color(st); vec4 T = sample_color(st);
#endif #endif
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) #if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T); uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ) #if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8); T.r = float((denorm_c_before.b << 3) & 0xF8);
@ -807,6 +807,21 @@ float As = As_rgba.a;
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif #endif
// Shuffle + software blend that reads the render target: rebuild RT from one pair of its
// channels before Cd is derived from RT below. The masks/shifts take five bits for r, five
// for g (three from the first channel of the pair, two from the second), five for b and one
// for a, i.e. the pair is treated as a single 16-bit 5:5:5:1 value.
// NOTE(review): the Ad assignment visible just above this block runs first, so it reads
// RT.a from before the rearrangement; confirm that ordering is intended.
// NOTE(review): Cd below multiplies RT by 255, which suggests RT is normalized; uvec4(RT)
// truncates, so confirm the value range expected here.
#if PS_SHUFFLE && SW_BLEND_NEEDS_RT
uvec4 denorm_rt = uvec4(RT);
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
// Pair is (b, a): b supplies the low bits, a the high bits.
// r: bits 0-4 of b shifted up to bits 3-7.
RT.r = float((denorm_rt.b << 3) & 0xF8);
// g: bits 5-7 of b land in bits 3-5, bits 0-1 of a land in bits 6-7.
RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
// b: bits 2-6 of a shifted up to bits 3-7.
RT.b = float((denorm_rt.a << 1) & 0xF8);
// a: only the top bit of a is kept.
RT.a = float(denorm_rt.a & 0x80);
#else
// Same extraction, but the pair is (r, g) instead of (b, a).
RT.r = float((denorm_rt.r << 3) & 0xF8);
RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
RT.b = float((denorm_rt.g << 1) & 0xF8);
RT.a = float(denorm_rt.g & 0x80);
#endif
#endif
// Let the compiler do its jobs ! // Let the compiler do its jobs !
vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f); vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
vec3 Cs = Color.rgb; vec3 Cs = Color.rgb;
@ -1024,7 +1039,7 @@ void ps_main()
#if PS_SHUFFLE #if PS_SHUFFLE
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) #if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C); uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ) #if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));

View File

@ -953,7 +953,7 @@ vec4 ps_color()
vec4 T = sample_color(st); vec4 T = sample_color(st);
#endif #endif
#if SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) #if (SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_before = uvec4(T); uvec4 denorm_c_before = uvec4(T);
#if (PS_PROCESS_BA & SHUFFLE_READ) #if (PS_PROCESS_BA & SHUFFLE_READ)
T.r = float((denorm_c_before.b << 3) & 0xF8); T.r = float((denorm_c_before.b << 3) & 0xF8);
@ -1073,6 +1073,21 @@ void ps_blend(inout vec4 Color, inout vec4 As_rgba)
#else #else
float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f; float Ad = trunc(RT.a * 255.0f + 0.1f) / 128.0f;
#endif #endif
// Shuffle draw with the feedback loop enabled: rebuild RT from one pair of its channels
// before Cd is derived from RT below. The masks/shifts take five bits for r, five for g
// (three from the first channel of the pair, two from the second), five for b and one for
// a, i.e. the pair is treated as a single 16-bit 5:5:5:1 value.
// NOTE(review): the Ad assignment visible just above this block runs first, so it reads
// RT.a from before the rearrangement; confirm that ordering is intended.
// NOTE(review): Cd below multiplies RT by 255, which suggests RT is normalized; uvec4(RT)
// truncates, so confirm the value range expected here.
#if PS_SHUFFLE && PS_FEEDBACK_LOOP_IS_NEEDED
uvec4 denorm_rt = uvec4(RT);
#if (PS_PROCESS_BA & SHUFFLE_WRITE)
// Pair is (b, a): b supplies the low bits, a the high bits.
// r: bits 0-4 of b shifted up to bits 3-7.
RT.r = float((denorm_rt.b << 3) & 0xF8);
// g: bits 5-7 of b land in bits 3-5, bits 0-1 of a land in bits 6-7.
RT.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
// b: bits 2-6 of a shifted up to bits 3-7.
RT.b = float((denorm_rt.a << 1) & 0xF8);
// a: only the top bit of a is kept.
RT.a = float(denorm_rt.a & 0x80);
#else
// Same extraction, but the pair is (r, g) instead of (b, a).
RT.r = float((denorm_rt.r << 3) & 0xF8);
RT.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
RT.b = float((denorm_rt.g << 1) & 0xF8);
RT.a = float(denorm_rt.g & 0x80);
#endif
#endif
// Let the compiler do its jobs ! // Let the compiler do its jobs !
vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f); vec3 Cd = trunc(RT.rgb * 255.0f + 0.1f);
@ -1289,7 +1304,7 @@ void main()
ps_blend(C, alpha_blend); ps_blend(C, alpha_blend);
#if PS_SHUFFLE #if PS_SHUFFLE
#if SW_BLEND && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE) #if (SW_BLEND || PS_TFX != 1) && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)
uvec4 denorm_c_after = uvec4(C); uvec4 denorm_c_after = uvec4(C);
#if (PS_PROCESS_BA & SHUFFLE_READ) #if (PS_PROCESS_BA & SHUFFLE_READ)
C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0)); C.b = float(((denorm_c_after.r >> 3) & 0x1F) | ((denorm_c_after.g << 2) & 0xE0));

View File

@ -4040,7 +4040,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
// Condition 1: Require full sw blend for full barrier. // Condition 1: Require full sw blend for full barrier.
// Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead. // Condition 2: One barrier is already enabled, prims don't overlap so let's use sw blend instead.
// Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend. // Condition 3: A shuffle is unlikely to overlap, so when a barrier is enabled like from fbmask we can prefer full sw blend.
const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && (no_prim_overlap || m_conf.ps.shuffle)); const bool prefer_sw_blend = (features.texture_barrier && m_conf.require_full_barrier) || (m_conf.require_one_barrier && no_prim_overlap) || m_conf.ps.shuffle;
const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb const bool free_blend = blend_non_recursive // Free sw blending, doesn't require barriers or reading fb
|| accumulation_blend; // Mix of hw/sw blending || accumulation_blend; // Mix of hw/sw blending

View File

@ -948,7 +948,6 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const bool is_depth, c
// FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ??? // FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ???
if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) if (t->m_age <= 1 && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{ {
pxAssert(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth);
dst = t; dst = t;
inside_target = false; inside_target = false;
break; break;

View File

@ -831,7 +831,7 @@ struct PSMain
else else
T = sample_color(st); T = sample_color(st);
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{ {
uint4 denorm_c_before = uint4(T); uint4 denorm_c_before = uint4(T);
if (PS_PROCESS_BA & SHUFFLE_READ) if (PS_PROCESS_BA & SHUFFLE_READ)
@ -936,6 +936,25 @@ struct PSMain
float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 128.1f) / 128.f : trunc(current_color.a * 255.1f) / 128.f; float Ad = PS_RTA_CORRECTION ? trunc(current_color.a * 128.1f) / 128.f : trunc(current_color.a * 255.1f) / 128.f;
// Shuffle draw that reads the render target: rebuild current_color from one pair of its
// channels before Cd is derived from it below. The masks/shifts take five bits for r, five
// for g (three from the first channel of the pair, two from the second), five for b and one
// for a, i.e. the pair is treated as a single 16-bit 5:5:5:1 value.
// NOTE(review): Ad on the preceding line is computed first, so it reads current_color.a
// from before the rearrangement; confirm that ordering is intended.
// NOTE(review): Cd below multiplies current_color by 255.5, which suggests it is normalized;
// uint4(current_color) truncates, so confirm the value range expected here.
if (PS_SHUFFLE && NEEDS_RT)
{
uint4 denorm_rt = uint4(current_color);
if (PS_PROCESS_BA & SHUFFLE_WRITE)
{
// Pair is (b, a): b supplies the low bits, a the high bits.
// r: bits 0-4 of b shifted up to bits 3-7.
current_color.r = float((denorm_rt.b << 3) & 0xF8);
// g: bits 5-7 of b land in bits 3-5, bits 0-1 of a land in bits 6-7.
current_color.g = float(((denorm_rt.b >> 2) & 0x38) | ((denorm_rt.a << 6) & 0xC0));
// b: bits 2-6 of a shifted up to bits 3-7.
current_color.b = float((denorm_rt.a << 1) & 0xF8);
// a: only the top bit of a is kept.
current_color.a = float(denorm_rt.a & 0x80);
}
else
{
// Same extraction, but the pair is (r, g) instead of (b, a).
current_color.r = float((denorm_rt.r << 3) & 0xF8);
current_color.g = float(((denorm_rt.r >> 2) & 0x38) | ((denorm_rt.g << 6) & 0xC0));
current_color.b = float((denorm_rt.g << 1) & 0xF8);
current_color.a = float(denorm_rt.g & 0x80);
}
}
float3 Cd = trunc(current_color.rgb * 255.5f); float3 Cd = trunc(current_color.rgb * 255.5f);
float3 Cs = Color.rgb; float3 Cs = Color.rgb;
@ -1105,7 +1124,7 @@ struct PSMain
if (PS_SHUFFLE) if (PS_SHUFFLE)
{ {
if (SW_BLEND && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE)) if ((SW_BLEND || PS_TFX != 1) && PS_SHUFFLE && !PS_SHUFFLE_SAME && !PS_READ16_SRC && (PS_SHUFFLE_ACROSS || PS_PROCESS_BA == SHUFFLE_READWRITE || PS_PROCESS_RG == SHUFFLE_READWRITE))
{ {
uint4 denorm_c_after = uint4(C); uint4 denorm_c_after = uint4(C);
if (PS_PROCESS_BA & SHUFFLE_READ) if (PS_PROCESS_BA & SHUFFLE_READ)