diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index ab15f78c95..469baa2fdd 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -38,6 +38,7 @@ #define PS_POINT_SAMPLER 0 #define PS_SHUFFLE 0 #define PS_READ_BA 0 +#define PS_SWAP_GA 0 #define PS_DFMT 0 #define PS_DEPTH_FMT 0 #define PS_PAL_FMT 0 @@ -875,28 +876,37 @@ PS_OUTPUT ps_main(PS_INPUT input) if (PS_SHUFFLE) { - uint4 denorm_c = uint4(C); - uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); - - // Mask will take care of the correct destination - if (PS_READ_BA) - C.rb = C.bb; - else - C.rb = C.rr; - - if (PS_READ_BA) + if(PS_SWAP_GA) { - if (denorm_c.a & 0x80u) - C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + float4 RT = trunc(RtTexture.Load(int3(input.p.xy, 0)) * 255.0f + 0.1f); /* RtTexture texel at this pixel scaled to 0-255; its G and A are written out exchanged, as in the Vulkan shader. */ + C.a = RT.g; + C.g = RT.a; } else { - if (denorm_c.g & 0x80u) - C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + uint4 denorm_c = uint4(C); + uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); + + // Mask will take care of the correct destination + if (PS_READ_BA) + C.rb = C.bb; else - C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + C.rb = C.rr; + + if (PS_READ_BA) + { + if (denorm_c.a & 0x80u) + C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = (float2)(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + } + else + { + if (denorm_c.g & 0x80u) + C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = (float2)(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + } } } diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index bdb336f0eb..b0254d72ee 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -347,6 +347,7 @@ void main() #define PS_ZCLAMP 0 
#define PS_FEEDBACK_LOOP 0 #define PS_TEX_IS_FB 0 +#define PS_SWAP_GA 0 #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) @@ -1173,26 +1174,32 @@ void main() vec4 C = ps_color(); #if PS_SHUFFLE - uvec4 denorm_c = uvec4(C); - uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); - - // Mask will take care of the correct destination - #if PS_READ_BA - C.rb = C.bb; + #if PS_SWAP_GA + vec4 RT = trunc(subpassLoad(RtSampler) * 255.0f + 0.1f); + C.a = RT.g; + C.g = RT.a; #else - C.rb = C.rr; - #endif + uvec4 denorm_c = uvec4(C); + uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); - #if PS_READ_BA - if ((denorm_c.a & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); - #else - if ((denorm_c.g & 0x80u) != 0u) - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + // Mask will take care of the correct destination + #if PS_READ_BA + C.rb = C.bb; + #else + C.rb = C.rr; + #endif + + #if PS_READ_BA + if ((denorm_c.a & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + #else + if ((denorm_c.g & 0x80u) != 0u) + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + #endif #endif #endif diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 755da42574..c0902d3a99 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3517,6 +3517,13 @@ __forceinline void GSState::VertexKick(u32 skip) } CLUTAutoFlush(prim); + + if (GSLocalMemory::m_psm[m_prev_env.CTXT[m_prev_env.PRIM.CTXT].FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.PSM].bpp == 16 + && m_prev_env.CTXT[m_prev_env.PRIM.CTXT].FRAME.FBMSK == 0x3FFF && 
m_prev_env.CTXT[m_prev_env.PRIM.CTXT].ALPHA.IsOpaque()) + { + DevCon.Warning("Shuffle flush %d", s_n+1); + Flush(AUTOFLUSH); + } } /// Checks if region repeat is used (applying it does something to at least one of the values in min...max) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 5b536207e1..581691b6b8 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -357,6 +357,7 @@ struct alignas(16) GSHWDrawConfig // Scan mask u32 scanmsk : 2; + u32 swap_ga : 1; }; struct diff --git a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp index fafe075d88..08c3b0d3dd 100644 --- a/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSTextureFX11.cpp @@ -160,6 +160,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant sm.AddMacro("PS_POINT_SAMPLER", sel.point_sampler); sm.AddMacro("PS_SHUFFLE", sel.shuffle); sm.AddMacro("PS_READ_BA", sel.read_ba); + sm.AddMacro("PS_SWAP_GA", sel.swap_ga); sm.AddMacro("PS_CHANNEL_FETCH", sel.channel); sm.AddMacro("PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle); sm.AddMacro("PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 0fa80144c1..6a2ed7aa26 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -463,22 +463,47 @@ void GSRendererHW::ExpandIndices() } // Fix the vertex position/tex_coordinate from 16 bits color to 32 bits color -void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) +void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, bool& swap_ga) { const u32 count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; const GIFRegXYOFFSET& o = m_context->XYOFFSET; - + int first_vertex = (v[1].XYZ.X > v[0].XYZ.X) ? 
0 : 1; // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors - const int pos = (v[0].XYZ.X - o.OFX) & 0xFF; + const int pos = (v[first_vertex].XYZ.X - o.OFX) & 0xFF; + DevCon.Warning("Draw %d Pos %d(%d)",s_n, pos, pos >> 4); write_ba = (pos > 112 && pos < 136); - // Read texture is 8 to 16 pixels (same as above) const float tw = static_cast(1u << m_context->TEX0.TW); - int tex_pos = (PRIM->FST) ? v[0].U : static_cast(tw * v[0].ST.S); + + // Read texture is 8 to 16 pixels (same as above) + int tex_pos = (PRIM->FST) ? v[first_vertex].U : static_cast(tw * v[first_vertex].ST.S); tex_pos &= 0xFF; read_ba = (tex_pos > 112 && tex_pos < 144); + int coord_width = std::abs(v[first_vertex].XYZ.X - v[1 - first_vertex].XYZ.X) >> 4; + + int tex_width = m_vt.m_max.t.x - m_vt.m_min.t.x; + + // Probably Green/Alpha swap + if (tex_width > 14) + { + /*v[1 - first_vertex].XYZ.X = ((v[1 - first_vertex].XYZ.X - o.OFX) * 2) + o.OFX; + v[first_vertex].XYZ.X = ((v[first_vertex].XYZ.X - o.OFX) * 2) + o.OFX; + if (m_vt.m_max.p.y > 512.0f) + { + DevCon.Warning("Changing width from %d to %d", v[1 - first_vertex].XYZ.X, v[1 - first_vertex].XYZ.X + (tex_width << 4)); + v[1 - first_vertex].XYZ.X -= tex_width << 4; + }*/ + DevCon.Warning("Width %d Max y %f", tex_width, m_vt.m_max.p.y); + //read_ba = true; + //write_ba = false; + swap_ga = true; + return; + } + else + swap_ga = false; + bool half_bottom = false; switch (GSConfig.UserHacks_HalfBottomOverride) { @@ -541,15 +566,16 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) for (u32 i = 0; i < count; i += 2) { + first_vertex = (v[i+1].XYZ.X > v[i].XYZ.X) ? 
0 : 1; if (write_ba) - v[i].XYZ.X -= 128u; + v[i+first_vertex].XYZ.X -= 128u; else - v[i+1].XYZ.X += 128u; + v[i+(1-first_vertex)].XYZ.X += 128u; if (read_ba) - v[i].U -= 128u; + v[i + first_vertex].U -= 128u; else - v[i+1].U += 128u; + v[i+(1 - first_vertex)].U += 128u; if (!half_bottom) { @@ -557,13 +583,13 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) const int tex_offset = v[i].V & 0xF; const GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); - GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V); + GSVector4i tmp(v[i + first_vertex].XYZ.Y, v[i +first_vertex].V, v[i + (1 - first_vertex)].XYZ.Y, v[i + (1 - first_vertex)].V); tmp = GSVector4i(tmp - offset).srl32(1) + offset; - v[i].XYZ.Y = static_cast(tmp.x); - v[i].V = static_cast(tmp.y); - v[i + 1].XYZ.Y = static_cast(tmp.z); - v[i + 1].V = static_cast(tmp.w); + v[i + first_vertex].XYZ.Y = static_cast(tmp.x); + v[i + first_vertex].V = static_cast(tmp.y); + v[i + (1 - first_vertex)].XYZ.Y = static_cast(tmp.z); + v[i + (1 - first_vertex)].V = static_cast(tmp.w); } } } @@ -574,22 +600,24 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba) for (u32 i = 0; i < count; i += 2) { + first_vertex = (v[i + 1].XYZ.X > v[i].XYZ.X) ? 0 : 1; + if (write_ba) - v[i].XYZ.X -= 128u; + v[i+ first_vertex].XYZ.X -= 128u; else - v[i+1].XYZ.X += 128u; + v[i+(1- first_vertex)].XYZ.X += 128u; if (read_ba) - v[i].ST.S -= offset_8pix; + v[i + first_vertex].ST.S -= offset_8pix; else - v[i+1].ST.S += offset_8pix; + v[i + (1- first_vertex)].ST.S += offset_8pix; if (!half_bottom) { // Height is too big (2x). 
const GSVector4i offset(o.OFY, o.OFY); - GSVector4i tmp(v[i].XYZ.Y, v[i + 1].XYZ.Y); + GSVector4i tmp(v[i+ first_vertex].XYZ.Y, v[i + (1- first_vertex)].XYZ.Y); tmp = GSVector4i(tmp - offset).srl32(1) + offset; //fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y); @@ -1626,6 +1654,9 @@ void GSRendererHW::Draw() m_texture_shuffle = (GSLocalMemory::m_psm[context->FRAME.PSM].bpp == 16) && (tex_psm.bpp == 16) && draw_sprite_tex && (m_src->m_32_bits_fmt || copy_16bit_to_target_shuffle); + + if (copy_16bit_to_target_shuffle && m_texture_shuffle) + DevCon.Warning("here"); // Okami mustn't call this code if (m_texture_shuffle && m_vertex.next < 3 && PRIM->FST && ((m_context->FRAME.FBMSK & fm_mask) == 0)) { @@ -2230,8 +2261,9 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask() bool write_ba; bool read_ba; + bool swap_ga; - ConvertSpriteTextureShuffle(write_ba, read_ba); + ConvertSpriteTextureShuffle(write_ba, read_ba, swap_ga); // If date is enabled you need to test the green channel instead of the // alpha channel. Only enable this code in DATE mode to reduce the number @@ -2251,37 +2283,52 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask() const GSVector2i rb_ga_mask = GSVector2i(fbmask & 0xFF, (fbmask >> 8) & 0xFF); m_conf.colormask.wrgba = 0; - // 2 Select the new mask - if (rb_ga_mask.r != 0xFF) + if (swap_ga) { - if (write_ba) - { - GL_INS("Color shuffle %s => B", read_ba ? "B" : "R"); - m_conf.colormask.wb = 1; - } - else - { - GL_INS("Color shuffle %s => R", read_ba ? "B" : "R"); - m_conf.colormask.wr = 1; - } - if (rb_ga_mask.r) - m_conf.ps.fbmask = 1; + m_conf.ps.swap_ga = true; + m_conf.ps.fbmask = 0; + GL_CACHE("I hate myself"); + m_conf.colormask.wg = 1; + m_conf.colormask.wa = 1; + m_skip = 1; + DevCon.Warning("Kill me"); + m_conf.ps.tex_is_fb = true; + return; } - - if (rb_ga_mask.g != 0xFF) + else { - if (write_ba) + // 2 Select the new mask + if (rb_ga_mask.r != 0xFF) { - GL_INS("Color shuffle %s => A", read_ba ? 
"A" : "G"); - m_conf.colormask.wa = 1; + if (write_ba) + { + DevCon.Warning("Color shuffle %s => B", read_ba ? "B" : "R"); + m_conf.colormask.wb = 1; + } + else + { + DevCon.Warning("Color shuffle %s => R", read_ba ? "B" : "R"); + m_conf.colormask.wr = 1; + } + if (rb_ga_mask.r) + m_conf.ps.fbmask = 1; } - else + + if (rb_ga_mask.g != 0xFF) { - GL_INS("Color shuffle %s => G", read_ba ? "A" : "G"); - m_conf.colormask.wg = 1; + if (write_ba) + { + DevCon.Warning("Color shuffle %s => A", read_ba ? "A" : "G"); + m_conf.colormask.wa = 1; + } + else + { + DevCon.Warning("Color shuffle %s => G", read_ba ? "A" : "G"); + m_conf.colormask.wg = 1; + } + if (rb_ga_mask.g) + m_conf.ps.fbmask = 1; } - if (rb_ga_mask.g) - m_conf.ps.fbmask = 1; } if (m_conf.ps.fbmask && enable_fbmask_emulation) @@ -2294,12 +2341,12 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask() // No blending so hit unsafe path. if (!PRIM->ABE || !features.texture_barrier) { - GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask); + DevCon.Warning("FBMASK Unsafe SW emulated fb_mask:%x on tex shuffle", fbmask); m_conf.require_one_barrier = true; } else { - GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); + DevCon.Warning("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); m_conf.require_full_barrier = true; } } @@ -2351,14 +2398,14 @@ void GSRendererHW::EmulateTextureShuffleAndFbmask() // No blending so hit unsafe path. if (!PRIM->ABE || !(~ff_fbmask & ~zero_fbmask & 0x7) || !g_gs_device->Features().texture_barrier) { - GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, + DevCon.Warning("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, (m_conf.ps.dfmt == 2) ? 
16 : 32); m_conf.require_one_barrier = true; } else { // The safe and accurate path (but slow) - GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, + DevCon.Warning("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, (m_conf.ps.dfmt == 2) ? 16 : 32); m_conf.require_full_barrier = true; } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 38443bfd32..1e13021ced 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -140,7 +140,7 @@ public: void Lines2Sprites(); bool VerifyIndices(); template void ExpandIndices(); - void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba); + void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, bool& swap_ga); GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex); GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize); void MergeSprite(GSTextureCache::Source* tex); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 4b3c620cb7..c0b25dfbf7 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -1996,6 +1996,7 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_NO_COLOR1", sel.no_color1); AddMacro(ss, "PS_NO_ABLEND", sel.no_ablend); AddMacro(ss, "PS_ONLY_ALPHA", sel.only_alpha); + AddMacro(ss, "PS_SWAP_GA", sel.swap_ga); ss << m_tfx_source; VkShaderModule mod = g_vulkan_shader_cache->GetFragmentShader(ss.str());