From 36defdfbe928179861059a81c6727f2f667bcb9d Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 30 Jul 2022 16:33:54 +1000 Subject: [PATCH] GS: Use tex-is-fb for sprites/shuffles --- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 117 ++++++++++++++--------- pcsx2/GS/Renderers/HW/GSRendererHW.h | 5 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 14 +-- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 31 +++--- 4 files changed, 103 insertions(+), 64 deletions(-) diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index a832058ffd..c78ff6d160 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -28,6 +28,7 @@ GSRendererHW::GSRendererHW() , m_tc(new GSTextureCache()) , m_src(nullptr) , m_reset(false) + , m_tex_is_fb(false) , m_channel_shuffle(false) , m_userhacks_tcoffset(false) , m_userhacks_tcoffset_x(0) @@ -120,10 +121,41 @@ void GSRendererHW::PurgeTextureCache() m_tc->RemoveAll(); } -bool GSRendererHW::IsPossibleTextureShuffle(GSTextureCache::Source* src) const +bool GSRendererHW::UpdateTexIsFB(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0) +{ + if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier) + return false; + + // Texture is actually the frame buffer. Stencil emulation to compute shadow (Jak series/tri-ace game) + // Will hit the "m_ps_sel.tex_is_fb = 1" path in the draw + if (m_vt.m_primclass == GS_TRIANGLE_CLASS) + { + if (m_context->FRAME.FBMSK == 0x00FFFFFF && TEX0.TBP0 == m_context->FRAME.Block()) + m_tex_is_fb = true; + } + else if (m_vt.m_primclass == GS_SPRITE_CLASS) + { + if (TEX0.TBP0 == m_context->FRAME.Block()) + { + m_tex_is_fb = IsPossibleTextureShuffle(dst, TEX0); + + if (!m_tex_is_fb && !m_vt.IsLinear()) + { + // Make sure that we're not sampling away from the area we're rendering. + const GSVector4 diff(m_vt.m_min.p.xyxy(m_vt.m_max.p) - m_vt.m_min.t.xyxy(m_vt.m_max.t)); + if ((diff < GSVector4(1.0f)).alltrue()) + m_tex_is_fb = true; + } + } + } + + return m_tex_is_fb; +} + +bool GSRendererHW::IsPossibleTextureShuffle(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0) const { return (PRIM->TME && m_vt.m_primclass == GS_SPRITE_CLASS && - src->m_32_bits_fmt && GSLocalMemory::m_psm[src->m_TEX0.PSM].bpp == 16 && + dst->m_32_bits_fmt && GSLocalMemory::m_psm[TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16); } @@ -1320,6 +1352,7 @@ void GSRendererHW::Draw() m_src = nullptr; m_texture_shuffle = false; + m_tex_is_fb = false; if (PRIM->TME) { @@ -2256,21 +2289,8 @@ void GSRendererHW::EmulateChannelShuffle(const GSTextureCache::Source* tex) m_conf.tex = *tex->m_from_target; if (m_conf.tex) { - if (m_conf.tex == m_conf.rt) - { - // sample from fb instead - m_conf.tex = nullptr; - m_conf.ps.tex_is_fb = true; - m_conf.require_one_barrier = true; - } - else if (m_conf.tex == m_conf.ds) - { - // if depth testing is disabled, we don't need to copy, and can just unbind the depth buffer - // no need for a barrier for GL either, since it's not bound to depth and texture concurrently - // otherwise, the backend should recognise the hazard, and copy the buffer (D3D/Vulkan). - if (m_conf.depth.ztst == ZTST_ALWAYS) - m_conf.ds = nullptr; - } + // Identify when we're sampling the current buffer, defer fixup for later. + m_tex_is_fb |= (m_conf.tex == m_conf.rt || m_conf.tex == m_conf.ds); } // Replace current draw with a fullscreen sprite @@ -3070,6 +3090,41 @@ void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Source* tex) if (!m_channel_shuffle) m_conf.tex = tex->m_texture; m_conf.pal = tex->m_palette; + + // Detect framebuffer read that will need special handling + if (m_tex_is_fb) + { + if (m_conf.tex == m_conf.rt) + { + // This pattern is used by several games to emulate a stencil (shadow) + // Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1 + // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 + GL_DBG("Source and Target are the same! Let's sample the framebuffer"); + m_conf.tex = nullptr; + m_conf.ps.tex_is_fb = true; + if (m_prim_overlap == PRIM_OVERLAP_NO || !g_gs_device->Features().texture_barrier) + m_conf.require_one_barrier = true; + else + m_conf.require_full_barrier = true; + } + else if (m_conf.tex == m_conf.ds) + { + // if depth testing is disabled, we don't need to copy, and can just unbind the depth buffer + // no need for a barrier for GL either, since it's not bound to depth and texture concurrently + // otherwise, the backend should recognise the hazard, and copy the buffer (D3D/Vulkan). + if (m_conf.depth.ztst == ZTST_ALWAYS) + { + m_conf.ds = nullptr; + m_tex_is_fb = false; + } + } + else + { + // weird... we detected a fb read, but didn't end up using it? + DevCon.WriteLn("Tex-is-FB set but not used?"); + m_tex_is_fb = false; + } + } } void GSRendererHW::EmulateATST(float& AREF, GSHWDrawConfig::PSSelector& ps, bool pass_2) @@ -3172,27 +3227,6 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc else m_prim_overlap = PRIM_OVERLAP_UNKNOW; - // Detect framebuffer read that will need special handling - if (features.texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum) - { - const u32 fb_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; - if (((m_context->FRAME.FBMSK & fb_mask) == (fb_mask & 0x00FFFFFF)) && (m_vt.m_primclass == GS_TRIANGLE_CLASS)) - { - // This pattern is used by several games to emulate a stencil (shadow) - // Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1 - // Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1 - GL_DBG("Source and Target are the same! Let's sample the framebuffer"); - m_conf.ps.tex_is_fb = 1; - m_conf.require_full_barrier = !features.framebuffer_fetch; - } - else if (m_prim_overlap != PRIM_OVERLAP_NO) - { - // Note: It is fine if the texture fits in a single GS page. First access will cache - // the page in the GS texture buffer. - GL_INS("ERROR: Source and Target are the same!"); - } - } - EmulateTextureShuffleAndFbmask(); // DATE: selection of the algorithm. Must be done before blending because GL42 is not compatible with blending @@ -3635,13 +3669,6 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc g_gs_device->RenderHW(m_conf); } -bool GSRendererHW::IsDummyTexture() const -{ - // Texture is actually the frame buffer. Stencil emulation to compute shadow (Jak series/tri-ace game) - // Will hit the "m_ps_sel.tex_is_fb = 1" path in the draw - return g_gs_device->Features().texture_barrier && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && GSConfig.AccurateBlendingUnit != AccBlendLevel::Minimum && m_vt.m_primclass == GS_TRIANGLE_CLASS && (m_context->FRAME.FBMSK == 0x00FFFFFF); -} - bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex) { // Master enable. diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index bddc29f311..ce2d71339b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -151,6 +151,7 @@ private: GSTextureCache::Source* m_src; bool m_reset; + bool m_tex_is_fb; bool m_channel_shuffle; bool m_userhacks_tcoffset; float m_userhacks_tcoffset_x; @@ -201,8 +202,8 @@ public: void PurgeTextureCache() override; // Called by the texture cache to know if current texture is useful - bool IsDummyTexture() const; + bool UpdateTexIsFB(GSTextureCache::Target* src, const GIFRegTEX0& TEX0); // Called by the texture cache when optimizing the copy range for sources - bool IsPossibleTextureShuffle(GSTextureCache::Source* src) const; + bool IsPossibleTextureShuffle(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0) const; }; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index e87afbf090..d82e6b44c3 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1458,7 +1458,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con AttachPaletteToSource(src, psm.pal, true); } } - else if (dst && GSRendererHW::GetInstance()->IsDummyTexture()) + else if (dst && GSRendererHW::GetInstance()->UpdateTexIsFB(dst, TEX0)) { // This shortcut is a temporary solution. It isn't a good solution // as it won't work with Channel Shuffle/Texture Shuffle pattern @@ -1470,21 +1470,23 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Be aware that you can't use StrechRect between BeginScene/EndScene. // So it could be tricky to put in the middle of the DrawPrims - // Texture is created to keep code compatibility - GSTexture* dTex = g_gs_device->CreateTexture(tw, th, false, GSTexture::Format::Color, true); - // Keep a trace of origin of the texture - src->m_texture = dTex; + src->m_texture = dst->m_texture; src->m_target = true; + src->m_shared_texture = true; src->m_from_target = &dst->m_texture; src->m_from_target_TEX0 = dst->m_TEX0; src->m_end_block = dst->m_end_block; src->m_texture->SetScale(dst->m_texture->GetScale()); + src->m_32_bits_fmt = dst->m_32_bits_fmt; // Even if we sample the framebuffer directly we might need the palette // to handle the format conversion on GPU if (psm.pal > 0) AttachPaletteToSource(src, psm.pal, true); + + // This will get immediately invalidated. + m_temporary_source = src; } else if (dst) { @@ -1626,7 +1628,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con { // if it looks like a texture shuffle, we might read up to +/- 8 pixels on either side. GSVector4 adjusted_src_range(*src_range); - if (GSRendererHW::GetInstance()->IsPossibleTextureShuffle(src)) + if (GSRendererHW::GetInstance()->IsPossibleTextureShuffle(dst, TEX0)) adjusted_src_range += GSVector4(-8.0f, 0.0f, 8.0f, 0.0f); // don't forget to scale the copy range diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index cff0cbc93d..fffb949683 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -1956,7 +1956,6 @@ VkShaderModule GSDeviceVK::GetTFXFragmentShader(const GSHWDrawConfig::PSSelector AddMacro(ss, "PS_CHANNEL_FETCH", sel.channel); AddMacro(ss, "PS_URBAN_CHAOS_HLE", sel.urban_chaos_hle); AddMacro(ss, "PS_TALES_OF_ABYSS_HLE", sel.tales_of_abyss_hle); - AddMacro(ss, "PS_TEX_IS_FB", sel.tex_is_fb); AddMacro(ss, "PS_INVALID_TEX0", sel.invalid_tex0); AddMacro(ss, "PS_AEM", sel.aem); AddMacro(ss, "PS_TFX", sel.tfx); @@ -2979,22 +2978,32 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) } } - if (config.tex && config.tex == config.ds) + if (config.tex) { - // requires a copy of the depth buffer. this is mainly for ico. - copy_ds = static_cast(CreateDepthStencil(rtsize.x, rtsize.y, GSTexture::Format::DepthStencil, false)); - if (copy_ds) + if (config.tex == config.ds) { - EndRenderPass(); + // requires a copy of the depth buffer. this is mainly for ico. + copy_ds = static_cast(CreateDepthStencil(rtsize.x, rtsize.y, GSTexture::Format::DepthStencil, false)); + if (copy_ds) + { + EndRenderPass(); - GL_PUSH("Copy depth to temp texture for shuffle {%d,%d %dx%d}", - config.drawarea.left, config.drawarea.top, - config.drawarea.width(), config.drawarea.height()); + GL_PUSH("Copy depth to temp texture for shuffle {%d,%d %dx%d}", + config.drawarea.left, config.drawarea.top, + config.drawarea.width(), config.drawarea.height()); - CopyRect(config.ds, copy_ds, config.drawarea, config.drawarea.left, config.drawarea.top); - PSSetShaderResource(0, copy_ds, true); + CopyRect(config.ds, copy_ds, config.drawarea, config.drawarea.left, config.drawarea.top); + PSSetShaderResource(0, copy_ds, true); + } } } + // clear texture binding when it's bound to RT or DS + if (!config.tex && m_tfx_textures[0] && + ((!pipe.feedback_loop && config.rt && static_cast(config.rt)->GetView() == m_tfx_textures[0]) || + (config.ds && static_cast(config.ds)->GetView() == m_tfx_textures[0]))) + { + PSSetShaderResource(0, nullptr, false); + } const bool render_area_okay = (!hdr_rt && DATE_rp != DATE_RENDER_PASS_STENCIL_ONE && CheckRenderPassArea(render_area));