gsdx ogl: Bypass the texture cache when the frame buffer will be sampled.

Proof of concept. It should provide a huge speedup when accurate
blending is enabled for tri-ace / Jak / R&C (shadow rendering).

See #2894
Needs PR #2892

v2: Add const + comment to explain that code isn't ideal.
This commit is contained in:
Gregory Hainaut 2019-04-10 12:21:21 +02:00 committed by lightningterror
parent 1414d64b18
commit f406051ed9
4 changed files with 42 additions and 3 deletions

View File

@ -180,4 +180,7 @@ public:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void Draw();
// Queried by the texture cache to find out whether the content of the
// current texture is actually needed. The default answer is "no, it is
// not a dummy"; renderers can override this.
virtual bool IsDummyTexture() const
{
	return false;
}
};

View File

@ -21,6 +21,7 @@
#include "stdafx.h"
#include "GSTextureCache.h"
#include "GSRendererHW.h"
#include "GSUtil.h"
bool GSTextureCache::m_disable_partial_invalidation = false;
@ -1173,6 +1174,31 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
AttachPaletteToSource(src, psm.pal, true);
}
}
else if (dst && static_cast<GSRendererHW*>(m_renderer)->IsDummyTexture())
{
// This shortcut is a temporary solution. It isn't a good one, as it
// won't work with the Channel Shuffle/Texture Shuffle patterns
// (we need the texture cache result to detect those effects).
// A better solution would be to defer the copy/StretchRect until later
// in the rendering.
// Still, this poor solution is enough for a huge speed-up in a couple of games.
//
// Be aware that you can't use StretchRect between BeginScene/EndScene,
// so it could be tricky to put it in the middle of DrawPrims.
// Texture is created to keep code compatibility
GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(tw, th);
// Keep a trace of origin of the texture
src->m_texture = dTex;
src->m_target = true;
src->m_from_target = dst->m_texture;
// Even if we sample the framebuffer directly we might need the palette
// to handle the format conversion on GPU
if (psm.pal > 0)
AttachPaletteToSource(src, psm.pal, true);
}
else if (dst)
{
// TODO: clean up this mess

View File

@ -1027,9 +1027,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Detect framebuffer read that will need special handling
if ((m_context->FRAME.Block() == m_context->TEX0.TBP0) && PRIM->TME && m_sw_blending) {
if ((m_context->FRAME.FBMSK == 0x00FFFFFF) && (m_vt.m_primclass == GS_TRIANGLE_CLASS)) {
// Ratchet & Clank, Jak, Tri-Ace (Star Ocean 3) games use this pattern to compute the shadows.
// Alpha (multiplication) tfx is mostly equivalent to -1/+1 stencil operation.
GL_DBG("ERROR: Source and Target are the same! Let's sample the framebuffer");
// This pattern is used by several games to emulate a stencil (shadow)
// Ratchet & Clank, Jak do alpha integer multiplication (tfx) which is mostly equivalent to +1/-1
// Tri-Ace (Star Ocean 3/RadiataStories/VP2) uses a palette to handle the +1/-1
GL_DBG("Source and Target are the same! Let's sample the framebuffer");
m_ps_sel.tex_is_fb = 1;
m_require_full_barrier = true;
} else if (m_prim_overlap != PRIM_OVERLAP_NO) {
@ -1417,3 +1418,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->Recycle(hdr_rt);
}
}
bool GSRendererOGL::IsDummyTexture() const
{
	// The texture is "dummy" when it is really the frame buffer itself:
	// stencil emulation used to compute shadows (Jak series / tri-ace games).
	// The same conditions select the "m_ps_sel.tex_is_fb = 1" path in the draw.

	// The texture must point at the frame buffer being rendered to.
	if (m_context->FRAME.Block() != m_context->TEX0.TBP0)
		return false;

	// Texturing must be enabled.
	if (!PRIM->TME)
		return false;

	// Software blending must be enabled.
	if (!m_sw_blending)
		return false;

	// Only triangle-class primitives are handled.
	if (m_vt.m_primclass != GS_TRIANGLE_CLASS)
		return false;

	// Finally, the frame buffer mask must match the expected value.
	return m_context->FRAME.FBMSK == 0x00FFFFFF;
}

View File

@ -90,4 +90,6 @@ class GSRendererOGL final : public GSRendererHW
PRIM_OVERLAP PrimitiveOverlap();
void SendDraw();
// Returns true when the current draw will sample the frame buffer itself as its texture.
bool IsDummyTexture() const final;
};