From 0d25a0592a058899f8b451e25b4df227efca0164 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 31 Dec 2015 23:58:03 +0100 Subject: [PATCH] gsdx-ogl: fast blending accurate hack The hack relies on the undefined behavior of the hardware so it can potentially generate rendering corruption. This new hack drops the cache flushing when only the alpha channel is masked. Alpha is a direct copy of the fragment. Normally masked bits will be constant everywhere (RT, FS output, texture cache) so it would likely work. Just in case, code is only enabled with the new shiny hack --- plugins/GSdx/GSRendererOGL.cpp | 43 ++++++++++++++++++++++++++++++---- plugins/GSdx/GSRendererOGL.h | 2 ++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index d40744f265..4c3ef80b4c 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -34,6 +34,7 @@ GSRendererOGL::GSRendererOGL() UserHacks_TCOffset = theApp.GetConfig("UserHacks_TCOffset", 0); UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f; UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f; + UserHacks_unsafe_fbmask = theApp.GetConfig("UserHacks_unsafe_fbmask", false); m_prim_overlap = PRIM_OVERLAP_UNKNOW; @@ -41,6 +42,7 @@ GSRendererOGL::GSRendererOGL() UserHacks_TCOffset = 0; UserHacks_TCO_x = 0; UserHacks_TCO_y = 0; + UserHacks_unsafe_fbmask = false; } } @@ -299,10 +301,37 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s ps_sel.fbmask = m_sw_blending && (~ff_fbmask & ~zero_fbmask & 0xF); if (ps_sel.fbmask) { - GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, - (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); ps_cb.FbMask = fbmask_v.u8to32(); - require_barrier = true; + // Only alpha is special here, I think we can take a very unsafe shortcut + // Alpha isn't blended on the GS but directly copied into the RT. 
+ // + // Behavior is clearly undefined however there is a high probability that + // it will work. Masked bits will be constant and normally the same everywhere + // RT/FS output/Cached value. + // + // Just to be sure let's add a new safe hack for unsafe access :) + // + // Here is the GL spec quote to emphasize the unexpected behavior. + /* + - If a texel has been written, then in order to safely read the result + a texel fetch must be in a subsequent Draw separated by the command + + void TextureBarrier(void); + + TextureBarrier() will guarantee that writes have completed and caches + have been invalidated before subsequent Draws are executed. + */ + if (!(~ff_fbmask & ~zero_fbmask & 0x7) && UserHacks_unsafe_fbmask) { + GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, + (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); + m_unsafe_fbmask = true; + require_barrier = false; + } else { + // The safe and accurate path (but slow) + GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, + (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32); + require_barrier = true; + } } } @@ -566,7 +595,12 @@ void GSRendererOGL::SendDraw(bool require_barrier) { GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; - if (!require_barrier) { + if (!require_barrier && m_unsafe_fbmask) { + // Not safe but still worth taking some precautions. 
+ ASSERT(GLLoader::found_GL_ARB_texture_barrier); + glTextureBarrier(); + dev->DrawIndexedPrimitive(); + } else if (!require_barrier) { dev->DrawIndexedPrimitive(); } else if (m_prim_overlap == PRIM_OVERLAP_NO) { ASSERT(GLLoader::found_GL_ARB_texture_barrier); @@ -621,6 +655,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool DATE_GL45 = false; bool require_barrier = false; // For accurate option + m_unsafe_fbmask = false; ASSERT(m_dev != NULL); diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 846e8a21db..2f06c18108 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -48,9 +48,11 @@ class GSRendererOGL : public GSRendererHW bool m_accurate_date; int m_sw_blending; PRIM_OVERLAP m_prim_overlap; + bool m_unsafe_fbmask; unsigned int UserHacks_TCOffset; float UserHacks_TCO_x, UserHacks_TCO_y; + bool UserHacks_unsafe_fbmask; GSDeviceOGL::VSConstantBuffer vs_cb; GSDeviceOGL::PSConstantBuffer ps_cb;