gsdx-ogl: fast blending accurate hack

The hack relies on the undefined behavior of the hardware so it can
potentially generate rendering corruption.

This new hack drops the cache flushing when only the alpha channel is masked.
Alpha is a direct copy of the fragment. Normally masked bits will be constant
everywhere (RT, FS output, texture cache) so it would likely work.

Just in case, the code is only enabled with the new shiny hack.
This commit is contained in:
Gregory Hainaut 2015-12-31 23:58:03 +01:00
parent 6140fde60c
commit 0d25a0592a
2 changed files with 41 additions and 4 deletions

View File

@ -34,6 +34,7 @@ GSRendererOGL::GSRendererOGL()
UserHacks_TCOffset = theApp.GetConfig("UserHacks_TCOffset", 0);
UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f;
UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f;
UserHacks_unsafe_fbmask = theApp.GetConfig("UserHacks_unsafe_fbmask", false);
m_prim_overlap = PRIM_OVERLAP_UNKNOW;
@ -41,6 +42,7 @@ GSRendererOGL::GSRendererOGL()
UserHacks_TCOffset = 0;
UserHacks_TCO_x = 0;
UserHacks_TCO_y = 0;
UserHacks_unsafe_fbmask = false;
}
}
@ -299,10 +301,37 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s
ps_sel.fbmask = m_sw_blending && (~ff_fbmask & ~zero_fbmask & 0xF);
if (ps_sel.fbmask) {
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32);
ps_cb.FbMask = fbmask_v.u8to32();
require_barrier = true;
// Only alpha is special here, I think we can take a very unsafe shortcut.
// Alpha isn't blended on the GS but directly copied into the RT.
//
// Behavior is clearly undefined; however, there is a high probability that
// it will work. Masked bits will be constant and normally the same everywhere:
// RT/FS output/cached value.
//
// Just to be sure let's add a new safe hack for unsafe access :)
//
// Here is the GL spec quote to emphasize the unexpected behavior.
/*
- If a texel has been written, then in order to safely read the result
a texel fetch must be in a subsequent Draw separated by the command
void TextureBarrier(void);
TextureBarrier() will guarantee that writes have completed and caches
have been invalidated before subsequent Draws are executed.
*/
if (!(~ff_fbmask & ~zero_fbmask & 0x7) && UserHacks_unsafe_fbmask) {
GL_INS("FBMASK Unsafe SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32);
m_unsafe_fbmask = true;
require_barrier = false;
} else {
// The safe and accurate path (but slow)
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK,
(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 16 : 32);
require_barrier = true;
}
}
}
@ -566,7 +595,12 @@ void GSRendererOGL::SendDraw(bool require_barrier)
{
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
if (!require_barrier) {
if (!require_barrier && m_unsafe_fbmask) {
// Not safe, but it is still worth taking some precautions.
ASSERT(GLLoader::found_GL_ARB_texture_barrier);
glTextureBarrier();
dev->DrawIndexedPrimitive();
} else if (!require_barrier) {
dev->DrawIndexedPrimitive();
} else if (m_prim_overlap == PRIM_OVERLAP_NO) {
ASSERT(GLLoader::found_GL_ARB_texture_barrier);
@ -621,6 +655,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bool DATE_GL45 = false;
bool require_barrier = false; // For accurate option
m_unsafe_fbmask = false;
ASSERT(m_dev != NULL);

View File

@ -48,9 +48,11 @@ class GSRendererOGL : public GSRendererHW
bool m_accurate_date;
int m_sw_blending;
PRIM_OVERLAP m_prim_overlap;
bool m_unsafe_fbmask;
unsigned int UserHacks_TCOffset;
float UserHacks_TCO_x, UserHacks_TCO_y;
bool UserHacks_unsafe_fbmask;
GSDeviceOGL::VSConstantBuffer vs_cb;
GSDeviceOGL::PSConstantBuffer ps_cb;