From 8d3e3e6c5bc7e09c0751f4160103371c28da77f4 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 20 May 2015 00:51:37 +0200 Subject: [PATCH] gsdx-ogl: more blend rework to support accurate_colclip So far only a few blending equations are implemented in the PS. This is only meant for testing the behavior on GoW --- plugins/GSdx/GSDeviceOGL.cpp | 6 ++ plugins/GSdx/GSDeviceOGL.h | 25 +++---- plugins/GSdx/GSRendererOGL.cpp | 115 +++++++++++++++++------------- plugins/GSdx/GSRendererOGL.h | 1 + plugins/GSdx/GSTextureFXOGL.cpp | 14 ++-- plugins/GSdx/res/glsl/tfx_fs.glsl | 11 +++ plugins/GSdx/res/glsl_source.h | 11 +++ 7 files changed, 115 insertions(+), 68 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index f569bdcd75..f0e579fb58 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -1135,6 +1135,9 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) void GSDeviceOGL::OMSetBlendState(GSBlendStateOGL* bs, float bf) { + // SW date might change the enable state without updating the object + // Time to remove this micro-optimization +#if 0 // State is checkd inside the object but worst case is 8 comparaisons if (m_state.bs != bs || m_state.bf != bf) { @@ -1143,6 +1146,9 @@ void GSDeviceOGL::OMSetBlendState(GSBlendStateOGL* bs, float bf) bs->SetupBlend(bf); } +#else + bs->SetupBlend(bf); +#endif } void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 8b940b85ca..24a956ad7c 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -47,7 +47,6 @@ class GSBlendStateOGL { GLenum m_func_sRGB; GLenum m_func_dRGB; bool m_constant_factor; - int m_bogus; public: @@ -56,7 +55,6 @@ public: , m_func_sRGB(0) , m_func_dRGB(0) , m_constant_factor(false) - , m_bogus(0) {} void SetRGB(GLenum op, GLenum src, GLenum dst) @@ -67,10 +65,6 @@ public: if (IsConstant(src) || IsConstant(dst)) 
m_constant_factor = true; } - void SetBogus(int bogus) { m_bogus = bogus; } - - int GetBogus() { return m_bogus; } - void RevertOp() { if(m_equation_RGB == GL_FUNC_ADD) @@ -95,11 +89,6 @@ public: glDisable(GL_BLEND); } -#ifdef ENABLE_OGL_DEBUG - if (m_bogus & A_MAX) { - GL_INS("!!! Bogus blending effect used (%d) !!!", m_bogus); - } -#endif if (m_enable) { if (HasConstantFactor()) { if (GLState::bf != factor) { @@ -327,14 +316,18 @@ class GSDeviceOGL : public GSDevice uint32 wmt:2; uint32 ltf:1; - uint32 blend:4; + uint32 _free1:4; + + // Word 2 + uint32 blend:8; + uint32 _free2:24; }; - uint32 key; + uint64 key; }; // FIXME is the & useful ? - operator uint32() {return key & 0xffffffff;} + operator uint64() {return key;} PSSelector() : key(0) {} }; @@ -528,7 +521,7 @@ class GSDeviceOGL : public GSDevice GLuint m_gs; GLuint m_ps_ss[1<<3]; GSDepthStencilOGL* m_om_dss[1<<6]; - hash_map m_ps; + hash_map m_ps; hash_map m_om_bs; GLuint m_apitrace; @@ -632,7 +625,7 @@ class GSDeviceOGL : public GSDevice void SetupPS(PSSelector sel); void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb); void SetupSampler(PSSamplerSelector ssel); - int SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix); + void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending = false); GLuint GetSamplerID(PSSamplerSelector ssel); GLuint GetPaletteSamplerID(); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 3ae76276db..e94a2a8cf3 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -31,6 +31,7 @@ GSRendererOGL::GSRendererOGL() m_accurate_blend = theApp.GetConfig("accurate_blend", 0); m_accurate_date = theApp.GetConfig("accurate_date", 0); + m_accurate_colclip = theApp.GetConfig("accurate_colclip", 0); UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0); UserHacks_AlphaStencil = theApp.GetConfig("UserHacks_AlphaStencil", 0); @@ -51,7 +52,7 @@ 
GSRendererOGL::GSRendererOGL() bool GSRendererOGL::CreateDevice(GSDevice* dev) { - if(!GSRenderer::CreateDevice(dev)) + if (!GSRenderer::CreateDevice(dev)) return false; return true; @@ -70,7 +71,7 @@ void GSRendererOGL::EmulateGS() // assume vertices are tightly packed and sequentially indexed (it should be the case) - if(m_vertex.next >= 2) + if (m_vertex.next >= 2) { size_t count = m_vertex.next; @@ -245,7 +246,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Blend - if(!IsOpaque()) + if (!IsOpaque()) { om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; @@ -254,9 +255,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour om_bsel.c = context->ALPHA.C; om_bsel.d = context->ALPHA.D; - if(env.PABE.PABE) + if (env.PABE.PABE) { - if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) + if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) { // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result @@ -340,7 +341,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // om - if(context->TEST.ZTE) + if (context->TEST.ZTE) { om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; @@ -360,11 +361,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // than the buffer supports seems to be an error condition on the real GS, causing it to crash. // We are probably receiving bad coordinates from VU1 in these cases. 
- if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) + if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) { - if(context->ZBUF.PSM == PSM_PSMZ24) + if (context->ZBUF.PSM == PSM_PSMZ24) { - if(m_vt.m_max.p.z > 0xffffff) + if (m_vt.m_max.p.z > 0xffffff) { ASSERT(m_vt.m_min.p.z > 0xffffff); // Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended. @@ -376,9 +377,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } } - else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) + else if (context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { - if(m_vt.m_max.p.z > 0xffff) + if (m_vt.m_max.p.z > 0xffff) { ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo // Fixme : Same as above, I guess. @@ -406,7 +407,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
- if(rt->LikelyOffset) + if (rt->LikelyOffset) { ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; @@ -421,44 +422,53 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (DATE_GL45) { ps_sel.date = 5 + context->TEST.DATM; - } else if(DATE) { + } else if (DATE) { if (DATE_GL42) ps_sel.date = 1 + context->TEST.DATM; else om_dssel.date = 1; } - bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST; - if(colclip_wrap) - { -#ifdef ENABLE_OGL_DEBUG - const char *col[3] = {"Cs", "Cd", "0"}; -#endif - if (context->ALPHA.A == context->ALPHA.B) { + bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST && !m_accurate_colclip; + bool acc_colclip_wrap = env.COLCLAMP.CLAMP == 0 && m_accurate_colclip; + if (context->ALPHA.A == context->ALPHA.B) { // Optimize-away colclip + if (colclip_wrap || acc_colclip_wrap) { // No addition neither substraction so no risk of overflow the [0:255] range. - GL_INS("Disable COLCLIP wrap: blending is a plain copy of %s", col[context->ALPHA.D]); colclip_wrap = false; - } else { - GL_INS("Enable COLCLIP wrap (blending is %d/%d/%d/%d)", - context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D); - ps_sel.colclip = 1; + acc_colclip_wrap = false; +#ifdef ENABLE_OGL_DEBUG + const char *col[3] = {"Cs", "Cd", "0"}; + GL_INS("Disable COLCLIP wrap: blending is a plain copy of %s", col[context->ALPHA.D]); +#endif } } + if (colclip_wrap) { + ps_sel.colclip = 1; + GL_INS("Enable COLCLIP wrap (blending is %d/%d/%d/%d)", + context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D); + } else if (acc_colclip_wrap) { + ps_sel.colclip = 3; + GL_INS("Enable accurate COLCLIP wrap (blending is %d/%d/%d/%d)", + context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D); + } else if (env.COLCLAMP.CLAMP == 0) { + GL_INS("COLCLIP wrap not supported (blending is %d/%d/%d/%d)", + context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, 
context->ALPHA.D); + } ps_sel.clr1 = om_bsel.IsCLR1(); ps_sel.fba = context->FBA.FBA; ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; - if(UserHacks_AlphaHack) ps_sel.aout = 1; + if (UserHacks_AlphaHack) ps_sel.aout = 1; - if(PRIM->FGE) + if (PRIM->FGE) { ps_sel.fog = 1; ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255; } - if(context->TEST.ATE) + if (context->TEST.ATE) ps_sel.atst = context->TEST.ATST; else ps_sel.atst = ATST_ALWAYS; @@ -489,7 +499,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool spritehack = false; int atst = ps_sel.atst; - if(tex) + if (tex) { const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM]; const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[context->TEX0.CPSM] : psm; @@ -519,7 +529,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSVector4 WH(tw, th, w, h); - if(PRIM->FST) + if (PRIM->FST) { vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy(); ps_sel.fst = 1; @@ -565,6 +575,28 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } + // Compute the blending equation to detect special case + int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; + int bogus_blend = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; + bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || (acc_colclip_wrap); + + if (sw_blending) { + GL_INS("!!! 
SW blending effect used (0x%x) !!!", bogus_blend); + + // select a shader that support blending + ps_sel.blend = bogus_blend & 0xFF; + + dev->PSSetShaderResource(3, rt); + + // Require the fix alpha vlaue + if (context->ALPHA.C == 2) { + ps_cb.AlphaCoeff = GSVector4((float)(int)context->ALPHA.FIX / 0x80); + } + + // No need to flush for every primitive + require_barrier = !(bogus_blend & NO_BAR); + } + // WARNING: setup of the program must be done first. So you can setup // 1/ subroutine uniform // 2/ bindless texture uniform @@ -574,29 +606,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupPS(ps_sel); // rs + uint8 afix = context->ALPHA.FIX; GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); - uint8 afix = context->ALPHA.FIX; - GL_PUSH("IA"); SetupIA(); GL_POP(); dev->OMSetColorMaskState(om_csel); - // Handle blending with care - int bogus_blend = dev->SetupOM(om_dssel, om_bsel, afix); - if (m_accurate_blend && (bogus_blend & A_MAX)) { - ps_sel.blend = bogus_blend & 0xF; - dev->SetupPS(ps_sel); - dev->PSSetShaderResource(3, rt); - - if (context->ALPHA.C == 2) { - ps_cb.AlphaCoeff = GSVector4((float)(int)afix / 0x80); - } - - require_barrier = !(bogus_blend & NO_BAR); - } + dev->SetupOM(om_dssel, om_bsel, afix, sw_blending); dev->SetupCB(&vs_cb, &ps_cb); @@ -634,7 +653,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->OMSetRenderTargets(rt, ds, &scissor); - if(context->TEST.DoFirstPass()) + if (context->TEST.DoFirstPass()) { SendDraw(require_barrier); @@ -657,7 +676,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } - if(context->TEST.DoSecondPass()) + if (context->TEST.DoSecondPass()) { ASSERT(!env.PABE.PABE); @@ -685,7 +704,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour default: __assume(0); } - if(z || r || g || b || a) + if (z || 
r || g || b || a) { om_dssel.zwe = z; om_csel.wr = r; diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 6771261bcc..dc1c672930 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -33,6 +33,7 @@ class GSRendererOGL : public GSRendererHW GSVector2 m_pixelcenter; bool m_accurate_blend; bool m_accurate_date; + bool m_accurate_colclip; bool UserHacks_AlphaHack; bool UserHacks_AlphaStencil; diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index d7d009824f..87d759d7ed 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -109,7 +109,6 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix) int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); - bs->SetBogus(m_blendMapD3D9[i].bogus); if (m_blendMapD3D9[i].bogus & A_MAX) { if (!theApp.GetConfig("accurate_blend", 0)) { @@ -236,12 +235,21 @@ GLuint GSDeviceOGL::GetPaletteSamplerID() return m_palette_ss; } -int GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix) +void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending) { GSDepthStencilOGL* dss = m_om_dss[dssel]; OMSetDepthStencilState(dss, 1); + if (sw_blending) { + if (GLState::blend) { + GLState::blend = false; + glDisable(GL_BLEND); + } + // No hardware blending thank + return; + } + // ************************************************************* // Static // ************************************************************* @@ -260,6 +268,4 @@ int GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin // Dynamic // ************************************************************* OMSetBlendState(bs, (float)(int)afix / 0x80); - - return bs->GetBogus(); } diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 
e6df792be3..592c48c323 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -366,7 +366,9 @@ vec4 ps_color() fog(c, PSin_t.z); +#if (PS_COLCLIP < 3) colclip(c); +#endif #if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes c.rgb = vec3(1.0f, 1.0f, 1.0f); @@ -408,6 +410,15 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 12 // { A_MAX | 12 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F c.rgb = rt.rgb * (Af.x + 1.0f) - c.rgb * Af.x; +#elif PS_BLEND == 45 + // { NO_BAR | 45 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F + c.rgb = - c.rgb * Af.x; +#elif PS_BLEND > 0 + error not yet implemented; +#endif + +#if PS_COLCLIP == 3 + c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f; #endif } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 6e99f2d287..971ec5cabc 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1108,7 +1108,9 @@ static const char* tfx_fs_all_glsl = "\n" " fog(c, PSin_t.z);\n" "\n" + "#if (PS_COLCLIP < 3)\n" " colclip(c);\n" + "#endif\n" "\n" "#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n" " c.rgb = vec3(1.0f, 1.0f, 1.0f);\n" @@ -1150,6 +1152,15 @@ static const char* tfx_fs_all_glsl = "#elif PS_BLEND == 12\n" " // { A_MAX | 12 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F\n" " c.rgb = rt.rgb * (Af.x + 1.0f) - c.rgb * Af.x;\n" + "#elif PS_BLEND == 45\n" + " // { NO_BAR | 45 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F\n" + " c.rgb = - c.rgb * Af.x;\n" + "#elif PS_BLEND > 0\n" + " error not yet implemented;\n" + "#endif\n" + "\n" + "#if PS_COLCLIP == 3\n" + " c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 
255.0f;\n" "#endif\n" "}\n" "\n"