diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index b70a23771f..c8f74fb521 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -667,7 +667,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_SHUFFLE %d\n", sel.shuffle) + format("#define PS_READ_BA %d\n", sel.read_ba) + format("#define PS_FBMASK %d\n", sel.fbmask) - + format("#define PS_BLEND_ACCU %d\n", sel.blend_accu) + + format("#define PS_HDR %d\n", sel.hdr) ; return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); @@ -1551,13 +1551,13 @@ const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3] = { 17 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) { 18 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F { NO_BAR | A_MAX | 7 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) - { 19 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd + { BLEND_ACCU | 19 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { NO_BAR | 20 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As { A_MAX | 8 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) { 21 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd { 22 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad { NO_BAR| A_MAX | 9 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) - { 23 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd + { BLEND_ACCU | 23 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd { 
NO_BAR | 24 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F { 25 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) { A_MAX | 10 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index a5d9d3805b..2db02ed4d5 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -33,6 +33,7 @@ #define A_MAX (0x100) // Impossible blending uses coeff bigger than 1 #define C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) #define NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) +#define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds #ifdef ENABLE_OGL_DEBUG_MEM_BW extern uint64 g_real_texture_upload_byte; @@ -350,8 +351,8 @@ class GSDeviceOGL : public GSDevice uint32 blend_b:2; uint32 blend_c:2; uint32 blend_d:2; - uint32 blend_accu:1; uint32 dfmt:2; + uint32 hdr:1; uint32 _free2:21; }; @@ -451,9 +452,8 @@ class GSDeviceOGL : public GSDevice uint32 c:2; uint32 d:2; uint32 negative:1; - uint32 accu:1; - uint32 _free:21; + uint32 _free:22; }; struct diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 0ca0cadcd6..b50c4783be 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -357,30 +357,26 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL // We really need SW blending for this one, barely used bool impossible_blend = (blend_flag & A_MAX); // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd - bool accumulation_blend = (ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1); + bool accumulation_blend = (blend_flag & BLEND_ACCU); - bool sw_blending_base = 
m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu); + bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend); // Color clip - bool acc_colclip_wrap = false; if (m_env.COLCLAMP.CLAMP == 0) { - acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base); - if (acc_colclip_wrap) { + if (accumulation_blend) { + ps_sel.hdr = 1; + GL_INS("COLCLIP Fast HDR mode ENABLED"); + } else if (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base) { ps_sel.colclip = 3; + sw_blending_base = true; GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); - } else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) { - // Standard (inaccurate) colclip - ps_sel.colclip = 1; - accumulation_blend = false; - GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); + } else { + fprintf(stderr, "Sorry colclip isn't supported\n"); } } // Note: Option is duplicated, one impact the blend unit / the other the shader. - if (accumulation_blend && m_sw_blending) { - om_bsel.accu = ps_sel.blend_accu = 1; - sw_blending_base = true; - } + sw_blending_base |= accumulation_blend; bool all_blend_sw; switch (m_sw_blending) { @@ -393,7 +389,6 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL bool sw_blending = sw_blending_base // Free case or Impossible blend || all_blend_sw // all blend - || acc_colclip_wrap // accurate colclip || ps_sel.fbmask; // accurate fbmask @@ -402,6 +397,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL // that write the bad alpha value. Sw blending will force the draw to run primitive by primitive // (therefore primitiveID will be constant to 1) sw_blending &= !DATE_GL42; + // Seriously don't expect me to support this kind of craziness. 
+ // No mix of COLCLIP + accumulation_blend + DATE GL42 + ASSERT(!(ps_sel.hdr && DATE_GL42)); // For stat to optimize accurate option #if 0 @@ -409,20 +407,28 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); #endif if (sw_blending) { - // Disable HW blending except in accu mode - om_bsel.abe = ps_sel.blend_accu; ps_sel.blend_a = om_bsel.a; ps_sel.blend_b = om_bsel.b; ps_sel.blend_c = om_bsel.c; ps_sel.blend_d = om_bsel.d; + if (accumulation_blend) { + // Keep HW blending to do the addition + om_bsel.abe = 1; + // Remove the addition from the SW blending + ps_sel.blend_d = 2; + } else { + // Disable HW blending + om_bsel.abe = 0; + } + // Require the fix alpha vlaue if (ALPHA.C == 2) { ps_cb.AlphaCoeff.a = afix; } // No need to flush for every primitive - require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu; + require_barrier |= !(blend_flag & NO_BAR) && !accumulation_blend; } else { ps_sel.clr1 = om_bsel.IsCLR1(); if (ps_sel.dfmt == 1 && ALPHA.C == 1) { @@ -924,7 +930,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GL_POP(); } - if (m_env.COLCLAMP.CLAMP == 0 && om_bsel.accu) { + if (ps_sel.hdr) { hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F); dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 9a9e1f688e..5fe60ae286 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -105,10 +105,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix) { int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; - if (bsel.accu) - bs->SetRGB(GL_FUNC_ADD, GL_ONE, GL_ONE); - else - bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); + bs->SetRGB(m_blendMapD3D9[i].op, 
m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); if (m_blendMapD3D9[i].bogus & A_MAX) { if (!theApp.GetConfig("accurate_blending_unit", 1)) { diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 4dc1dcc174..fd3f075f52 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -453,9 +453,6 @@ void ps_blend(inout vec4 Color, float As) #if PS_BLEND_A == PS_BLEND_B Color.rgb = D; -#elif PS_BLEND_ACCU == 1 - // The D addition will be done in the blending unit - Color.rgb = trunc(A * C); #else Color.rgb = trunc((A - B) * C + D); #endif @@ -612,8 +609,8 @@ void ps_main() ps_fbmask(C); -#if PS_BLEND_ACCU && PS_COLCLIP - // Use negative value to avoid overflow of the texture +#if PS_HDR == 1 + // Use negative value to avoid overflow of the texture (in accumulation mode) if (any(greaterThan(C.rgb, vec3(128.0f)))) { C.rgb = (C.rgb - 256.0f); } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 5b761a2471..8619fd259d 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1312,9 +1312,6 @@ static const char* tfx_fs_all_glsl = "\n" "#if PS_BLEND_A == PS_BLEND_B\n" " Color.rgb = D;\n" - "#elif PS_BLEND_ACCU == 1\n" - " // The D addition will be done in the blending unit\n" - " Color.rgb = trunc(A * C);\n" "#else\n" " Color.rgb = trunc((A - B) * C + D);\n" "#endif\n" @@ -1471,8 +1468,8 @@ static const char* tfx_fs_all_glsl = "\n" " ps_fbmask(C);\n" "\n" - "#if PS_BLEND_ACCU && PS_COLCLIP\n" - " // Use negative value to avoid overflow of the texture\n" + "#if PS_HDR == 1\n" + " // Use negative value to avoid overflow of the texture (in accumulation mode)\n" " if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n" " C.rgb = (C.rgb - 256.0f);\n" " }\n"