From 8a4c0e9782dcd7aa80274baeb3486f60820b3a9d Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 7 Aug 2015 11:32:27 +0200 Subject: [PATCH 1/7] cmake: drop extra alignment on GSdx It requires extensive tests --- pcsx2/CMakeLists.txt | 2 ++ plugins/GSdx/CMakeLists.txt | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 17d1b1c40e..41f1222c11 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -15,6 +15,8 @@ endif() set(CommonFlags # GCC-4.6 crash pcsx2 during the binding of plugins at startup... # Disable this optimization for the moment + # GCC-4.9 update: + # Crash when you start a game. Likely a stack corruption/alignment -fno-omit-frame-pointer # END GCC-4.6 -fno-strict-aliasing diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index e15b7c8dfd..a7f0f07482 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -51,10 +51,6 @@ endif() #Clang doesn't support a few common flags that GCC does. 
if(NOT USE_CLANG) set(GSdxFinalFlags ${GSdxFinalFlags} -fabi-version=6) - if (_M_X86_32 AND NOT USE_ASAN) - # Someone need to seriously test the build of GSdx without this option - set(GSdxFinalFlags ${GSdxFinalFlags} -mpreferred-stack-boundary=2) - endif() endif() set(GSdxSources From bd0de8fbaf0c1bbb5924d8a96a023275df0cb7d8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 7 Aug 2015 09:48:35 +0200 Subject: [PATCH 2/7] glsl: (colclip) HDR needs neither wrapping nor clamping Might save a couple of instructions in the PS --- plugins/GSdx/res/glsl/tfx_fs.glsl | 4 ++-- plugins/GSdx/res/glsl_source.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 6fba02473e..d794d2cf11 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -442,7 +442,7 @@ void ps_blend(inout vec4 Color, float As) // FIXME dithering // Correct the Color value based on the output format -#if PS_COLCLIP == 0 +#if PS_COLCLIP == 0 && PS_HDR == 0 // Standard Clamp Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f)); #endif @@ -457,7 +457,7 @@ void ps_blend(inout vec4 Color, float As) // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8)); -#elif PS_COLCLIP == 1 +#elif PS_COLCLIP == 1 && PS_HDR == 0 Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF)); #endif diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index f7a4d23153..0b0530a4a7 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1301,7 +1301,7 @@ static const char* tfx_fs_all_glsl = " // FIXME dithering\n" "\n" " // Correct the Color value based on the output format\n" - "#if PS_COLCLIP == 0\n" + "#if PS_COLCLIP == 0 && PS_HDR == 0\n" " // Standard Clamp\n" " Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n" "#endif\n" @@ -1316,7 +1316,7 @@ static const char* tfx_fs_all_glsl = " // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n" "\n" " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n" - "#elif PS_COLCLIP == 1\n" + "#elif PS_COLCLIP == 1 && PS_HDR == 0\n" " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n" "#endif\n" "\n" From 4d1241070768879602f5a690508eda3a1d424869 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 7 Aug 2015 09:44:42 +0200 Subject: [PATCH 3/7] gsdx-ogl: latch constant buffer in rendering object * Initialization of the object is done once * Avoid re-uploading it when a useless parameter toggles => -10% of UBO updates --- plugins/GSdx/GSRendererOGL.cpp | 14 ++++---------- plugins/GSdx/GSRendererOGL.h | 8 +++++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index e7a285a0bd..18b5567ac5 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -152,7 +152,7 @@ void GSRendererOGL::SetupIA() dev->IASetPrimitiveTopology(t); } -bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel, GSDeviceOGL::PSConstantBuffer& ps_cb) +bool 
GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel) { bool require_barrier = false; @@ -301,7 +301,7 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s return require_barrier; } -bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::PSConstantBuffer& ps_cb, bool DATE_GL42) +bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42) { const GIFRegALPHA& ALPHA = m_context->ALPHA; bool require_barrier = false; @@ -577,16 +577,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; dev->s_n = s_n; - // FIXME: optimization, latch ps_cb & vs_cb in the object - // 1/ Avoid a reset every draw - // 2/ potentially less update GSDeviceOGL::VSSelector vs_sel; - GSDeviceOGL::VSConstantBuffer vs_cb; - GSDeviceOGL::GSSelector gs_sel; GSDeviceOGL::PSSelector ps_sel; - GSDeviceOGL::PSConstantBuffer ps_cb; GSDeviceOGL::PSSamplerSelector ps_ssel; GSDeviceOGL::OMColorMaskSelector om_csel; @@ -604,7 +598,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour m_prim_overlap = PRIM_OVERLAP_UNKNOW; } - require_barrier |= EmulateTextureShuffleAndFbmask(ps_sel, om_csel, ps_cb); + require_barrier |= EmulateTextureShuffleAndFbmask(ps_sel, om_csel); // DATE: selection of the algorithm. 
Must be done before blending because GL42 is not compatible with blending @@ -632,7 +626,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Blend if (!IsOpaque() && rt) { - require_barrier |= EmulateBlending(ps_sel, ps_cb, DATE_GL42); + require_barrier |= EmulateBlending(ps_sel, DATE_GL42); } else { dev->OMSetBlendState(); // No blending please } diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 941a1a968a..8024b210da 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -48,19 +48,21 @@ class GSRendererOGL : public GSRendererHW GSVector2 m_pixelcenter; bool m_accurate_date; int m_sw_blending; + PRIM_OVERLAP m_prim_overlap; unsigned int UserHacks_TCOffset; float UserHacks_TCO_x, UserHacks_TCO_y; - PRIM_OVERLAP m_prim_overlap; + GSDeviceOGL::VSConstantBuffer vs_cb; + GSDeviceOGL::PSConstantBuffer ps_cb; GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize); protected: void EmulateGS(); void SetupIA(); - bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel, GSDeviceOGL::PSConstantBuffer& ps_cb); - bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::PSConstantBuffer& ps_cb, bool DATE_GL42); + bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel); + bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42); public: GSRendererOGL(); From 5b574055178278982c7450e883205d12f46d6e42 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 7 Aug 2015 12:06:45 +0200 Subject: [PATCH 4/7] gsdx-ogl: blend management cleanup * reorder the blend function * remove OM bsel object * add a bit to support pabe (miss the glsl part) --- plugins/GSdx/GSDeviceOGL.cpp | 2 +- plugins/GSdx/GSDeviceOGL.h | 39 +---------- plugins/GSdx/GSRendererOGL.cpp | 116 ++++++++++++++------------------- 3 files changed, 52 insertions(+), 105 
deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 86902de7c3..d5dd22967e 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -353,7 +353,6 @@ bool GSDeviceOGL::Create(GSWnd* wnd) ASSERT(sizeof(PSSamplerSelector) == 4); ASSERT(sizeof(OMDepthStencilSelector) == 4); ASSERT(sizeof(OMColorMaskSelector) == 4); - ASSERT(sizeof(OMBlendSelector) == 4); return true; } @@ -663,6 +662,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_WRITE_RG %d\n", sel.write_rg) + format("#define PS_FBMASK %d\n", sel.fbmask) + format("#define PS_HDR %d\n", sel.hdr) + + format("#define PS_PABE %d\n", sel.pabe); ; return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index a86e2a22aa..5edab6f4e1 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -285,6 +285,7 @@ class GSDeviceOGL : public GSDevice uint32 blend_c:2; uint32 blend_d:2; uint32 clr1:1; // useful? 
+ uint32 pabe:1; uint32 hdr:1; uint32 colclip:1; @@ -292,7 +293,7 @@ class GSDeviceOGL : public GSDevice uint32 tcoffsethack:1; //uint32 point_sampler:1; Not tested, so keep the bit for blend - uint32 _free2:20; + uint32 _free2:19; }; uint64 key; @@ -378,42 +379,6 @@ class GSDeviceOGL : public GSDevice OMColorMaskSelector(uint32 c) { wrgba = c; } }; - struct OMBlendSelector - { - union - { - struct - { - uint32 abe:1; - uint32 a:2; - uint32 b:2; - uint32 c:2; - uint32 d:2; - - uint32 _free:23; - }; - - struct - { - uint32 _abe:1; - uint32 abcd:8; - - uint32 _free2:23; - }; - - uint32 key; - }; - - operator uint32() {return key;} - - OMBlendSelector() : key(0) {} - - bool IsCLR1() const - { - return (key & 0x19f) == 0x93; // abe == 1 && a == 1 && b == 2 && d == 1 - } - }; - struct D3D9Blend {int bogus, op, src, dst;}; static const D3D9Blend m_blendMapD3D9[3*3*3*3 + 1]; static const int m_NO_BLEND; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 18b5567ac5..0bd8abedb7 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -303,91 +303,74 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42) { + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; const GIFRegALPHA& ALPHA = m_context->ALPHA; bool require_barrier = false; - GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + bool sw_blending = false; float afix = (float)m_context->ALPHA.FIX / 0x80; - GSDeviceOGL::OMBlendSelector om_bsel; - om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; - - om_bsel.a = ALPHA.A; - om_bsel.b = ALPHA.B; - om_bsel.c = ALPHA.C; - om_bsel.d = ALPHA.D; + // No blending so early exit + if (!(PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS)) { +#ifdef ENABLE_OGL_DEBUG + if (m_env.PABE.PABE) { + GL_INS("!!! 
ENV PABE without ABE !!!"); + } +#endif + dev->OMSetBlendState(); + return false; + } if (m_env.PABE.PABE) { GL_INS("!!! ENV PABE not supported !!!"); - // FIXME it could be supported with SW blending! - if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) - { - // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader - // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result - om_bsel.abe = 0; + if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA) { + ps_sel.pabe = 1; + require_barrier |= (ALPHA.C == 1); + sw_blending = true; } - else - { - //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. - //ASSERT(0); - } - } - - // No blending so early exit - if (!om_bsel.abe) { - dev->OMSetBlendState(); - return require_barrier; + //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. + //ASSERT(0); } // Compute the blending equation to detect special case - int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; - int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; + int blend_index = ((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D; + int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_index].bogus; + // SW Blend is (nearly) free. Let's use it. 
- bool free_blend = (blend_flag & BLEND_NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO); - // We really need SW blending for this one, barely used - bool impossible_blend = (blend_flag & BLEND_A_MAX); + bool impossible_or_free_blend = (blend_flag & (BLEND_NO_BAR|BLEND_A_MAX|BLEND_ACCU)) + || (m_prim_overlap == PRIM_OVERLAP_NO); + // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd bool accumulation_blend = (blend_flag & BLEND_ACCU); - bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend); + // Warning no break on purpose + switch (m_sw_blending) { + case ACC_BLEND_ULTRA: sw_blending |= true; + case ACC_BLEND_FULL: sw_blending |= !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && ALPHA.FIX <= 128u) ); + case ACC_BLEND_CCLIP_DALPHA: sw_blending |= (ALPHA.C == 1) || (m_env.COLCLAMP.CLAMP == 0); + case ACC_BLEND_SPRITE: sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS; + case ACC_BLEND_FREE: sw_blending |= ps_sel.fbmask || impossible_or_free_blend; + default: sw_blending |= accumulation_blend; + } + // SW Blending + // GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive + // that write the bad alpha value. Sw blending will force the draw to run primitive by primitive + // (therefore primitiveID will be constant to 1) + sw_blending &= !DATE_GL42; // Color clip if (m_env.COLCLAMP.CLAMP == 0) { if (accumulation_blend) { ps_sel.hdr = 1; GL_INS("COLCLIP Fast HDR mode ENABLED"); - } else if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA || sw_blending_base) { + } else if (sw_blending) { ps_sel.colclip = 1; - sw_blending_base = true; GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); } else { GL_INS("Sorry colclip isn't supported"); } } - // Note: Option is duplicated, one impact the blend unit / the other the shader. 
- sw_blending_base |= accumulation_blend; - - // Warning no break on purpose - bool sw_blending_adv = false; - switch (m_sw_blending) { - case ACC_BLEND_ULTRA: sw_blending_adv |= true; - case ACC_BLEND_FULL: sw_blending_adv |= !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ); - case ACC_BLEND_CCLIP_DALPHA: sw_blending_adv |= (ALPHA.C == 1); - case ACC_BLEND_SPRITE: sw_blending_adv |= m_vt.m_primclass == GS_SPRITE_CLASS; - default: break; - } - - bool sw_blending = sw_blending_base // Free case or Impossible blend - || sw_blending_adv // complex blending case (for special effect) - || ps_sel.fbmask; // accurate fbmask - - - // SW Blending - // GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive - // that write the bad alpha value. Sw blending will force the draw to run primitive by primitive - // (therefore primitiveID will be constant to 1) - sw_blending &= !DATE_GL42; // Seriously don't expect me to support this kind of crazyness. // No mix of COLCLIP + accumulation_blend + DATE GL42 // Neither fbmask and GL42 @@ -397,18 +380,17 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G // For stat to optimize accurate option #if 0 GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. 
Prim:%d number %d (sw %d)", - om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); + ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); #endif if (sw_blending) { - ps_sel.blend_a = om_bsel.a; - ps_sel.blend_b = om_bsel.b; - ps_sel.blend_c = om_bsel.c; - ps_sel.blend_d = om_bsel.d; + ps_sel.blend_a = ALPHA.A; + ps_sel.blend_b = ALPHA.B; + ps_sel.blend_c = ALPHA.C; + ps_sel.blend_d = ALPHA.D; if (accumulation_blend) { // Keep HW blending to do the addition - dev->OMSetBlendState(blend_sel); - om_bsel.abe = 1; + dev->OMSetBlendState(blend_index); // Remove the addition from the SW blending ps_sel.blend_d = 2; } else { @@ -424,13 +406,13 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G // No need to flush for every primitive require_barrier |= !(blend_flag & BLEND_NO_BAR) && !accumulation_blend; } else { - ps_sel.clr1 = om_bsel.IsCLR1(); + ps_sel.clr1 = (blend_flag & BLEND_C_CLR); if (ps_sel.dfmt == 1 && ALPHA.C == 1) { // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent - int hacked_blend_sel = blend_sel + 3; // +3 <=> +1 on C - dev->OMSetBlendState(hacked_blend_sel, 1.0f, true); + int hacked_blend_index = blend_index + 3; // +3 <=> +1 on C + dev->OMSetBlendState(hacked_blend_index, 1.0f, true); } else { - dev->OMSetBlendState(blend_sel, afix, (ALPHA.C == 2)); + dev->OMSetBlendState(blend_index, afix, (ALPHA.C == 2)); } } From df3ade896b0b03462b67b3cd6c6390eb76a72db4 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 7 Aug 2015 12:13:50 +0200 Subject: [PATCH 5/7] gsdx-ogl: use integer for blend factor Integer argument & comparison might be lighter V2: Forgot to change one OMSetBlendState call --- plugins/GSdx/GLState.cpp | 12 ++---------- plugins/GSdx/GLState.h | 2 +- plugins/GSdx/GSDeviceOGL.cpp | 5 +++-- plugins/GSdx/GSDeviceOGL.h | 2 +- plugins/GSdx/GSRendererOGL.cpp | 7 +++---- 5 files 
changed, 10 insertions(+), 18 deletions(-) diff --git a/plugins/GSdx/GLState.cpp b/plugins/GSdx/GLState.cpp index 25028d1ce9..e00c25ca1c 100644 --- a/plugins/GSdx/GLState.cpp +++ b/plugins/GSdx/GLState.cpp @@ -33,7 +33,7 @@ namespace GLState { GLenum f_dRGB; uint32 wrgba; - float bf; + int bf; bool depth; GLenum depth_func; @@ -58,14 +58,6 @@ namespace GLState { GLuint vs; GLuint program; bool dirty_prog; -#if 0 - struct { - GSVertexBufferStateOGL* vb; - GSDepthStencilOGL* dss; - GSBlendStateOGL* bs; - float bf; // blend factor - } m_state; -#endif void Clear() { fbo = 0; @@ -77,7 +69,7 @@ namespace GLState { f_sRGB = 0; f_dRGB = 0; wrgba = 0xF; - bf = 0.0; + bf = 0; depth = false; depth_func = 0; diff --git a/plugins/GSdx/GLState.h b/plugins/GSdx/GLState.h index 43c7836a2e..2db08da410 100644 --- a/plugins/GSdx/GLState.h +++ b/plugins/GSdx/GLState.h @@ -34,7 +34,7 @@ namespace GLState { extern GLenum f_sRGB; extern GLenum f_dRGB; extern uint32 wrgba; - extern float bf; + extern int bf; extern bool depth; extern GLenum depth_func; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index d5dd22967e..e60b628e4c 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -1332,7 +1332,7 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) } } -void GSDeviceOGL::OMSetBlendState(int blend_index, float blend_factor, bool is_blend_constant) +void GSDeviceOGL::OMSetBlendState(int blend_index, int blend_factor, bool is_blend_constant) { if (blend_index) { if (!GLState::blend) { @@ -1342,7 +1342,8 @@ void GSDeviceOGL::OMSetBlendState(int blend_index, float blend_factor, bool is_b if (is_blend_constant && GLState::bf != blend_factor) { GLState::bf = blend_factor; - gl_BlendColor(blend_factor, blend_factor, blend_factor, 0); + float bf = (float)blend_factor / 128.0f; + gl_BlendColor(bf, bf, bf, bf); } // FIXME test to use uint16 (cache friendly) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 
5edab6f4e1..e17c9bb951 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -527,7 +527,7 @@ class GSDeviceOGL : public GSDevice void PSSetSamplerState(GLuint ss); void OMSetDepthStencilState(GSDepthStencilOGL* dss); - void OMSetBlendState(int blend_index = 0, float blend_factor = 0.0f, bool is_blend_constant = false); + void OMSetBlendState(int blend_index = 0, int blend_factor = 0, bool is_blend_constant = false); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); void OMSetWriteBuffer(GLenum buffer = GL_COLOR_ATTACHMENT0); void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector()); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 0bd8abedb7..4e5236bd05 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -307,7 +307,6 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G const GIFRegALPHA& ALPHA = m_context->ALPHA; bool require_barrier = false; bool sw_blending = false; - float afix = (float)m_context->ALPHA.FIX / 0x80; // No blending so early exit if (!(PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS)) { @@ -400,7 +399,7 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G // Require the fix alpha vlaue if (ALPHA.C == 2) { - ps_cb.AlphaCoeff.a = afix; + ps_cb.AlphaCoeff.a = (float)ALPHA.FIX / 128.0f; } // No need to flush for every primitive @@ -410,9 +409,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G if (ps_sel.dfmt == 1 && ALPHA.C == 1) { // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent int hacked_blend_index = blend_index + 3; // +3 <=> +1 on C - dev->OMSetBlendState(hacked_blend_index, 1.0f, true); + dev->OMSetBlendState(hacked_blend_index, 128, true); } else { - dev->OMSetBlendState(blend_index, afix, (ALPHA.C == 2)); + dev->OMSetBlendState(blend_index, ALPHA.FIX, (ALPHA.C == 
2)); } } From 61694013a53b097d245bdfd8176191f66b144014 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 7 Aug 2015 19:43:42 +0200 Subject: [PATCH 6/7] gsdx-ogl: compact blending parameter structure Save 656B of data. It is good for the cache. --- plugins/GSdx/GLState.cpp | 11 +++++------ plugins/GSdx/GLState.h | 8 ++++---- plugins/GSdx/GSDeviceOGL.cpp | 28 +++++++++++++--------------- plugins/GSdx/GSDeviceOGL.h | 6 +++--- plugins/GSdx/GSRendererOGL.cpp | 4 ++-- 5 files changed, 27 insertions(+), 30 deletions(-) diff --git a/plugins/GSdx/GLState.cpp b/plugins/GSdx/GLState.cpp index e00c25ca1c..4caf54afb4 100644 --- a/plugins/GSdx/GLState.cpp +++ b/plugins/GSdx/GLState.cpp @@ -28,13 +28,12 @@ namespace GLState { GSVector4i scissor; bool blend; - GLenum eq_RGB; - GLenum f_sRGB; - GLenum f_dRGB; + uint16 eq_RGB; + uint16 f_sRGB; + uint16 f_dRGB; + uint8 bf; uint32 wrgba; - int bf; - bool depth; GLenum depth_func; bool depth_mask; @@ -68,8 +67,8 @@ namespace GLState { eq_RGB = 0; f_sRGB = 0; f_dRGB = 0; - wrgba = 0xF; bf = 0; + wrgba = 0xF; depth = false; depth_func = 0; diff --git a/plugins/GSdx/GLState.h b/plugins/GSdx/GLState.h index 2db08da410..dc44c35957 100644 --- a/plugins/GSdx/GLState.h +++ b/plugins/GSdx/GLState.h @@ -30,11 +30,11 @@ namespace GLState { extern GSVector4i scissor; extern bool blend; - extern GLenum eq_RGB; - extern GLenum f_sRGB; - extern GLenum f_dRGB; + extern uint16 eq_RGB; + extern uint16 f_sRGB; + extern uint16 f_dRGB; + extern uint8 bf; extern uint32 wrgba; - extern int bf; extern bool depth; extern GLenum depth_func; diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index e60b628e4c..47031d4c6d 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -1332,7 +1332,7 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) } } -void GSDeviceOGL::OMSetBlendState(int blend_index, int blend_factor, bool is_blend_constant) +void GSDeviceOGL::OMSetBlendState(uint8 blend_index, 
uint8 blend_factor, bool is_blend_constant) { if (blend_index) { if (!GLState::blend) { @@ -1346,25 +1346,23 @@ void GSDeviceOGL::OMSetBlendState(int blend_index, int blend_factor, bool is_ble gl_BlendColor(bf, bf, bf, bf); } - // FIXME test to use uint16 (cache friendly) - const GLenum& op = m_blendMapD3D9[blend_index].op; - if (GLState::eq_RGB != op) { - GLState::eq_RGB = op; + const OGLBlend& b = m_blendMapOGL[blend_index]; + + if (GLState::eq_RGB != b.op) { + GLState::eq_RGB = b.op; if (gl_BlendEquationSeparateiARB) - gl_BlendEquationSeparateiARB(0, op, GL_FUNC_ADD); + gl_BlendEquationSeparateiARB(0, b.op, GL_FUNC_ADD); else - gl_BlendEquationSeparate(op, GL_FUNC_ADD); + gl_BlendEquationSeparate(b.op, GL_FUNC_ADD); } - const GLenum& src = m_blendMapD3D9[blend_index].src; - const GLenum& dst = m_blendMapD3D9[blend_index].dst; - if (GLState::f_sRGB != src || GLState::f_dRGB != dst) { - GLState::f_sRGB = src; - GLState::f_dRGB = dst; + if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) { + GLState::f_sRGB = b.src; + GLState::f_dRGB = b.dst; if (gl_BlendFuncSeparateiARB) - gl_BlendFuncSeparateiARB(0, src, dst, GL_ONE, GL_ZERO); + gl_BlendFuncSeparateiARB(0, b.src, b.dst, GL_ONE, GL_ZERO); else - gl_BlendFuncSeparate(src, dst, GL_ONE, GL_ZERO); + gl_BlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO); } } else { @@ -1532,7 +1530,7 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, const int GSDeviceOGL::m_NO_BLEND = 0; const int GSDeviceOGL::m_MERGE_BLEND = 3*3*3*3; -const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3 + 1] = +const GSDeviceOGL::OGLBlend GSDeviceOGL::m_blendMapOGL[3*3*3*3 + 1] = { { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index e17c9bb951..189d102229 100644 --- a/plugins/GSdx/GSDeviceOGL.h 
+++ b/plugins/GSdx/GSDeviceOGL.h @@ -379,8 +379,8 @@ class GSDeviceOGL : public GSDevice OMColorMaskSelector(uint32 c) { wrgba = c; } }; - struct D3D9Blend {int bogus, op, src, dst;}; - static const D3D9Blend m_blendMapD3D9[3*3*3*3 + 1]; + struct OGLBlend {uint16 bogus, op, src, dst;}; + static const OGLBlend m_blendMapOGL[3*3*3*3 + 1]; static const int m_NO_BLEND; static const int m_MERGE_BLEND; @@ -527,7 +527,7 @@ class GSDeviceOGL : public GSDevice void PSSetSamplerState(GLuint ss); void OMSetDepthStencilState(GSDepthStencilOGL* dss); - void OMSetBlendState(int blend_index = 0, int blend_factor = 0, bool is_blend_constant = false); + void OMSetBlendState(uint8 blend_index = 0, uint8 blend_factor = 0, bool is_blend_constant = false); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL); void OMSetWriteBuffer(GLenum buffer = GL_COLOR_ATTACHMENT0); void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector()); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 4e5236bd05..07750fc247 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -332,8 +332,8 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G } // Compute the blending equation to detect special case - int blend_index = ((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D; - int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_index].bogus; + uint8 blend_index = ((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D; + int blend_flag = GSDeviceOGL::m_blendMapOGL[blend_index].bogus; // SW Blend is (nearly) free. Let's use it. bool impossible_or_free_blend = (blend_flag & (BLEND_NO_BAR|BLEND_A_MAX|BLEND_ACCU)) From 9f92f63194f2812526617bfc81c9d2ae76bfdae6 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 8 Aug 2015 08:57:06 +0200 Subject: [PATCH 7/7] gsdx-ogl: Use GetAlphaMinMax to limit the scope of FULL accurate blending Provide a massive speed up in this level. 
--- plugins/GSdx/GSRendererOGL.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 07750fc247..cf9c2c34bc 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -345,7 +345,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G // Warning no break on purpose switch (m_sw_blending) { case ACC_BLEND_ULTRA: sw_blending |= true; - case ACC_BLEND_FULL: sw_blending |= !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && ALPHA.FIX <= 128u) ); + case ACC_BLEND_FULL: if (!m_vt.m_alpha.valid && (ALPHA.C == 0)) GetAlphaMinMax(); + sw_blending |= (ALPHA.A != ALPHA.B) && + ((ALPHA.C == 0 && m_vt.m_alpha.max > 128u) || (ALPHA.C == 2 && ALPHA.FIX > 128u)); case ACC_BLEND_CCLIP_DALPHA: sw_blending |= (ALPHA.C == 1) || (m_env.COLCLAMP.CLAMP == 0); case ACC_BLEND_SPRITE: sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS; case ACC_BLEND_FREE: sw_blending |= ps_sel.fbmask || impossible_or_free_blend;