From e026f1bac69230940fe22f8c7b2b7f82ed620927 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 24 Jul 2015 22:06:35 +0200 Subject: [PATCH 01/16] gsdx-ogl: implement a fast accurate colclip algo The idea is to use a floating-point texture to accumulate the data and then do a final postprocessing pass to apply the modulo v2: * use bounding box to limit the copy and postprocessing area * fix vertex corruption issue * use negative numbers in the shader, which allows the use of half floats (+12 fps@4x) --- plugins/GSdx/GSDeviceOGL.cpp | 9 +++-- plugins/GSdx/GSDeviceOGL.h | 2 +- plugins/GSdx/GSRendererOGL.cpp | 57 ++++++++++++++++++++----------- plugins/GSdx/GSRendererOGL.h | 2 ++ plugins/GSdx/res/glsl/tfx_fs.glsl | 6 ++++ plugins/GSdx/res/glsl_source.h | 6 ++++ 6 files changed, 59 insertions(+), 23 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 4fd0276492..ef8215cfd6 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -889,7 +889,7 @@ GSTexture* GSDeviceOGL::CopyOffscreen(GSTexture* src, const GSVector4& sRect, in } // Copy a sub part of texture (same as below but force a conversion) -void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) +void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin) { const GLuint& sid = sTex->GetID(); const GLuint& did = dTex->GetID(); @@ -899,7 +899,10 @@ void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4 gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sid, 0); - gl_CopyTextureSubImage2D(did, GL_TEX_LEVEL_0, 0, 0, r.x, r.y, r.width(), r.height()); + if (at_origin) + gl_CopyTextureSubImage2D(did, GL_TEX_LEVEL_0, 0, 0, r.x, r.y, r.width(), r.height()); + else + gl_CopyTextureSubImage2D(did, GL_TEX_LEVEL_0, r.x, r.y, r.x, r.y, r.width(), r.height()); gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0); @@ -924,7 +927,7 @@ void 
GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r r.width(), r.height(), 1); } else { // Slower copy (conversion is done) - CopyRectConv(sTex, dTex, r); + CopyRectConv(sTex, dTex, r, true); } GL_POP(); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index cf276a4e2a..d09465dc0c 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -629,7 +629,7 @@ class GSDeviceOGL : public GSDevice GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0); void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r); - void CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r); + void CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin); void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true); void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear = true); void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, GSBlendStateOGL* bs, bool linear = true); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 62123f99c1..59a37c490f 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -344,6 +344,14 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() return PRIM_OVERLAP_NO; } +GSVector4i GSRendererOGL::ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize) +{ + GSVector4 scale = GSVector4(rtscale.x, rtscale.y); + GSVector4 offset = GSVector4(-1.0f, 1.0f); // Round value + GSVector4 box = m_vt.m_min.p.xyxy(m_vt.m_max.p) + offset.xxyy(); + return GSVector4i(box * scale.xyxy()).rintersect(GSVector4i(0, 0, rtsize.x, rtsize.y)); +} + void GSRendererOGL::SendDraw(bool require_barrier) { GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; @@ 
-388,6 +396,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; + GSTexture* hdr_rt = NULL; + const GSVector2i& rtsize = ds->GetSize(); const GSVector2& rtscale = ds->GetScale(); @@ -497,14 +507,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.blend_accu = m_sw_blending && ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1; om_bsel.accu = ps_sel.blend_accu; - bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend /*|| ps_sel.blend_accu*/); + bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu); // Color clip bool acc_colclip_wrap = false; if (env.COLCLAMP.CLAMP == 0) { - // Not supported yet in colclip - om_bsel.accu = ps_sel.blend_accu = 0; - acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base); if (acc_colclip_wrap) { ps_sel.colclip = 3; @@ -514,8 +521,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.colclip = 1; GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); } - } else { - sw_blending_base |= m_sw_blending && ps_sel.blend_accu; } bool all_blend_sw; @@ -583,15 +588,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour gl_TextureBarrier(); dev->PSSetShaderResource(3, rt); } else if (DATE) { - // TODO: do I need to clamp the value (if yes how? rintersect with rt?) 
- GSVector4 si = GSVector4(rtscale.x, rtscale.y); - GSVector4 off = GSVector4(-1.0f, 1.0f); // Round value - GSVector4 b = m_vt.m_min.p.xyxy(m_vt.m_max.p) + off.xxyy(); - GSVector4i ri = GSVector4i(b * si.xyxy()); + GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize); // Reduce the quantity of clean function - glScissor( ri.x, ri.y, ri.width(), ri.height() ); - GLState::scissor = ri; + glScissor( dRect.x, dRect.y, dRect.width(), dRect.height() ); + GLState::scissor = dRect; // Must be done here to avoid any GL state pertubation (clear function...) // Create an r32ui image that will containt primitive ID @@ -599,10 +600,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->InitPrimDateTexture(rt); dev->PSSetShaderResource(3, rt); } else { - GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y); - - GSVector4 src = (b * s.xyxy()).sat(off.zzyy()); - GSVector4 dst = src * 2.0f + off.xxxx(); + GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy(); + GSVector4 dst = src * 2.0f - 1.0f; GSVertexPT1 vertices[] = { @@ -913,7 +912,15 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GL_POP(); } - dev->OMSetRenderTargets(rt, ds, &scissor); + if (env.COLCLAMP.CLAMP == 0 && om_bsel.accu) { + hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F); + + dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); + + dev->OMSetRenderTargets(hdr_rt, ds, &scissor); + } else { + dev->OMSetRenderTargets(rt, ds, &scissor); + } if (context->TEST.DoFirstPass()) { @@ -997,10 +1004,22 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } } - if (DATE_GL42) + + if (DATE_GL42) { dev->RecycleDateTexture(); + } dev->EndScene(); + // Warning: EndScene must be called before StretchRect otherwise + // vertices will be overwritten. Trust me you don't want to do that. 
+ if (hdr_rt) { + GSVector4 dRect(ComputeBoundingBox(rtscale, rtsize)); + GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy(); + dev->StretchRect(hdr_rt, sRect, rt, dRect, 4, false); + + dev->Recycle(hdr_rt); + } + GL_POP(); } diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 2a3b700cb1..66879bdfeb 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -54,6 +54,8 @@ class GSRendererOGL : public GSRendererHW PRIM_OVERLAP m_prim_overlap; + GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize); + protected: void EmulateGS(); void SetupIA(); diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 794060a56e..4dc1dcc174 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -612,6 +612,12 @@ void ps_main() ps_fbmask(C); +#if PS_BLEND_ACCU && PS_COLCLIP + // Use negative value to avoid overflow of the texture + if (any(greaterThan(C.rgb, vec3(128.0f)))) { + C.rgb = (C.rgb - 256.0f); + } +#endif SV_Target0 = C / 255.0f; SV_Target1 = vec4(alpha_blend); } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index d56238466c..5b761a2471 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1471,6 +1471,12 @@ static const char* tfx_fs_all_glsl = "\n" " ps_fbmask(C);\n" "\n" + "#if PS_BLEND_ACCU && PS_COLCLIP\n" + " // Use negative value to avoid overflow of the texture\n" + " if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n" + " C.rgb = (C.rgb - 256.0f);\n" + " }\n" + "#endif\n" " SV_Target0 = C / 255.0f;\n" " SV_Target1 = vec4(alpha_blend);\n" "}\n" From 83dfc6b633b5c0e16a56ea99dbd51d798b0adfbd Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 10:14:53 +0200 Subject: [PATCH 02/16] gsdx-ogl: clean a bit selector code Use countof macro (avoid to duplicate the size) Fix the size of array Remove useless alpha_stencil case --- 
plugins/GSdx/GSDeviceOGL.cpp | 11 ++++++----- plugins/GSdx/GSDeviceOGL.h | 23 ++++++----------------- plugins/GSdx/GSTextureFXOGL.cpp | 9 +++++---- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index ef8215cfd6..b70a23771f 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -133,15 +133,15 @@ GSDeviceOGL::~GSDeviceOGL() gl_DeleteSamplers(1, &m_palette_ss); m_shader->Delete(m_apitrace); - for (uint32 key = 0; key < VSSelector::size(); key++) m_shader->Delete(m_vs[key]); - for (uint32 key = 0; key < GSSelector::size(); key++) m_shader->Delete(m_gs[key]); + for (uint32 key = 0; key < countof(m_vs); key++) m_shader->Delete(m_vs[key]); + for (uint32 key = 0; key < countof(m_gs); key++) m_shader->Delete(m_gs[key]); for (auto it = m_ps.begin(); it != m_ps.end() ; it++) m_shader->Delete(it->second); m_ps.clear(); - gl_DeleteSamplers(PSSamplerSelector::size(), m_ps_ss); + gl_DeleteSamplers(countof(m_ps_ss), m_ps_ss); - for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) delete m_om_dss[key]; + for (uint32 key = 0; key < countof(m_om_dss); key++) delete m_om_dss[key]; for (auto it = m_om_bs.begin(); it != m_om_bs.end(); it++) delete it->second; m_om_bs.clear(); @@ -238,8 +238,9 @@ bool GSDeviceOGL::Create(GSWnd* wnd) // **************************************************************** // Pre Generate the different sampler object // **************************************************************** - for (uint32 key = 0; key < PSSamplerSelector::size(); key++) + for (uint32 key = 0; key < countof(m_ps_ss); key++) { m_ps_ss[key] = CreateSampler(PSSamplerSelector(key)); + } // **************************************************************** // convert diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index d09465dc0c..71fb0cc771 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -238,13 +238,10 @@ class GSDeviceOGL : 
public GSDevice uint32 key; }; - // FIXME is the & useful ? - operator uint32() {return key & 0x3f;} + operator uint32() {return key;} VSSelector() : key(0) {} VSSelector(uint32 k) : key(k) {} - - static uint32 size() { return 1 << 5; } }; struct GSSelector @@ -266,8 +263,6 @@ class GSDeviceOGL : public GSDevice GSSelector() : key(0) {} GSSelector(uint32 k) : key(k) {} - - static uint32 size() { return 1 << 2; } }; __aligned(struct, 32) PSConstantBuffer @@ -386,13 +381,10 @@ class GSDeviceOGL : public GSDevice uint32 key; }; - // FIXME is the & useful ? - operator uint32() {return key & 0x7;} + operator uint32() {return key;} PSSamplerSelector() : key(0) {} PSSamplerSelector(uint32 k) : key(k) {} - - static uint32 size() { return 1 << 3; } }; struct OMDepthStencilSelector @@ -404,21 +396,18 @@ class GSDeviceOGL : public GSDevice uint32 ztst:2; uint32 zwe:1; uint32 date:1; - uint32 alpha_stencil:1; - uint32 _free:27; + uint32 _free:28; }; uint32 key; }; // FIXME is the & useful ? - operator uint32() {return key & 0x1f;} + operator uint32() {return key;} OMDepthStencilSelector() : key(0) {} OMDepthStencilSelector(uint32 k) : key(k) {} - - static uint32 size() { return 1 << 5; } }; struct OMColorMaskSelector @@ -560,10 +549,10 @@ class GSDeviceOGL : public GSDevice float bf; // blend factor } m_state; - GLuint m_vs[1<<6]; + GLuint m_vs[1<<5]; GLuint m_gs[1<<2]; GLuint m_ps_ss[1<<3]; - GSDepthStencilOGL* m_om_dss[1<<6]; + GSDepthStencilOGL* m_om_dss[1<<4]; hash_map m_ps; hash_map m_om_bs; GLuint m_apitrace; diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 488ad6e0c2..2715e0d431 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -40,7 +40,7 @@ void GSDeviceOGL::CreateTextureFX() // Pre compile all Geometry & Vertex Shader // It might cost a seconds at startup but it would reduce benchmark pollution - for (uint32 key = 0; key < GSSelector::size(); key++) { + for (uint32 key = 0; key < countof(m_gs); 
key++) { GSSelector sel(key); if (sel.point == sel.sprite) m_gs[key] = 0; @@ -48,7 +48,7 @@ void GSDeviceOGL::CreateTextureFX() m_gs[key] = CompileGS(GSSelector(key)); } - for (uint32 key = 0; key < VSSelector::size(); key++) { + for (uint32 key = 0; key < countof(m_vs); key++) { // wildhack is only useful if both TME and FST are enabled. VSSelector sel(key); if (sel.wildhack && (!sel.tme || !sel.fst)) @@ -61,8 +61,9 @@ void GSDeviceOGL::CreateTextureFX() // enough but buffer is polluted with noise. Clear will be limited // to the mask. glStencilMask(0xFF); - for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) + for (uint32 key = 0; key < countof(m_om_dss); key++) { m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key)); + } // Help to debug FS in apitrace m_apitrace = CompilePS(PSSelector()); @@ -77,7 +78,7 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel) if (dssel.date) { dss->EnableStencil(); - dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? 
GL_ZERO : GL_KEEP); + dss->SetStencil(GL_EQUAL, GL_KEEP); } if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) From 8f27a5a92b7fea59ca6232097d5b80d7db4acc89 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 16:10:36 +0200 Subject: [PATCH 03/16] gsdx-ogl: only enable fast accurate colclip in level3 Until we drop the old method --- plugins/GSdx/GSRendererOGL.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 59a37c490f..473d6e7113 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -464,11 +464,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Blend - const GIFRegALPHA& ALPHA = context->ALPHA; float afix = (float)context->ALPHA.FIX / 0x80; if (!IsOpaque() && rt) { + const GIFRegALPHA& ALPHA = context->ALPHA; + om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; om_bsel.a = ALPHA.A; @@ -504,8 +505,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // We really need SW blending for this one, barely used bool impossible_blend = (blend_flag & A_MAX); // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd - ps_sel.blend_accu = m_sw_blending && ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1; - om_bsel.accu = ps_sel.blend_accu; + bool accumulation_blend = (ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1); bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu); @@ -519,10 +519,17 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) { // Standard (inaccurate) colclip ps_sel.colclip = 1; + accumulation_blend = false; GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); } } + // Note: Option is duplicated, one impact the 
blend unit / the other the shader. + if (accumulation_blend && m_sw_blending) { + om_bsel.accu = ps_sel.blend_accu = 1; + sw_blending_base = true; + } + bool all_blend_sw; switch (m_sw_blending) { case ACC_BLEND_ULTRA: all_blend_sw = true; break; From 1fe3e04ce382fca0426346138d8e231532dff6ab Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 16:26:53 +0200 Subject: [PATCH 04/16] gsdx-ogl: don't alias m_env/m_context variable It is cumbersome to move code --- plugins/GSdx/GSRendererOGL.cpp | 95 ++++++++++++++++------------------ 1 file changed, 46 insertions(+), 49 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 473d6e7113..cb6c0ed881 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -393,15 +393,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour tex && tex->m_texture ? tex->m_texture->GetID() : 0, rt ? rt->GetID() : -1, ds->GetID()); - GSDrawingEnvironment& env = m_env; - GSDrawingContext* context = m_context; - GSTexture* hdr_rt = NULL; const GSVector2i& rtsize = ds->GetSize(); const GSVector2& rtscale = ds->GetScale(); - bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; + bool DATE = m_context->TEST.DATE && m_context->FRAME.PSM != PSM_PSMCT24; bool DATE_GL42 = false; bool DATE_GL45 = false; @@ -432,7 +429,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Except 2D games, sprites are often use for special post-processing effect m_prim_overlap = PrimitiveOverlap(); #ifdef ENABLE_OGL_DEBUG - if ((m_prim_overlap != PRIM_OVERLAP_NO) && (context->FRAME.Block() == context->TEX0.TBP0) && (m_vertex.next > 2)) { + if ((m_prim_overlap != PRIM_OVERLAP_NO) && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && (m_vertex.next > 2)) { GL_INS("ERROR: Source and Target are the same!"); } #endif @@ -449,7 +446,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, 
GSTextureCache::Sour DATE_GL45 = true; DATE = false; } else if (m_accurate_date && om_csel.wa - && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) { + && (!m_context->TEST.ATE || m_context->TEST.ATST == ATST_ALWAYS)) { // texture barrier will split the draw call into n draw call. It is very efficient for // few primitive draws. Otherwise it sucks. if (GLLoader::found_GL_ARB_texture_barrier && (m_index.tail < 100)) { @@ -464,11 +461,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Blend - float afix = (float)context->ALPHA.FIX / 0x80; + float afix = (float)m_context->ALPHA.FIX / 0x80; if (!IsOpaque() && rt) { - const GIFRegALPHA& ALPHA = context->ALPHA; + const GIFRegALPHA& ALPHA = m_context->ALPHA; om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; @@ -477,7 +474,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour om_bsel.c = ALPHA.C; om_bsel.d = ALPHA.D; - if (env.PABE.PABE) + if (m_env.PABE.PABE) { // FIXME it could be supported with SW blending! if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) @@ -511,7 +508,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Color clip bool acc_colclip_wrap = false; - if (env.COLCLAMP.CLAMP == 0) { + if (m_env.COLCLAMP.CLAMP == 0) { acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base); if (acc_colclip_wrap) { ps_sel.colclip = 3; @@ -555,7 +552,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour #if 0 if (om_bsel.abe) GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. 
Prim:%d number %d (sw %d)", - om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); + om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); #endif if (sw_blending && om_bsel.abe) { // select a shader that support blending @@ -628,10 +625,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // om - if (context->TEST.ZTE) + if (m_context->TEST.ZTE) { - om_dssel.ztst = context->TEST.ZTST; - om_dssel.zwe = !context->ZBUF.ZMSK; + om_dssel.ztst = m_context->TEST.ZTST; + om_dssel.zwe = !m_context->ZBUF.ZMSK; } else { @@ -650,7 +647,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe) { - if (context->ZBUF.PSM == PSM_PSMZ24) + if (m_context->ZBUF.PSM == PSM_PSMZ24) { if (m_vt.m_max.p.z > 0xffffff) { @@ -664,7 +661,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } } - else if (context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) + else if (m_context->ZBUF.PSM == PSM_PSMZ16 || m_context->ZBUF.PSM == PSM_PSMZ16S) { if (m_vt.m_max.p.z > 0xffff) { @@ -683,8 +680,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // FIXME Opengl support half pixel center (as dx10). Code could be easier!!! float sx = 2.0f * rtscale.x / (rtsize.x << 4); float sy = 2.0f * rtscale.y / (rtsize.y << 4); - float ox = (float)(int)context->XYOFFSET.OFX; - float oy = (float)(int)context->XYOFFSET.OFY; + float ox = (float)(int)m_context->XYOFFSET.OFX; + float oy = (float)(int)m_context->XYOFFSET.OFY; float ox2 = -1.0f / rtsize.x; float oy2 = -1.0f / rtsize.y; @@ -708,30 +705,30 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 
1 : PRIM->IIP; if (DATE_GL45) { - ps_sel.date = 5 + context->TEST.DATM; + ps_sel.date = 5 + m_context->TEST.DATM; } else if (DATE) { if (DATE_GL42) - ps_sel.date = 1 + context->TEST.DATM; + ps_sel.date = 1 + m_context->TEST.DATM; else om_dssel.date = 1; } - ps_sel.fba = context->FBA.FBA; + ps_sel.fba = m_context->FBA.FBA; if (PRIM->FGE) { ps_sel.fog = 1; - ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]); + ps_cb.FogColor_AREF = GSVector4::rgba32(m_env.FOGCOL.u32[0]); } - if (context->TEST.ATE) - ps_sel.atst = context->TEST.ATST; + if (m_context->TEST.ATE) + ps_sel.atst = m_context->TEST.ATST; else ps_sel.atst = ATST_ALWAYS; - if (context->TEST.ATE && context->TEST.ATST > 1) - ps_cb.FogColor_AREF.a = (float)context->TEST.AREF; + if (m_context->TEST.ATE && m_context->TEST.ATST > 1) + ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF; // By default don't use texture ps_sel.tfx = 4; @@ -740,23 +737,23 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (tex) { - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM]; - const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[context->TEX0.CPSM] : psm; + const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; bool bilinear = m_filter == 2 ? 
m_vt.IsLinear() : m_filter != 0; - bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3; + bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 3 && m_context->CLAMP.WMT < 3; // Don't force extra filtering on sprite (it creates various upscaling issue) bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear()); - ps_sel.wms = context->CLAMP.WMS; - ps_sel.wmt = context->CLAMP.WMT; + ps_sel.wms = m_context->CLAMP.WMS; + ps_sel.wmt = m_context->CLAMP.WMT; if (ps_sel.shuffle) { ps_sel.fmt = 0; } else if (tex->m_palette) { ps_sel.fmt = cpsm.fmt | 4; ps_sel.ifmt = !tex->m_target ? 0 - : (context->TEX0.PSM == PSM_PSMT4HL) ? 2 - : (context->TEX0.PSM == PSM_PSMT4HH) ? 1 + : (m_context->TEX0.PSM == PSM_PSMT4HL) ? 2 + : (m_context->TEX0.PSM == PSM_PSMT4HH) ? 1 : 0; // In standard mode palette is only used when alpha channel of the RT is @@ -769,16 +766,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } else { ps_sel.fmt = cpsm.fmt; } - ps_sel.aem = env.TEXA.AEM; + ps_sel.aem = m_env.TEXA.AEM; - if (context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { + if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { // Micro optimization that reduces GPU load (removes 5 instructions on the FS program) ps_sel.tfx = TFX_DECAL; } else { - ps_sel.tfx = context->TEX0.TFX; + ps_sel.tfx = m_context->TEX0.TFX; } - ps_sel.tcc = context->TEX0.TCC; + ps_sel.tcc = m_context->TEX0.TCC; ps_sel.ltf = bilinear && !simple_sample; spritehack = tex->m_spritehack_t; @@ -789,8 +786,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour int w = tex->m_texture->GetWidth(); int h = tex->m_texture->GetHeight(); - int tw = (int)(1 << context->TEX0.TW); - int th = (int)(1 << context->TEX0.TH); + int tw = (int)(1 << 
m_context->TEX0.TW); + int th = (int)(1 << m_context->TEX0.TH); GSVector4 WH(tw, th, w, h); @@ -802,20 +799,20 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_cb.WH = WH; ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV); + ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); // TC Offset Hack ps_sel.tcoffsethack = !!UserHacks_TCOffset; ps_cb.TC_OffsetHack = GSVector4(UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy(); GSVector4 clamp(ps_cb.MskFix); - GSVector4 ta(env.TEXA & GSVector4i::x000000ff()); + GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); ps_cb.MinMax = clamp / WH.xyxy(); ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255)); - ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1; - ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1; + ps_ssel.tau = (m_context->CLAMP.WMS + 3) >> 1; + ps_ssel.tav = (m_context->CLAMP.WMT + 3) >> 1; ps_ssel.ltf = bilinear && simple_sample; // Setup Texture ressources @@ -875,7 +872,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // rs - GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); + GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * m_context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); GL_PUSH("IA"); SetupIA(); @@ -919,7 +916,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GL_POP(); } - if (env.COLCLAMP.CLAMP == 0 && om_bsel.accu) { + if (m_env.COLCLAMP.CLAMP == 0 && om_bsel.accu) { hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F); dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); @@ -929,7 +926,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour 
dev->OMSetRenderTargets(rt, ds, &scissor); } - if (context->TEST.DoFirstPass()) + if (m_context->TEST.DoFirstPass()) { SendDraw(require_barrier); @@ -952,9 +949,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } - if (context->TEST.DoSecondPass()) + if (m_context->TEST.DoSecondPass()) { - ASSERT(!env.PABE.PABE); + ASSERT(!m_env.PABE.PABE); static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; @@ -971,7 +968,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool b = om_csel.wb; bool a = om_csel.wa; - switch(context->TEST.AFAIL) + switch(m_context->TEST.AFAIL) { case AFAIL_KEEP: z = r = g = b = a = false; break; // none case AFAIL_FB_ONLY: z = false; break; // rgba From 2901e94ebcd9cfc03ff1f32cf3163dfe6c674913 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 17:01:00 +0200 Subject: [PATCH 05/16] gsdx-ogl: always bind the RT as input texture To avoid code duplication --- plugins/GSdx/GSRendererOGL.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index cb6c0ed881..98ecfda50a 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -562,8 +562,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.blend_c = om_bsel.c; ps_sel.blend_d = om_bsel.d; - dev->PSSetShaderResource(3, rt); - // Require the fix alpha vlaue if (ALPHA.C == 2) { ps_cb.AlphaCoeff.a = afix; @@ -590,7 +588,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (DATE_GL45) { gl_TextureBarrier(); - dev->PSSetShaderResource(3, rt); } else if (DATE) { GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize); @@ -602,7 +599,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Create an r32ui image that will containt primitive ID if (DATE_GL42) { dev->InitPrimDateTexture(rt); - 
dev->PSSetShaderResource(3, rt); } else { GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy(); GSVector4 dst = src * 2.0f - 1.0f; @@ -846,6 +842,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->PSSetShaderResource(1, NULL); #endif } + // Always bind the RT. This way special effect can use it. + dev->PSSetShaderResource(3, rt); + // GS From 25bd5f5e85b8378f4bbca810937c7fe0eb562c26 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 17:13:37 +0200 Subject: [PATCH 06/16] gsdx-ogl: request texture barrier to emulate accurate date Actually it can partially be done with GL_ARB_shader_image_load_store extension. However all drivers that support shader_image have texture barrier too. --- plugins/GSdx/GLLoader.cpp | 1 + plugins/GSdx/GSRendererOGL.cpp | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index 388367c4b3..bb4ba95495 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -507,6 +507,7 @@ namespace GLLoader { if (!found_GL_ARB_texture_barrier) { fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver. You can't emulate correctly the GS blending unit! Sorry!\n"); theApp.SetConfig("accurate_blending_unit", 0); + theApp.SetConfig("accurate_date", 0); } fprintf(stderr, "\n"); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 98ecfda50a..30fec7630e 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -441,15 +441,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // DATE: selection of the algorithm. 
Must be done before blending because GL42 is not compatible with blending - if (DATE) { - if (GLLoader::found_GL_ARB_texture_barrier && (m_prim_overlap == PRIM_OVERLAP_NO)) { + if (DATE && GLLoader::found_GL_ARB_texture_barrier) { + if (m_prim_overlap == PRIM_OVERLAP_NO) { + require_barrier = true; DATE_GL45 = true; DATE = false; } else if (m_accurate_date && om_csel.wa && (!m_context->TEST.ATE || m_context->TEST.ATST == ATST_ALWAYS)) { // texture barrier will split the draw call into n draw call. It is very efficient for // few primitive draws. Otherwise it sucks. - if (GLLoader::found_GL_ARB_texture_barrier && (m_index.tail < 100)) { + if (m_index.tail < 100) { require_barrier = true; DATE_GL45 = true; DATE = false; @@ -586,9 +587,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // DATE (setup part) - if (DATE_GL45) { - gl_TextureBarrier(); - } else if (DATE) { + if (DATE) { GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize); // Reduce the quantity of clean function From 25298c70f77fec571a2e7be8e53190c2708e00c1 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 17:57:42 +0200 Subject: [PATCH 07/16] gsdx-ogl: move blending management into a separate function --- plugins/GSdx/GSRendererOGL.cpp | 238 +++++++++++++++++---------------- plugins/GSdx/GSRendererOGL.h | 1 + 2 files changed, 125 insertions(+), 114 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 30fec7630e..91f6b3c8e8 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -313,6 +313,128 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s return require_barrier; } +bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMBlendSelector& om_bsel, GSDeviceOGL::PSConstantBuffer& ps_cb, float afix, bool DATE_GL42) +{ + const GIFRegALPHA& ALPHA = m_context->ALPHA; + bool require_barrier = false; + + om_bsel.abe = 
PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; + + om_bsel.a = ALPHA.A; + om_bsel.b = ALPHA.B; + om_bsel.c = ALPHA.C; + om_bsel.d = ALPHA.D; + + if (m_env.PABE.PABE) + { +#ifdef ENABLE_OGL_DEBUG + fprintf(stderr, "env PABE not supported\n"); + GL_INS("!!! ENV PABE not supported !!!"); +#endif + // FIXME it could be supported with SW blending! + if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) + { + // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader + // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result + om_bsel.abe = 0; + } + else + { + //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. + //ASSERT(0); + } + } + + // No blending so early exit + if (!om_bsel.abe) + return require_barrier; + + // Compute the blending equation to detect special case + int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; + int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; + // SW Blend is (nearly) free. Let's use it. 
+ bool free_blend = (blend_flag & NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO); + // We really need SW blending for this one, barely used + bool impossible_blend = (blend_flag & A_MAX); + // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd + bool accumulation_blend = (ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1); + + bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu); + + // Color clip + bool acc_colclip_wrap = false; + if (m_env.COLCLAMP.CLAMP == 0) { + acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base); + if (acc_colclip_wrap) { + ps_sel.colclip = 3; + GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); + } else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) { + // Standard (inaccurate) colclip + ps_sel.colclip = 1; + accumulation_blend = false; + GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); + } + } + + // Note: Option is duplicated, one impact the blend unit / the other the shader. + if (accumulation_blend && m_sw_blending) { + om_bsel.accu = ps_sel.blend_accu = 1; + sw_blending_base = true; + } + + bool all_blend_sw; + switch (m_sw_blending) { + case ACC_BLEND_ULTRA: all_blend_sw = true; break; + case ACC_BLEND_FULL: all_blend_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ); break; + case ACC_BLEND_CCLIP: + case ACC_BLEND_SPRITE: all_blend_sw = m_vt.m_primclass == GS_SPRITE_CLASS; break; + default: all_blend_sw = false; break; + } + + bool sw_blending = sw_blending_base // Free case or Impossible blend + || all_blend_sw // all blend + || acc_colclip_wrap // accurate colclip + || ps_sel.fbmask; // accurate fbmask + + + // SW Blending + // GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive + // that write the bad alpha value. 
Sw blending will force the draw to run primitive by primitive + // (therefore primitiveID will be constant to 1) + sw_blending &= !DATE_GL42; + + // For stat to optimize accurate option +#if 0 + GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (sw %d)", + om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); +#endif + if (sw_blending) { + // select a shader that support blending + om_bsel.ps = 1; + ps_sel.blend_a = om_bsel.a; + ps_sel.blend_b = om_bsel.b; + ps_sel.blend_c = om_bsel.c; + ps_sel.blend_d = om_bsel.d; + + // Require the fix alpha vlaue + if (ALPHA.C == 2) { + ps_cb.AlphaCoeff.a = afix; + } + + // No need to flush for every primitive + require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu; + } else { + ps_sel.clr1 = om_bsel.IsCLR1(); + if (ps_sel.dfmt == 1 && ALPHA.C == 1) { + // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent + om_bsel.c = 2; + afix = 1.0f; + } + } + + return require_barrier; +} + GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() { // Either 1 triangle or 1 line or 3 POINTs @@ -464,120 +586,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour float afix = (float)m_context->ALPHA.FIX / 0x80; - if (!IsOpaque() && rt) - { - const GIFRegALPHA& ALPHA = m_context->ALPHA; - - om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; - - om_bsel.a = ALPHA.A; - om_bsel.b = ALPHA.B; - om_bsel.c = ALPHA.C; - om_bsel.d = ALPHA.D; - - if (m_env.PABE.PABE) - { - // FIXME it could be supported with SW blending! 
- if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1) - { - // this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader - // cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result - om_bsel.abe = 0; - } - else - { - //Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though. - //ASSERT(0); -#ifdef ENABLE_OGL_DEBUG - fprintf(stderr, "env PABE not supported\n"); - GL_INS("!!! ENV PABE not supported !!!"); -#endif - } - } - - // Compute the blending equation to detect special case - int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; - int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; - // SW Blend is (nearly) free. Let's use it. - bool free_blend = (blend_flag & NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO); - // We really need SW blending for this one, barely used - bool impossible_blend = (blend_flag & A_MAX); - // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd - bool accumulation_blend = (ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1); - - bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu); - - // Color clip - bool acc_colclip_wrap = false; - if (m_env.COLCLAMP.CLAMP == 0) { - acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base); - if (acc_colclip_wrap) { - ps_sel.colclip = 3; - GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); - } else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) { - // Standard (inaccurate) colclip - ps_sel.colclip = 1; - accumulation_blend = false; - GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); - } - } - - // Note: Option is duplicated, one impact the blend unit / the other the shader. 
- if (accumulation_blend && m_sw_blending) { - om_bsel.accu = ps_sel.blend_accu = 1; - sw_blending_base = true; - } - - bool all_blend_sw; - switch (m_sw_blending) { - case ACC_BLEND_ULTRA: all_blend_sw = true; break; - case ACC_BLEND_FULL: all_blend_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ); break; - case ACC_BLEND_CCLIP: - case ACC_BLEND_SPRITE: all_blend_sw = m_vt.m_primclass == GS_SPRITE_CLASS; break; - default: all_blend_sw = false; break; - } - - bool sw_blending = sw_blending_base // Free case or Impossible blend - || all_blend_sw // all blend - || acc_colclip_wrap // accurate colclip - || ps_sel.fbmask; // accurate fbmask - - - // SW Blending - // GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive - // that write the bad alpha value. Sw blending will force the draw to run primitive by primitive - // (therefore primitiveID will be constant to 1) - sw_blending &= !DATE_GL42; - - // For stat to optimize accurate option -#if 0 - if (om_bsel.abe) - GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. 
Prim:%d number %d (sw %d)", - om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); -#endif - if (sw_blending && om_bsel.abe) { - // select a shader that support blending - om_bsel.ps = 1; - ps_sel.blend_a = om_bsel.a; - ps_sel.blend_b = om_bsel.b; - ps_sel.blend_c = om_bsel.c; - ps_sel.blend_d = om_bsel.d; - - // Require the fix alpha vlaue - if (ALPHA.C == 2) { - ps_cb.AlphaCoeff.a = afix; - } - - // No need to flush for every primitive - require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu; - } else { - ps_sel.clr1 = om_bsel.IsCLR1(); - if (ps_sel.dfmt == 1 && ALPHA.C == 1) { - // 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent - om_bsel.c = 2; - afix = 1.0f; - } - } + if (!IsOpaque() && rt) { + require_barrier |= EmulateBlending(ps_sel, om_bsel, ps_cb, afix, DATE_GL42); } if (ps_sel.dfmt == 1) { diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 66879bdfeb..c8ab0e24ee 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -60,6 +60,7 @@ class GSRendererOGL : public GSRendererHW void EmulateGS(); void SetupIA(); bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel, GSDeviceOGL::PSConstantBuffer& ps_cb); + bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMBlendSelector& om_bsel, GSDeviceOGL::PSConstantBuffer& ps_cb, float afix, bool DATE_GL42); public: GSRendererOGL(); From 83f874db93055b3ed0cd57170603bd5ba9fc20ba Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 30 Jul 2015 19:11:38 +0200 Subject: [PATCH 08/16] gsdx-ogl: remove bsel.ps Just clear bsel.abe to disable blending --- plugins/GSdx/GSDeviceOGL.h | 6 ++---- plugins/GSdx/GSRendererOGL.cpp | 4 ++-- plugins/GSdx/GSTextureFXOGL.cpp | 9 --------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 
71fb0cc771..a5d9d3805b 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -452,9 +452,8 @@ class GSDeviceOGL : public GSDevice uint32 d:2; uint32 negative:1; uint32 accu:1; - uint32 ps:1; - uint32 _free:20; + uint32 _free:21; }; struct @@ -463,9 +462,8 @@ class GSDeviceOGL : public GSDevice uint32 abcd:8; uint32 _negative:1; uint32 _accu:1; - uint32 _ps:1; - uint32 _free2:20; + uint32 _free2:21; }; uint32 key; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 91f6b3c8e8..0ca0cadcd6 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -409,8 +409,8 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); #endif if (sw_blending) { - // select a shader that support blending - om_bsel.ps = 1; + // Disable HW blending except in accu mode + om_bsel.abe = ps_sel.blend_accu; ps_sel.blend_a = om_bsel.a; ps_sel.blend_b = om_bsel.b; ps_sel.blend_c = om_bsel.c; diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 2715e0d431..9a9e1f688e 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -197,15 +197,6 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, fl OMSetDepthStencilState(dss, 1); - if (bsel.ps && !bsel.accu) { - if (GLState::blend) { - GLState::blend = false; - glDisable(GL_BLEND); - } - // No hardware blending thank - return; - } - // ************************************************************* // Static // ************************************************************* From 93c47feb7c5a8de251e40ae86cce46d11a69acfb Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 30 Jul 2015 22:05:08 +0200 Subject: [PATCH 09/16] gsdx-ogl: replace old colclip algo with the HDR algo Similar speed but more accurate Allow to clean the code --- 
plugins/GSdx/GSDeviceOGL.cpp | 6 ++--- plugins/GSdx/GSDeviceOGL.h | 6 ++--- plugins/GSdx/GSRendererOGL.cpp | 44 ++++++++++++++++++------------- plugins/GSdx/GSTextureFXOGL.cpp | 5 +--- plugins/GSdx/res/glsl/tfx_fs.glsl | 7 ++--- plugins/GSdx/res/glsl_source.h | 7 ++--- 6 files changed, 36 insertions(+), 39 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index b70a23771f..c8f74fb521 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -667,7 +667,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_SHUFFLE %d\n", sel.shuffle) + format("#define PS_READ_BA %d\n", sel.read_ba) + format("#define PS_FBMASK %d\n", sel.fbmask) - + format("#define PS_BLEND_ACCU %d\n", sel.blend_accu) + + format("#define PS_HDR %d\n", sel.hdr) ; return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); @@ -1551,13 +1551,13 @@ const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3] = { 17 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) { 18 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F { NO_BAR | A_MAX | 7 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) - { 19 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd + { BLEND_ACCU | 19 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { NO_BAR | 20 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As { A_MAX | 8 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) { 21 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd { 22 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad { NO_BAR| 
A_MAX | 9 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) - { 23 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd + { BLEND_ACCU | 23 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd { NO_BAR | 24 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F { 25 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) { A_MAX | 10 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index a5d9d3805b..2db02ed4d5 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -33,6 +33,7 @@ #define A_MAX (0x100) // Impossible blending uses coeff bigger than 1 #define C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) #define NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) +#define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds #ifdef ENABLE_OGL_DEBUG_MEM_BW extern uint64 g_real_texture_upload_byte; @@ -350,8 +351,8 @@ class GSDeviceOGL : public GSDevice uint32 blend_b:2; uint32 blend_c:2; uint32 blend_d:2; - uint32 blend_accu:1; uint32 dfmt:2; + uint32 hdr:1; uint32 _free2:21; }; @@ -451,9 +452,8 @@ class GSDeviceOGL : public GSDevice uint32 c:2; uint32 d:2; uint32 negative:1; - uint32 accu:1; - uint32 _free:21; + uint32 _free:22; }; struct diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 0ca0cadcd6..b50c4783be 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -357,30 +357,26 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL // We really need SW blending for this 
one, barely used bool impossible_blend = (blend_flag & A_MAX); // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd - bool accumulation_blend = (ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1); + bool accumulation_blend = (blend_flag & BLEND_ACCU); - bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu); + bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend); // Color clip - bool acc_colclip_wrap = false; if (m_env.COLCLAMP.CLAMP == 0) { - acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base); - if (acc_colclip_wrap) { + if (accumulation_blend) { + ps_sel.hdr = 1; + GL_INS("COLCLIP Fast HDR mode ENABLED"); + } else if (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base) { ps_sel.colclip = 3; + sw_blending_base = true; GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); - } else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) { - // Standard (inaccurate) colclip - ps_sel.colclip = 1; - accumulation_blend = false; - GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); + } else { + fprintf(stderr, "Sorry colclip isn't supported\n"); } } // Note: Option is duplicated, one impact the blend unit / the other the shader. - if (accumulation_blend && m_sw_blending) { - om_bsel.accu = ps_sel.blend_accu = 1; - sw_blending_base = true; - } + sw_blending_base |= accumulation_blend; bool all_blend_sw; switch (m_sw_blending) { @@ -393,7 +389,6 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL bool sw_blending = sw_blending_base // Free case or Impossible blend || all_blend_sw // all blend - || acc_colclip_wrap // accurate colclip || ps_sel.fbmask; // accurate fbmask @@ -402,6 +397,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL // that write the bad alpha value. 
Sw blending will force the draw to run primitive by primitive // (therefore primitiveID will be constant to 1) sw_blending &= !DATE_GL42; + // Seriously don't expect me to support this kind of crazyness. + // No mix of COLCLIP + accumulation_blend + DATE GL42 + ASSERT(!(ps_sel.hdr && DATE_GL42)); // For stat to optimize accurate option #if 0 @@ -409,20 +407,28 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending); #endif if (sw_blending) { - // Disable HW blending except in accu mode - om_bsel.abe = ps_sel.blend_accu; ps_sel.blend_a = om_bsel.a; ps_sel.blend_b = om_bsel.b; ps_sel.blend_c = om_bsel.c; ps_sel.blend_d = om_bsel.d; + if (accumulation_blend) { + // Keep HW blending to do the addition + om_bsel.abe = 1; + // Remove the addition from the SW blending + ps_sel.blend_d = 2; + } else { + // Disable HW blending + om_bsel.abe = 0; + } + // Require the fix alpha vlaue if (ALPHA.C == 2) { ps_cb.AlphaCoeff.a = afix; } // No need to flush for every primitive - require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu; + require_barrier |= !(blend_flag & NO_BAR) && !accumulation_blend; } else { ps_sel.clr1 = om_bsel.IsCLR1(); if (ps_sel.dfmt == 1 && ALPHA.C == 1) { @@ -924,7 +930,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GL_POP(); } - if (m_env.COLCLAMP.CLAMP == 0 && om_bsel.accu) { + if (ps_sel.hdr) { hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F); dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false); diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 9a9e1f688e..5fe60ae286 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -105,10 +105,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix) { int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; 
- if (bsel.accu) - bs->SetRGB(GL_FUNC_ADD, GL_ONE, GL_ONE); - else - bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); + bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); if (m_blendMapD3D9[i].bogus & A_MAX) { if (!theApp.GetConfig("accurate_blending_unit", 1)) { diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 4dc1dcc174..fd3f075f52 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -453,9 +453,6 @@ void ps_blend(inout vec4 Color, float As) #if PS_BLEND_A == PS_BLEND_B Color.rgb = D; -#elif PS_BLEND_ACCU == 1 - // The D addition will be done in the blending unit - Color.rgb = trunc(A * C); #else Color.rgb = trunc((A - B) * C + D); #endif @@ -612,8 +609,8 @@ void ps_main() ps_fbmask(C); -#if PS_BLEND_ACCU && PS_COLCLIP - // Use negative value to avoid overflow of the texture +#if PS_HDR == 1 + // Use negative value to avoid overflow of the texture (in accumulation mode) if (any(greaterThan(C.rgb, vec3(128.0f)))) { C.rgb = (C.rgb - 256.0f); } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 5b761a2471..8619fd259d 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1312,9 +1312,6 @@ static const char* tfx_fs_all_glsl = "\n" "#if PS_BLEND_A == PS_BLEND_B\n" " Color.rgb = D;\n" - "#elif PS_BLEND_ACCU == 1\n" - " // The D addition will be done in the blending unit\n" - " Color.rgb = trunc(A * C);\n" "#else\n" " Color.rgb = trunc((A - B) * C + D);\n" "#endif\n" @@ -1471,8 +1468,8 @@ static const char* tfx_fs_all_glsl = "\n" " ps_fbmask(C);\n" "\n" - "#if PS_BLEND_ACCU && PS_COLCLIP\n" - " // Use negative value to avoid overflow of the texture\n" + "#if PS_HDR == 1\n" + " // Use negative value to avoid overflow of the texture (in accumulation mode)\n" " if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n" " C.rgb = (C.rgb - 256.0f);\n" " }\n" From 
cfd0fd6cc81ea6b3684e897b7e65547b7ae9f189 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 30 Jul 2015 22:11:32 +0200 Subject: [PATCH 10/16] gsdx-ogl: remove old colclip algo --- plugins/GSdx/GSDeviceOGL.cpp | 14 +----------- plugins/GSdx/GSDeviceOGL.h | 19 ++++------------ plugins/GSdx/GSRendererOGL.cpp | 37 +------------------------------ plugins/GSdx/GSTextureFXOGL.cpp | 3 --- plugins/GSdx/res/glsl/tfx_fs.glsl | 17 ++------------ plugins/GSdx/res/glsl_source.h | 20 +++++------------ 6 files changed, 13 insertions(+), 97 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index c8f74fb521..92f7c1839e 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -696,7 +696,7 @@ void GSDeviceOGL::SelfShaderTest() int perf = 0; int all = 0; // Test: SW blending - for (int colclip = 0; colclip < 4; colclip += 3) { + for (int colclip = 0; colclip < 2; colclip++) { for (int fmt = 0; fmt < 3; fmt++) { for (int i = 0; i < 3; i++) { PSSelector sel; @@ -787,18 +787,6 @@ void GSDeviceOGL::SelfShaderTest() } PRINT_TEST("Fst/Tc/IIp"); - // Test: Colclip - for (int colclip = 0; colclip < 3; colclip += 1) { - PSSelector sel; - sel.tfx = 4; - sel.atst = 1; - - sel.colclip = colclip; - std::string file = format("Shader_Colclip_%d.glsl.asm", colclip); - RUN_TEST; - } - PRINT_TEST("Colclip"); - // Test: tfx/tcc for (int tfx = 0; tfx < 5; tfx++) { for (int tcc = 0; tcc < 2; tcc++) { diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 2db02ed4d5..adc87a0dc9 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -66,14 +66,6 @@ public: if (IsConstant(src) || IsConstant(dst)) m_constant_factor = true; } - void RevertOp() - { - if(m_equation_RGB == GL_FUNC_ADD) - m_equation_RGB = GL_FUNC_REVERSE_SUBTRACT; - else if(m_equation_RGB == GL_FUNC_REVERSE_SUBTRACT) - m_equation_RGB = GL_FUNC_ADD; - } - void EnableBlend() { m_enable = true;} bool IsConstant(GLenum factor) { return ((factor 
== GL_CONSTANT_COLOR) || (factor == GL_ONE_MINUS_CONSTANT_COLOR)); } @@ -332,7 +324,7 @@ class GSDeviceOGL : public GSDevice uint32 tcoffsethack:1; //uint32 point_sampler:1; Not tested, so keep the bit for blend uint32 iip:1; - uint32 colclip:2; + uint32 colclip:1; uint32 atst:3; uint32 tfx:3; uint32 tcc:1; @@ -344,7 +336,7 @@ class GSDeviceOGL : public GSDevice uint32 read_ba:1; uint32 fbmask:1; - //uint32 _free1:0; + uint32 _free1:1; // Word 2 uint32 blend_a:2; @@ -451,19 +443,16 @@ class GSDeviceOGL : public GSDevice uint32 b:2; uint32 c:2; uint32 d:2; - uint32 negative:1; - uint32 _free:22; + uint32 _free:23; }; struct { uint32 _abe:1; uint32 abcd:8; - uint32 _negative:1; - uint32 _accu:1; - uint32 _free2:21; + uint32 _free2:23; }; uint32 key; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index b50c4783be..51fba0b82e 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -367,7 +367,7 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL ps_sel.hdr = 1; GL_INS("COLCLIP Fast HDR mode ENABLED"); } else if (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base) { - ps_sel.colclip = 3; + ps_sel.colclip = 1; sw_blending_base = true; GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); } else { @@ -943,24 +943,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour if (m_context->TEST.DoFirstPass()) { SendDraw(require_barrier); - - if (ps_sel.colclip == 1) - { - ASSERT(!om_bsel.ps); - GL_PUSH("COLCLIP"); - GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); - GSDeviceOGL::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg); - - SendDraw(false); - dev->SetupOM(om_dssel, om_bsel, afix); - GL_POP(); - } } if (m_context->TEST.DoSecondPass()) @@ -1003,23 +985,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, 
GSTexture* ds, GSTextureCache::Sour dev->SetupOM(om_dssel, om_bsel, afix); SendDraw(require_barrier); - - if (ps_sel.colclip == 1) - { - ASSERT(!om_bsel.ps); - GL_PUSH("COLCLIP"); - GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel); - GSDeviceOGL::PSSelector ps_selneg(ps_sel); - - om_bselneg.negative = 1; - ps_selneg.colclip = 2; - - dev->SetupOM(om_dssel, om_bselneg, afix); - dev->SetupPS(ps_selneg); - - SendDraw(false); - GL_POP(); - } } } diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 5fe60ae286..c0bdcbc66d 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -123,9 +123,6 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix) } else { bs->EnableBlend(); } - - // Not very good but I don't wanna write another 81 row table - if(bsel.negative) bs->RevertOp(); } return bs; diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index fd3f075f52..4c6f4bfcec 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -352,17 +352,6 @@ void atst(vec4 C) #endif } -void colclip(inout vec4 C) -{ -#if (PS_COLCLIP == 2) - C.rgb = 256.0f - C.rgb; -#endif -#if (PS_COLCLIP == 1 || PS_COLCLIP == 2) - bvec3 factor = lessThan(C.rgb, vec3(128.0f)); - C.rgb *= vec3(factor); -#endif -} - void fog(inout vec4 C, float f) { #if PS_FOG != 0 @@ -384,8 +373,6 @@ vec4 ps_color() fog(C, PSin_t.z); - colclip(C); - #if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes C.rgb = vec3(255.0f); #endif @@ -460,7 +447,7 @@ void ps_blend(inout vec4 Color, float As) // FIXME dithering // Correct the Color value based on the output format -#if PS_COLCLIP != 3 +#if PS_COLCLIP == 0 // Standard Clamp Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f)); #endif @@ -475,7 +462,7 @@ void ps_blend(inout vec4 Color, float As) // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8)); -#elif PS_COLCLIP == 3 +#elif PS_COLCLIP == 1 Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF)); #endif diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 8619fd259d..c1d638e4ea 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -535,6 +535,9 @@ static const char* shadeboost_glsl = "** Contrast, saturation, brightness\n" "** Code of this function is from TGM's shader pack\n" "** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057\n" + "** TGM's author comment about the license (included in the previous link)\n" + "** \"do with it, what you want! its total free!\n" + "** (but would be nice, if you say that you used my shaders :wink: ) but not necessary\"\n" "*/\n" "\n" "struct vertex_basic\n" @@ -1211,17 +1214,6 @@ static const char* tfx_fs_all_glsl = "#endif\n" "}\n" "\n" - "void colclip(inout vec4 C)\n" - "{\n" - "#if (PS_COLCLIP == 2)\n" - " C.rgb = 256.0f - C.rgb;\n" - "#endif\n" - "#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)\n" - " bvec3 factor = lessThan(C.rgb, vec3(128.0f));\n" - " C.rgb *= vec3(factor);\n" - "#endif\n" - "}\n" - "\n" "void fog(inout vec4 C, float f)\n" "{\n" "#if PS_FOG != 0\n" @@ -1243,8 +1235,6 @@ static const char* tfx_fs_all_glsl = "\n" " fog(C, PSin_t.z);\n" "\n" - " colclip(C);\n" - "\n" "#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n" " C.rgb = vec3(255.0f);\n" "#endif\n" @@ -1319,7 +1309,7 @@ static const char* tfx_fs_all_glsl = " // FIXME dithering\n" "\n" " // Correct the Color value based on the output format\n" - "#if PS_COLCLIP != 3\n" + "#if PS_COLCLIP == 0\n" " // Standard Clamp\n" " Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n" "#endif\n" @@ -1334,7 +1324,7 @@ static const char* tfx_fs_all_glsl = " // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania\n" "\n" " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n" - "#elif PS_COLCLIP == 3\n" + "#elif PS_COLCLIP == 1\n" " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n" "#endif\n" "\n" From 8554f32086539a41273b699abe3c7fd320ad50e2 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 31 Jul 2015 09:09:44 +0200 Subject: [PATCH 11/16] gsdx-ogl: clean the blend table Remove old shader define Prefix macro with BLEND_ Add some notes to explain the special symbol --- plugins/GSdx/GSDeviceOGL.cpp | 168 +++++++++++++++++--------------- plugins/GSdx/GSDeviceOGL.h | 8 +- plugins/GSdx/GSRendererOGL.cpp | 6 +- plugins/GSdx/GSTextureFXOGL.cpp | 2 +- 4 files changed, 95 insertions(+), 89 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 92f7c1839e..571700e47d 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -1502,6 +1502,11 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, // 1211 Cd*(1 + Ad) => Source * Dest color + Dest * Dest alpha // 1221 Cd*(1 + F) => Source * Dest color + Dest * Factor +// Special blending method table: +// # (tricky) => 1 * Cd + Cd * F => Use (Cd, F) as factor of color (1, Cd) +// * (bogus) => C * (1 + F ) + ... => factor is always bigger than 1 (except above case) +// ? => Cs * F + Cd => do the multiplication in shader and addition in blending unit. 
It is an optimization + // Copy Dx blend table and convert it to ogl #define D3DBLENDOP_ADD GL_FUNC_ADD #define D3DBLENDOP_SUBTRACT GL_FUNC_SUBTRACT @@ -1518,87 +1523,88 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, #define D3DBLEND_SRCALPHA GL_SRC1_ALPHA #define D3DBLEND_INVSRCALPHA GL_ONE_MINUS_SRC1_ALPHA + const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3] = { - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0002: (Cs - Cs)*As + 0 ==> 0 - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0010: (Cs - Cs)*Ad + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0012: (Cs - Cs)*Ad + 0 ==> 0 - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0022: (Cs - Cs)*F + 0 ==> 0 - { A_MAX | 4 , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As - { 13 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_INVSRCALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) - { 14 , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As - { A_MAX | 5 , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad - { 15 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_INVDESTALPHA} , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) - { 16 , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 0112: (Cs - 
Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad - { A_MAX | 6 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F - { 17 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) - { 18 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F - { NO_BAR | A_MAX | 7 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) - { BLEND_ACCU | 19 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd - { NO_BAR | 20 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As - { A_MAX | 8 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) - { 21 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd - { 22 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad - { NO_BAR| A_MAX | 9 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) - { BLEND_ACCU | 23 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd - { NO_BAR | 24 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F - { 25 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) - { A_MAX | 10 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As - { 26 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As - { 27 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_DESTALPHA} , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) - { A_MAX | 11 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , //*1011: (Cd - 
Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad - { 28 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad - { 29 , D3DBLENDOP_ADD , D3DBLEND_INVBLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) - { A_MAX | 12 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F - { 30 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1102: (Cd - Cd)*As + 0 ==> 0 - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1110: (Cd - Cd)*Ad + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1112: (Cd - Cd)*Ad + 0 ==> 0 - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 - { 31 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As - { C_CLR | 55 , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_SRCALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background - { 32 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_SRCALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As - { 33 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad - { C_CLR | 56 , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_DESTALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) - { 34 , 
D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_DESTALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad - { 35 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F - { C_CLR | 57 , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_BLENDFACTOR} , //#1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) - { 36 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_BLENDFACTOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F - { NO_BAR | 37 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) - { 38 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As - { NO_BAR | 39 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As - { 40 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) - { 41 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad - { 42 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad - { NO_BAR | 43 , D3DBLENDOP_ADD , D3DBLEND_INVBLENDFACTOR , D3DBLEND_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) - { 44 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F - { NO_BAR | 45 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F - { 46 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As - { 47 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVSRCALPHA} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) - { 48 , D3DBLENDOP_SUBTRACT , D3DBLEND_ZERO , D3DBLEND_SRCALPHA} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As - { 49 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad - { 50 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVDESTALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) - { 51 , 
D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad - { 52 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F - { 53 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVBLENDFACTOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) - { 54 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2202: (0 - 0)*As + 0 ==> 0 - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2210: (0 - 0)*Ad + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2211: (0 - 0)*Ad + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2212: (0 - 0)*Ad + 0 ==> 0 - { NO_BAR | 1 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2220: (0 - 0)*F + Cs ==> Cs - { 2 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd - { NO_BAR | 3 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2222: (0 - 0)*F + 0 ==> 0 + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0000: (Cs - Cs)*As + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0001: (Cs - Cs)*As + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0002: (Cs - Cs)*As + 0 ==> 0 + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0010: (Cs - Cs)*Ad + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0011: (Cs - Cs)*Ad + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0012: (Cs - Cs)*Ad + 0 ==> 0 + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO 
, D3DBLEND_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0022: (Cs - Cs)*F + 0 ==> 0 + { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As + { 0 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_INVSRCALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As + { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad + { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_INVDESTALPHA} , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad + { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F + { 0 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F + { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) + { BLEND_ACCU , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , //?0201: (Cs - 0)*As + Cd ==> Cs*As + Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As + { BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) + { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd + { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad + { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , 
D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) + { BLEND_ACCU , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , //?0221: (Cs - 0)*F + Cd ==> Cs*F + Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F + { 0 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) + { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As + { 0 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_DESTALPHA} , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) + { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad + { 0 , D3DBLENDOP_ADD , D3DBLEND_INVBLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) + { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1102: (Cd - Cd)*As + 0 ==> 0 + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1110: (Cd - Cd)*Ad + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1111: (Cd - Cd)*Ad + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1112: (Cd - Cd)*Ad + 0 ==> 0 + { 
BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1120: (Cd - Cd)*F + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1121: (Cd - Cd)*F + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 1122: (Cd - Cd)*F + 0 ==> 0 + { 0 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , // 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As + { BLEND_C_CLR , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_SRCALPHA} , //#1201: (Cd - 0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_SRCALPHA} , // 1202: (Cd - 0)*As + 0 ==> Cd*As + { 0 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad + { BLEND_C_CLR , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_DESTALPHA} , //#1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad) + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_DESTALPHA} , // 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad + { 0 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F + { BLEND_C_CLR , D3DBLENDOP_ADD , D3DBLEND_DESTCOLOR , D3DBLEND_BLENDFACTOR} , //#1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_BLENDFACTOR} , // 1222: (Cd - 0)*F + 0 ==> Cd*F + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_ZERO} , // 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , // 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As + { BLEND_NO_BAR , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As + { 0 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_ZERO} , // 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad) + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad + { BLEND_NO_BAR , D3DBLENDOP_ADD , 
D3DBLEND_INVBLENDFACTOR , D3DBLEND_ZERO} , // 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) + { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , // 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F + { BLEND_NO_BAR , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , // 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVSRCALPHA} , // 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As) + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ZERO , D3DBLEND_SRCALPHA} , // 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVDESTALPHA} , // 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad) + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , // 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_INVBLENDFACTOR} , // 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) + { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , // 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2200: (0 - 0)*As + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2201: (0 - 0)*As + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2202: (0 - 0)*As + 0 ==> 0 + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2210: (0 - 0)*Ad + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 2211: (0 - 0)*Ad + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2212: (0 - 0)*Ad + 0 ==> 0 + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 2220: (0 - 0)*F + Cs ==> Cs + { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , 
D3DBLEND_ONE} , // 2221: (0 - 0)*F + Cd ==> Cd + { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 2222: (0 - 0)*F + 0 ==> 0 }; diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index adc87a0dc9..6a346c9d13 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -30,10 +30,10 @@ #include "GLState.h" // A couple of flag to determine the blending behavior -#define A_MAX (0x100) // Impossible blending uses coeff bigger than 1 -#define C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) -#define NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) -#define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds +#define BLEND_A_MAX (0x100) // Impossible blending uses coeff bigger than 1 +#define BLEND_C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) +#define BLEND_NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) +#define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds #ifdef ENABLE_OGL_DEBUG_MEM_BW extern uint64 g_real_texture_upload_byte; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 51fba0b82e..d08a86e6ff 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -353,9 +353,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; // SW Blend is (nearly) free. Let's use it. 
- bool free_blend = (blend_flag & NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO); + bool free_blend = (blend_flag & BLEND_NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO); // We really need SW blending for this one, barely used - bool impossible_blend = (blend_flag & A_MAX); + bool impossible_blend = (blend_flag & BLEND_A_MAX); // Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd bool accumulation_blend = (blend_flag & BLEND_ACCU); @@ -428,7 +428,7 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL } // No need to flush for every primitive - require_barrier |= !(blend_flag & NO_BAR) && !accumulation_blend; + require_barrier |= !(blend_flag & BLEND_NO_BAR) && !accumulation_blend; } else { ps_sel.clr1 = om_bsel.IsCLR1(); if (ps_sel.dfmt == 1 && ALPHA.C == 1) { diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index c0bdcbc66d..67716f1bc0 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -107,7 +107,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix) bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); - if (m_blendMapD3D9[i].bogus & A_MAX) { + if (m_blendMapD3D9[i].bogus & BLEND_A_MAX) { if (!theApp.GetConfig("accurate_blending_unit", 1)) { bs->EnableBlend(); if (bsel.a == 0) From 97b38d9e1bf0a26bff41766d0206a5498393698e Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 31 Jul 2015 09:24:10 +0200 Subject: [PATCH 12/16] gsdx-ogl: directly set impossible mode in the blending table Avoid hacking it at blend state creation. This allows the table to be reused directly in the future instead of converting it into a blend object. --- plugins/GSdx/GSDeviceOGL.cpp | 18 +++++++++--------- plugins/GSdx/GSDeviceOGL.h | 6 +++--- plugins/GSdx/GSTextureFXOGL.cpp | 18 +----------------- 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index
571700e47d..a7d178d121 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -1535,32 +1535,32 @@ const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3] = { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 0020: (Cs - Cs)*F + Cs ==> Cs { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 0021: (Cs - Cs)*F + Cd ==> Cd { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ZERO} , // 0022: (Cs - Cs)*F + 0 ==> 0 - { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As + { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_SRCALPHA} , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As { 0 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_INVSRCALPHA} , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As - { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad + { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_DESTALPHA} , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_INVDESTALPHA} , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad) { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad - { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F + { BLEND_A_MAX , D3DBLENDOP_SUBTRACT , D3DBLEND_ONE , D3DBLEND_BLENDFACTOR} , //*0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F { 0 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) { 0 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F - { BLEND_NO_BAR | BLEND_A_MAX , 
D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) + { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) { BLEND_ACCU , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , //?0201: (Cs - 0)*As + Cd ==> Cs*As + Cd { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As - { BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) + { BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd { 0 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad - { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) + { BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) { BLEND_ACCU , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , //?0221: (Cs - 0)*F + Cd ==> Cs*F + Cd { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F { 0 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) - { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As + { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , // 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As { 0 , D3DBLENDOP_ADD , D3DBLEND_INVDESTALPHA , D3DBLEND_DESTALPHA} , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad) - { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , 
D3DBLEND_DESTALPHA} , //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad + { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA , D3DBLEND_DESTALPHA} , // 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad { 0 , D3DBLENDOP_ADD , D3DBLEND_INVBLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) - { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F + { BLEND_A_MAX , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F { 0 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F { BLEND_NO_BAR , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ZERO} , // 1100: (Cd - Cd)*As + Cs ==> Cs { 0 , D3DBLENDOP_ADD , D3DBLEND_ZERO , D3DBLEND_ONE} , // 1101: (Cd - Cd)*As + Cd ==> Cd diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 6a346c9d13..f1efcfbc7a 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -30,9 +30,9 @@ #include "GLState.h" // A couple of flag to determine the blending behavior -#define BLEND_A_MAX (0x100) // Impossible blending uses coeff bigger than 1 -#define BLEND_C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) -#define BLEND_NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) +#define BLEND_A_MAX (0x100) // Impossible blending uses coeff bigger than 1 +#define BLEND_C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor) +#define BLEND_NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used) #define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds #ifdef 
ENABLE_OGL_DEBUG_MEM_BW diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 67716f1bc0..c5f39cb649 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -106,23 +106,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix) int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d; bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst); - - if (m_blendMapD3D9[i].bogus & BLEND_A_MAX) { - if (!theApp.GetConfig("accurate_blending_unit", 1)) { - bs->EnableBlend(); - if (bsel.a == 0) - bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst); - else - bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE); - } - - const string afixstr = format("%f", afix); - const char *col[3] = {"Cs", "Cd", "0"}; - const char *alpha[3] = {"As", "Ad", afixstr.c_str()}; - fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]); - } else { - bs->EnableBlend(); - } + bs->EnableBlend(); } return bs; From a0edcb58af8c725ef778ac343c9c72ebb8040217 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 25 Jul 2015 18:20:00 +0200 Subject: [PATCH 13/16] gsdx-ogl: extend cclip blending level with destination alpha blending The purpose is to correctly emulate the destination alpha factor. An alpha channel of 128 is 1.0 in the GS but only ~0.5 in the GPU. I think few draw calls use destination alpha, so the impact on perf should remain small.
--- plugins/GSdx/GSRendererOGL.cpp | 19 +++++++++++-------- plugins/GSdx/GSRendererOGL.h | 2 +- plugins/GSdx/GSSetting.cpp | 6 +++--- plugins/GSdx/GSdx.cpp | 4 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index d08a86e6ff..9a2d49ba79 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -366,7 +366,7 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL if (accumulation_blend) { ps_sel.hdr = 1; GL_INS("COLCLIP Fast HDR mode ENABLED"); - } else if (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base) { + } else if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA || sw_blending_base) { ps_sel.colclip = 1; sw_blending_base = true; GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); @@ -378,17 +378,18 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL // Note: Option is duplicated, one impact the blend unit / the other the shader. 
sw_blending_base |= accumulation_blend; - bool all_blend_sw; + // Warning no break on purpose + bool sw_blending_adv = false; switch (m_sw_blending) { - case ACC_BLEND_ULTRA: all_blend_sw = true; break; - case ACC_BLEND_FULL: all_blend_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ); break; - case ACC_BLEND_CCLIP: - case ACC_BLEND_SPRITE: all_blend_sw = m_vt.m_primclass == GS_SPRITE_CLASS; break; - default: all_blend_sw = false; break; + case ACC_BLEND_ULTRA: sw_blending_adv |= true; + case ACC_BLEND_FULL: sw_blending_adv |= !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ); + case ACC_BLEND_CCLIP_DALPHA: sw_blending_adv |= (ALPHA.C == 1); + case ACC_BLEND_SPRITE: sw_blending_adv |= m_vt.m_primclass == GS_SPRITE_CLASS; + default: break; } bool sw_blending = sw_blending_base // Free case or Impossible blend - || all_blend_sw // all blend + || sw_blending_adv // complex blending case (for special effect) || ps_sel.fbmask; // accurate fbmask @@ -399,7 +400,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL sw_blending &= !DATE_GL42; // Seriously don't expect me to support this kind of crazyness. 
// No mix of COLCLIP + accumulation_blend + DATE GL42 + // Neither fbmask and GL42 ASSERT(!(ps_sel.hdr && DATE_GL42)); + ASSERT(!(ps_sel.fbmask && DATE_GL42)); // For stat to optimize accurate option #if 0 diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index c8ab0e24ee..5368febe5a 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -39,7 +39,7 @@ class GSRendererOGL : public GSRendererHW ACC_BLEND_NONE = 0, ACC_BLEND_FREE = 1, ACC_BLEND_SPRITE = 2, - ACC_BLEND_CCLIP = 3, + ACC_BLEND_CCLIP_DALPHA = 3, ACC_BLEND_FULL = 4, ACC_BLEND_ULTRA = 5 }; diff --git a/plugins/GSdx/GSSetting.cpp b/plugins/GSdx/GSSetting.cpp index 139b167325..c191e855ea 100644 --- a/plugins/GSdx/GSSetting.cpp +++ b/plugins/GSdx/GSSetting.cpp @@ -123,11 +123,11 @@ const char* dialog_message(int ID, bool* updateText) { "------------------------------------------------------------------\n" "Basic\t: Emulate correctly most of the effects with a limited speed penality. It is the recommended setting.\n" "------------------------------------------------------------------\n" - "Medium\t: Add emulation of all sprites. Performance impact remains reasonable in 3D game.\n" + "Medium\t: Extend it to all sprites. Performance impact remains reasonable in 3D game.\n" "------------------------------------------------------------------\n" - "High\t: Add full emulation of color wrapping. It helps Castlevania games. Be aware that it will half your FPS.\n" + "High\t: Extend it to destination alpha blending and color wrapping. (help shadow and fog effect). A good CPU is required\n" "------------------------------------------------------------------\n" - "Full\t\t: Except few cases, the blending unit will be fully emulated by the shader. It is very slow! It is intended for debug\n" + "Full\t\t: Except few cases, the blending unit will be fully emulated by the shader. It is ultra slow! 
It is intended for debug\n" "------------------------------------------------------------------\n" "Ultra\t: The blending unit will be completely emulated by the shader. It is ultra slow! It is intended for debug\n"; #endif diff --git a/plugins/GSdx/GSdx.cpp b/plugins/GSdx/GSdx.cpp index 97e23475ee..19787cde2e 100644 --- a/plugins/GSdx/GSdx.cpp +++ b/plugins/GSdx/GSdx.cpp @@ -187,9 +187,9 @@ GSdxApp::GSdxApp() m_gs_crc_level.push_back(GSSetting(4 , "Aggressive", "")); m_gs_acc_blend_level.push_back(GSSetting(0, "None", "Fastest")); - m_gs_acc_blend_level.push_back(GSSetting(1, "Basic", "Recommended")); + m_gs_acc_blend_level.push_back(GSSetting(1, "Basic", "Recommended low-end PC")); m_gs_acc_blend_level.push_back(GSSetting(2, "Medium", "")); - m_gs_acc_blend_level.push_back(GSSetting(3, "High", "Slow")); + m_gs_acc_blend_level.push_back(GSSetting(3, "High", "Recommended high-end PC")); m_gs_acc_blend_level.push_back(GSSetting(4, "Full", "Very Slow")); m_gs_acc_blend_level.push_back(GSSetting(5, "Ultra", "Ultra Slow")); From fff59f547da6434cd943132ac207c6932cf2b57f Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 31 Jul 2015 19:43:06 +0200 Subject: [PATCH 14/16] gsdx-ogl: fbmask regression! 
don't use bit operation with integer --- plugins/GSdx/GSRendererOGL.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 9a2d49ba79..10e6adabfd 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -263,14 +263,15 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s ps_sel.fbmask = 1; } - ps_sel.fbmask &= m_sw_blending; - if (ps_sel.fbmask) { + if (ps_sel.fbmask && m_sw_blending) { GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); ps_cb.FbMask.r = rg_mask; ps_cb.FbMask.g = rg_mask; ps_cb.FbMask.b = ba_mask; ps_cb.FbMask.a = ba_mask; require_barrier = true; + } else { + ps_sel.fbmask = 0; } } else { @@ -297,8 +298,7 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s ps_sel.fbmask = 1; } - ps_sel.fbmask &= m_sw_blending; - if (ps_sel.fbmask) { + if (ps_sel.fbmask && m_sw_blending) { GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", m_context->FRAME.FBMSK, (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 2) ? 
16 : 32); ps_cb.FbMask.r = r_mask; @@ -306,6 +306,8 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s ps_cb.FbMask.b = b_mask; ps_cb.FbMask.a = a_mask; require_barrier = true; + } else { + ps_sel.fbmask = 0; } } } From eb0fa8c7dc6d2d109719a50b6909a0628072e356 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 1 Aug 2015 01:27:22 +0200 Subject: [PATCH 15/16] gsdx-ogl: fix bad detection of overlapping avoid rendering corruption with SW blending --- plugins/GSdx/GSRendererOGL.cpp | 44 ++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 10e6adabfd..3d3ec34455 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -460,20 +460,44 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() size_t count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; - for(size_t i = 0; i < count; i += 2) { - // Very bad code - GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y); - for (size_t j = i+2; j < count; j += 2) { - GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y); - GSVector4i inter = vi.rintersect(vj); - if (!inter.rempty()) { - //fprintf(stderr, "Overlap found between %d and %d (draw of %d vertices)\n", i, j, count); - return PRIM_OVERLAP_YES; + // You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left + // There is a high probability that the draw call will use the same ordering for all vertices.
+ // In order to keep a small performance impact only the first sprite will be checked + // + // Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact + if (v[1].XYZ.Y < v[0].XYZ.Y) { + // First vertex is Top-Left + for(size_t i = 0; i < count; i += 2) { + if (v[i+1].XYZ.Y > v[i].XYZ.Y) { + return PRIM_OVERLAP_UNKNOW; + } + GSVector4i vi(v[i].XYZ.X, v[i+1].XYZ.Y, v[i+1].XYZ.X, v[i].XYZ.Y); + for (size_t j = i+2; j < count; j += 2) { + GSVector4i vj(v[j].XYZ.X, v[j+1].XYZ.Y, v[j+1].XYZ.X, v[j].XYZ.Y); + GSVector4i inter = vi.rintersect(vj); + if (!inter.rempty()) { + return PRIM_OVERLAP_YES; + } + } + } + } else { + // First vertex is Bottom-Left + for(size_t i = 0; i < count; i += 2) { + if (v[i+1].XYZ.Y < v[i].XYZ.Y) { + return PRIM_OVERLAP_UNKNOW; + } + GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y); + for (size_t j = i+2; j < count; j += 2) { + GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y); + GSVector4i inter = vi.rintersect(vj); + if (!inter.rempty()) { + return PRIM_OVERLAP_YES; + } } } } - //fprintf(stderr, "Yes, code can be optimized (draw of %d vertices)\n", count); + //fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count); return PRIM_OVERLAP_NO; } From 4a3c145c7244ac10c7f60313a449da83c8810d58 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 31 Jul 2015 23:23:17 +0200 Subject: [PATCH 16/16] gsdx-ogl: depth support: better support of 16 bits z buffer Fix issue in socom2 --- plugins/GSdx/GSDeviceOGL.cpp | 2 +- plugins/GSdx/GSDeviceOGL.h | 2 +- plugins/GSdx/GSTextureCache.cpp | 5 +++-- plugins/GSdx/res/glsl/convert.glsl | 19 ++++++++++++++++--- plugins/GSdx/res/glsl_source.h | 19 ++++++++++++++++--- 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index a7d178d121..e03da76c4f 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -940,7 
+940,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture return; } - bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13]); + bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13] || ps == m_convert.ps[14]); // Performance optimization. It might be faster to use a framebuffer blit for standard case // instead to emulate it with shader diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index f1efcfbc7a..898654c28e 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -500,7 +500,7 @@ class GSDeviceOGL : public GSDevice struct { GLuint vs; // program object - GLuint ps[15]; // program object + GLuint ps[16]; // program object GLuint ln; // sampler object GLuint pt; // sampler object GSDepthStencilOGL* dss; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 3839b7238c..67c139c0d4 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -283,7 +283,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int if (type == DepthStencil) { GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); - int shader = (TEX0.PSM & 1) ? 
13 : 12; + int shader = 12 + GSLocalMemory::m_psm[TEX0.PSM].fmt; + ASSERT(shader <= 14); m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false); } else { GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); @@ -861,7 +862,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (is_8bits) { GL_INS("Reading RT as a packed-indexed 8 bits format"); - shader = 14; // ask a conversion to 8 bits format + shader = 15; // ask a conversion to 8 bits format } #ifdef ENABLE_OGL_DEBUG diff --git a/plugins/GSdx/res/glsl/convert.glsl b/plugins/GSdx/res/glsl/convert.glsl index 0ed4b16a18..33bb05a803 100644 --- a/plugins/GSdx/res/glsl/convert.glsl +++ b/plugins/GSdx/res/glsl/convert.glsl @@ -194,17 +194,30 @@ void ps_main12() //out float gl_FragDepth; void ps_main13() { - // Same as above but without the alpha channel + // Same as above but without the alpha channel (24 bits Z) // Convert a RRGBA texture into a float depth texture // FIXME: I'm afraid of the accuracy - const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0); - gl_FragDepth = dot(sample_c(), bitSh); + const vec3 bitSh = vec3(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0) * vec3(255.0/256.0); + gl_FragDepth = dot(sample_c().rgb, bitSh); } #endif #ifdef ps_main14 +//out float gl_FragDepth; void ps_main14() +{ + // Same as above but without the A/B channels (16 bits Z) + + // Convert a RRGBA texture into a float depth texture + // FIXME: I'm afraid of the accuracy + const vec2 bitSh = vec2(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0)) * vec2(255.0/256.0); + gl_FragDepth = dot(sample_c().rg, bitSh); +} +#endif + +#ifdef ps_main15 +void ps_main15() { // Potential speed optimization. 
There is a high probability that diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index c1d638e4ea..75f270d9bb 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -219,18 +219,31 @@ static const char* convert_glsl = "//out float gl_FragDepth;\n" "void ps_main13()\n" "{\n" - " // Same as above but without the alpha channel\n" + " // Same as above but without the alpha channel (24 bits Z)\n" "\n" " // Convert a RRGBA texture into a float depth texture\n" " // FIXME: I'm afraid of the accuracy\n" - " const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0);\n" - " gl_FragDepth = dot(sample_c(), bitSh);\n" + " const vec3 bitSh = vec3(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0) * vec3(255.0/256.0);\n" + " gl_FragDepth = dot(sample_c().rgb, bitSh);\n" "}\n" "#endif\n" "\n" "#ifdef ps_main14\n" + "//out float gl_FragDepth;\n" "void ps_main14()\n" "{\n" + " // Same as above but without the A/B channels (16 bits Z)\n" + "\n" + " // Convert a RRGBA texture into a float depth texture\n" + " // FIXME: I'm afraid of the accuracy\n" + " const vec2 bitSh = vec2(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0)) * vec2(255.0/256.0);\n" + " gl_FragDepth = dot(sample_c().rg, bitSh);\n" + "}\n" + "#endif\n" + "\n" + "#ifdef ps_main15\n" + "void ps_main15()\n" + "{\n" "\n" " // Potential speed optimization. There is a high probability that\n" " // game only want to extract a single channel (blue). It will allow\n"