Merge pull request #688 from PCSX2/hdr-colclip

Hdr colclip
2015-08-02 18:13:28 +02:00 · 2015-08-02 18:13:28 +02:00 · 8424c18e9f
parent 1f402b1b56 4a3c145c72
commit 8424c18e9f
12 changed files with 409 additions and 446 deletions
--- a/plugins/GSdx/GLLoader.cpp
+++ b/plugins/GSdx/GLLoader.cpp
@ -507,6 +507,7 @@ namespace GLLoader {
 		if (!found_GL_ARB_texture_barrier) {
 			fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver. You can't emulate correctly the GS blending unit! Sorry!\n");
 			theApp.SetConfig("accurate_blending_unit", 0);
+			theApp.SetConfig("accurate_date", 0);
 		}

 		fprintf(stderr, "\n");
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@ -133,15 +133,15 @@ GSDeviceOGL::~GSDeviceOGL()
 	gl_DeleteSamplers(1, &m_palette_ss);
 	m_shader->Delete(m_apitrace);

-	for (uint32 key = 0; key < VSSelector::size(); key++) m_shader->Delete(m_vs[key]);
-	for (uint32 key = 0; key < GSSelector::size(); key++) m_shader->Delete(m_gs[key]);
+	for (uint32 key = 0; key < countof(m_vs); key++) m_shader->Delete(m_vs[key]);
+	for (uint32 key = 0; key < countof(m_gs); key++) m_shader->Delete(m_gs[key]);
 	for (auto it = m_ps.begin(); it != m_ps.end() ; it++) m_shader->Delete(it->second);

 	m_ps.clear();

-	gl_DeleteSamplers(PSSamplerSelector::size(), m_ps_ss);
+	gl_DeleteSamplers(countof(m_ps_ss), m_ps_ss);

-	for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++) delete m_om_dss[key];
+	for (uint32 key = 0; key < countof(m_om_dss); key++) delete m_om_dss[key];

 	for (auto it = m_om_bs.begin(); it != m_om_bs.end(); it++) delete it->second;
 	m_om_bs.clear();
@ -238,8 +238,9 @@ bool GSDeviceOGL::Create(GSWnd* wnd)
 	// ****************************************************************
 	// Pre Generate the different sampler object
 	// ****************************************************************
-	for (uint32 key = 0; key < PSSamplerSelector::size(); key++)
+	for (uint32 key = 0; key < countof(m_ps_ss); key++) {
 		m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
+	}

 	// ****************************************************************
 	// convert
@ -666,7 +667,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
 		+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
 		+ format("#define PS_READ_BA %d\n", sel.read_ba)
 		+ format("#define PS_FBMASK %d\n", sel.fbmask)
-		+ format("#define PS_BLEND_ACCU %d\n", sel.blend_accu)
+		+ format("#define PS_HDR %d\n", sel.hdr)
 		;

 	return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
@ -695,7 +696,7 @@ void GSDeviceOGL::SelfShaderTest()
 	int perf = 0;
 	int all = 0;
 	// Test: SW blending
-	for (int colclip = 0; colclip < 4; colclip += 3) {
+	for (int colclip = 0; colclip < 2; colclip++) {
 		for (int fmt = 0; fmt < 3; fmt++) {
 			for (int i = 0; i < 3; i++) {
 				PSSelector sel;
@ -786,18 +787,6 @@ void GSDeviceOGL::SelfShaderTest()
 	}
 	PRINT_TEST("Fst/Tc/IIp");

-	// Test: Colclip
-	for (int colclip = 0; colclip < 3; colclip += 1) {
-		PSSelector sel;
-		sel.tfx = 4;
-		sel.atst = 1;
-
-		sel.colclip = colclip;
-		std::string file = format("Shader_Colclip_%d.glsl.asm", colclip);
-		RUN_TEST;
-	}
-	PRINT_TEST("Colclip");
-
 	// Test: tfx/tcc
 	for (int tfx = 0; tfx < 5; tfx++) {
 		for (int tcc = 0; tcc < 2; tcc++) {
@ -889,7 +878,7 @@ GSTexture* GSDeviceOGL::CopyOffscreen(GSTexture* src, const GSVector4& sRect, in
 }

 // Copy a sub part of texture (same as below but force a conversion)
-void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r)
+void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin)
 {
 	const GLuint& sid = sTex->GetID();
 	const GLuint& did = dTex->GetID();
@ -899,7 +888,10 @@ void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4
 	gl_BindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);

 	gl_FramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sid, 0);
-	gl_CopyTextureSubImage2D(did, GL_TEX_LEVEL_0, 0, 0, r.x, r.y, r.width(), r.height());
+	if (at_origin)
+		gl_CopyTextureSubImage2D(did, GL_TEX_LEVEL_0, 0, 0, r.x, r.y, r.width(), r.height());
+	else
+		gl_CopyTextureSubImage2D(did, GL_TEX_LEVEL_0, r.x, r.y, r.x, r.y, r.width(), r.height());

 	gl_BindFramebuffer(GL_READ_FRAMEBUFFER, 0);

@ -924,7 +916,7 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r
 				r.width(), r.height(), 1);
 	} else {
 		// Slower copy (conversion is done)
-		CopyRectConv(sTex, dTex, r);
+		CopyRectConv(sTex, dTex, r, true);
 	}

 	GL_POP();
@ -948,7 +940,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
 		return;
 	}

-	bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13]);
+	bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13] || ps == m_convert.ps[14]);

 	// Performance optimization. It might be faster to use a framebuffer blit for standard case
 	// instead to emulate it with shader
@ -1510,6 +1502,11 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
 // 1211 Cd*(1 + Ad) => Source * Dest color + Dest * Dest alpha
 // 1221 Cd*(1 + F) => Source * Dest color + Dest * Factor

+// Legend for the special markers that prefix some entries of the blending table:
+// # (tricky) => 1 * Cd + Cd * F => use (Cd, F) as the blend factors of the colors (1, Cd)
+// * (bogus) => C * (1 + F) + ... => the factor is always bigger than 1 (except in the case above)
+// ? => Cs * F + Cd => an optimization: the multiplication is done in the shader and the addition in the blending unit
+
 // Copy Dx blend table and convert it to ogl
 #define D3DBLENDOP_ADD			GL_FUNC_ADD
 #define D3DBLENDOP_SUBTRACT		GL_FUNC_SUBTRACT
@ -1526,87 +1523,88 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
 #define D3DBLEND_SRCALPHA		GL_SRC1_ALPHA
 #define D3DBLEND_INVSRCALPHA	GL_ONE_MINUS_SRC1_ALPHA

+
 const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3] =
 {
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 0000: (Cs - Cs)*As + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 0001: (Cs - Cs)*As + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 0002: (Cs - Cs)*As +  0 ==> 0
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 0010: (Cs - Cs)*Ad + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 0011: (Cs - Cs)*Ad + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 0012: (Cs - Cs)*Ad +  0 ==> 0
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 0020: (Cs - Cs)*F  + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 0021: (Cs - Cs)*F  + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 0022: (Cs - Cs)*F  +  0 ==> 0
-	{ A_MAX | 4          , D3DBLENDOP_SUBTRACT    , D3DBLEND_SRCALPHA       , D3DBLEND_SRCALPHA}       , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As
-	{ 13                 , D3DBLENDOP_ADD         , D3DBLEND_SRCALPHA       , D3DBLEND_INVSRCALPHA}    , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)
-	{ 14                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_SRCALPHA       , D3DBLEND_SRCALPHA}       , // 0102: (Cs - Cd)*As +  0 ==> Cs*As - Cd*As
-	{ A_MAX | 5          , D3DBLENDOP_SUBTRACT    , D3DBLEND_DESTALPHA      , D3DBLEND_DESTALPHA}      , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad
-	{ 15                 , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_INVDESTALPHA}   , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad)
-	{ 16                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_DESTALPHA      , D3DBLEND_DESTALPHA}      , // 0112: (Cs - Cd)*Ad +  0 ==> Cs*Ad - Cd*Ad
-	{ A_MAX | 6          , D3DBLENDOP_SUBTRACT    , D3DBLEND_BLENDFACTOR    , D3DBLEND_BLENDFACTOR}    , //*0120: (Cs - Cd)*F  + Cs ==> Cs*(F + 1) - Cd*F
-	{ 17                 , D3DBLENDOP_ADD         , D3DBLEND_BLENDFACTOR    , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F  + Cd ==> Cs*F + Cd*(1 - F)
-	{ 18                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_BLENDFACTOR    , D3DBLEND_BLENDFACTOR}    , // 0122: (Cs - Cd)*F  +  0 ==> Cs*F - Cd*F
-	{ NO_BAR | A_MAX | 7 , D3DBLENDOP_ADD         , D3DBLEND_SRCALPHA       , D3DBLEND_ZERO}           , //*0200: (Cs -  0)*As + Cs ==> Cs*(As + 1)
-	{ 19                 , D3DBLENDOP_ADD         , D3DBLEND_SRCALPHA       , D3DBLEND_ONE}            , // 0201: (Cs -  0)*As + Cd ==> Cs*As + Cd
-	{ NO_BAR | 20        , D3DBLENDOP_ADD         , D3DBLEND_SRCALPHA       , D3DBLEND_ZERO}           , // 0202: (Cs -  0)*As +  0 ==> Cs*As
-	{ A_MAX | 8          , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_ZERO}           , //*0210: (Cs -  0)*Ad + Cs ==> Cs*(Ad + 1)
-	{ 21                 , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_ONE}            , // 0211: (Cs -  0)*Ad + Cd ==> Cs*Ad + Cd
-	{ 22                 , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_ZERO}           , // 0212: (Cs -  0)*Ad +  0 ==> Cs*Ad
-	{ NO_BAR| A_MAX | 9  , D3DBLENDOP_ADD         , D3DBLEND_BLENDFACTOR    , D3DBLEND_ZERO}           , //*0220: (Cs -  0)*F  + Cs ==> Cs*(F + 1)
-	{ 23                 , D3DBLENDOP_ADD         , D3DBLEND_BLENDFACTOR    , D3DBLEND_ONE}            , // 0221: (Cs -  0)*F  + Cd ==> Cs*F + Cd
-	{ NO_BAR | 24        , D3DBLENDOP_ADD         , D3DBLEND_BLENDFACTOR    , D3DBLEND_ZERO}           , // 0222: (Cs -  0)*F  +  0 ==> Cs*F
-	{ 25                 , D3DBLENDOP_ADD         , D3DBLEND_INVSRCALPHA    , D3DBLEND_SRCALPHA}       , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)
-	{ A_MAX | 10         , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_SRCALPHA}       , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As
-	{ 26                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_SRCALPHA}       , // 1002: (Cd - Cs)*As +  0 ==> Cd*As - Cs*As
-	{ 27                 , D3DBLENDOP_ADD         , D3DBLEND_INVDESTALPHA   , D3DBLEND_DESTALPHA}      , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad)
-	{ A_MAX | 11         , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_DESTALPHA}      , //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad
-	{ 28                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_DESTALPHA}      , // 1012: (Cd - Cs)*Ad +  0 ==> Cd*Ad - Cs*Ad
-	{ 29                 , D3DBLENDOP_ADD         , D3DBLEND_INVBLENDFACTOR , D3DBLEND_BLENDFACTOR}    , // 1020: (Cd - Cs)*F  + Cs ==> Cd*F + Cs*(1 - F)
-	{ A_MAX | 12         , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_BLENDFACTOR}    , //*1021: (Cd - Cs)*F  + Cd ==> Cd*(F + 1) - Cs*F
-	{ 30                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_BLENDFACTOR}    , // 1022: (Cd - Cs)*F  +  0 ==> Cd*F - Cs*F
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 1100: (Cd - Cd)*As + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 1101: (Cd - Cd)*As + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 1102: (Cd - Cd)*As +  0 ==> 0
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 1110: (Cd - Cd)*Ad + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 1111: (Cd - Cd)*Ad + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 1112: (Cd - Cd)*Ad +  0 ==> 0
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 1120: (Cd - Cd)*F  + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 1121: (Cd - Cd)*F  + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 1122: (Cd - Cd)*F  +  0 ==> 0
-	{ 31                 , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_SRCALPHA}       , // 1200: (Cd -  0)*As + Cs ==> Cs + Cd*As
-	{ C_CLR | 55         , D3DBLENDOP_ADD         , D3DBLEND_DESTCOLOR      , D3DBLEND_SRCALPHA}       , //#1201: (Cd -  0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background
-	{ 32                 , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_SRCALPHA}       , // 1202: (Cd -  0)*As +  0 ==> Cd*As
-	{ 33                 , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_DESTALPHA}      , // 1210: (Cd -  0)*Ad + Cs ==> Cs + Cd*Ad
-	{ C_CLR | 56         , D3DBLENDOP_ADD         , D3DBLEND_DESTCOLOR      , D3DBLEND_DESTALPHA}      , //#1211: (Cd -  0)*Ad + Cd ==> Cd*(1 + Ad)
-	{ 34                 , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_DESTALPHA}      , // 1212: (Cd -  0)*Ad +  0 ==> Cd*Ad
-	{  35                , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_BLENDFACTOR}    , // 1220: (Cd -  0)*F  + Cs ==> Cs + Cd*F
-	{ C_CLR | 57         , D3DBLENDOP_ADD         , D3DBLEND_DESTCOLOR      , D3DBLEND_BLENDFACTOR}    , //#1221: (Cd -  0)*F  + Cd ==> Cd*(1 + F)
-	{ 36                 , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_BLENDFACTOR}    , // 1222: (Cd -  0)*F  +  0 ==> Cd*F
-	{ NO_BAR | 37        , D3DBLENDOP_ADD         , D3DBLEND_INVSRCALPHA    , D3DBLEND_ZERO}           , // 2000: (0  - Cs)*As + Cs ==> Cs*(1 - As)
-	{ 38                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_ONE}            , // 2001: (0  - Cs)*As + Cd ==> Cd - Cs*As
-	{ NO_BAR | 39        , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_ZERO}           , // 2002: (0  - Cs)*As +  0 ==> 0 - Cs*As
-	{ 40                 , D3DBLENDOP_ADD         , D3DBLEND_INVDESTALPHA   , D3DBLEND_ZERO}           , // 2010: (0  - Cs)*Ad + Cs ==> Cs*(1 - Ad)
-	{ 41                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_ONE}            , // 2011: (0  - Cs)*Ad + Cd ==> Cd - Cs*Ad
-	{ 42                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_ZERO}           , // 2012: (0  - Cs)*Ad +  0 ==> 0 - Cs*Ad
-	{ NO_BAR | 43        , D3DBLENDOP_ADD         , D3DBLEND_INVBLENDFACTOR , D3DBLEND_ZERO}           , // 2020: (0  - Cs)*F  + Cs ==> Cs*(1 - F)
-	{ 44                 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_ONE}            , // 2021: (0  - Cs)*F  + Cd ==> Cd - Cs*F
-	{ NO_BAR | 45        , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_ZERO}           , // 2022: (0  - Cs)*F  +  0 ==> 0 - Cs*F
-	{ 46                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_SRCALPHA}       , // 2100: (0  - Cd)*As + Cs ==> Cs - Cd*As
-	{ 47                 , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_INVSRCALPHA}    , // 2101: (0  - Cd)*As + Cd ==> Cd*(1 - As)
-	{ 48                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_ZERO           , D3DBLEND_SRCALPHA}       , // 2102: (0  - Cd)*As +  0 ==> 0 - Cd*As
-	{ 49                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_DESTALPHA}      , // 2110: (0  - Cd)*Ad + Cs ==> Cs - Cd*Ad
-	{ 50                 , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_INVDESTALPHA}   , // 2111: (0  - Cd)*Ad + Cd ==> Cd*(1 - Ad)
-	{ 51                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_DESTALPHA}      , // 2112: (0  - Cd)*Ad +  0 ==> 0 - Cd*Ad
-	{ 52                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_BLENDFACTOR}    , // 2120: (0  - Cd)*F  + Cs ==> Cs - Cd*F
-	{ 53                 , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_INVBLENDFACTOR} , // 2121: (0  - Cd)*F  + Cd ==> Cd*(1 - F)
-	{ 54                 , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_BLENDFACTOR}    , // 2122: (0  - Cd)*F  +  0 ==> 0 - Cd*F
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 2200: (0  -  0)*As + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 2201: (0  -  0)*As + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 2202: (0  -  0)*As +  0 ==> 0
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 2210: (0  -  0)*Ad + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 2211: (0  -  0)*Ad + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 2212: (0  -  0)*Ad +  0 ==> 0
-	{ NO_BAR | 1         , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 2220: (0  -  0)*F  + Cs ==> Cs
-	{ 2                  , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 2221: (0  -  0)*F  + Cd ==> Cd
-	{ NO_BAR | 3         , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 2222: (0  -  0)*F  +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 0000: (Cs - Cs)*As + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 0001: (Cs - Cs)*As + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 0002: (Cs - Cs)*As +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 0010: (Cs - Cs)*Ad + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 0011: (Cs - Cs)*Ad + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 0012: (Cs - Cs)*Ad +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 0020: (Cs - Cs)*F  + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 0021: (Cs - Cs)*F  + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 0022: (Cs - Cs)*F  +  0 ==> 0
+	{ BLEND_A_MAX                , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_SRCALPHA}       , //*0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_SRCALPHA       , D3DBLEND_INVSRCALPHA}    , // 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_SRCALPHA       , D3DBLEND_SRCALPHA}       , // 0102: (Cs - Cd)*As +  0 ==> Cs*As - Cd*As
+	{ BLEND_A_MAX                , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_DESTALPHA}      , //*0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_INVDESTALPHA}   , // 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_DESTALPHA      , D3DBLEND_DESTALPHA}      , // 0112: (Cs - Cd)*Ad +  0 ==> Cs*Ad - Cd*Ad
+	{ BLEND_A_MAX                , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_BLENDFACTOR}    , //*0120: (Cs - Cd)*F  + Cs ==> Cs*(F + 1) - Cd*F
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_BLENDFACTOR    , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F  + Cd ==> Cs*F + Cd*(1 - F)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_BLENDFACTOR    , D3DBLEND_BLENDFACTOR}    , // 0122: (Cs - Cd)*F  +  0 ==> Cs*F - Cd*F
+	{ BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , //*0200: (Cs -  0)*As + Cs ==> Cs*(As + 1)
+	{ BLEND_ACCU                 , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ONE}            , //?0201: (Cs -  0)*As + Cd ==> Cs*As + Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_SRCALPHA       , D3DBLEND_ZERO}           , // 0202: (Cs -  0)*As +  0 ==> Cs*As
+	{ BLEND_A_MAX                , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , //*0210: (Cs -  0)*Ad + Cs ==> Cs*(Ad + 1)
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_ONE}            , // 0211: (Cs -  0)*Ad + Cd ==> Cs*Ad + Cd
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_DESTALPHA      , D3DBLEND_ZERO}           , // 0212: (Cs -  0)*Ad +  0 ==> Cs*Ad
+	{ BLEND_NO_BAR | BLEND_A_MAX , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , //*0220: (Cs -  0)*F  + Cs ==> Cs*(F + 1)
+	{ BLEND_ACCU                 , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ONE}            , //?0221: (Cs -  0)*F  + Cd ==> Cs*F + Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_BLENDFACTOR    , D3DBLEND_ZERO}           , // 0222: (Cs -  0)*F  +  0 ==> Cs*F
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_INVSRCALPHA    , D3DBLEND_SRCALPHA}       , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)
+	{ BLEND_A_MAX                , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_ONE}            , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_SRCALPHA}       , // 1002: (Cd - Cs)*As +  0 ==> Cd*As - Cs*As
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_INVDESTALPHA   , D3DBLEND_DESTALPHA}      , // 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad)
+	{ BLEND_A_MAX                , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_ONE}            , //*1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_DESTALPHA}      , // 1012: (Cd - Cs)*Ad +  0 ==> Cd*Ad - Cs*Ad
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_INVBLENDFACTOR , D3DBLEND_BLENDFACTOR}    , // 1020: (Cd - Cs)*F  + Cs ==> Cd*F + Cs*(1 - F)
+	{ BLEND_A_MAX                , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_ONE}            , //*1021: (Cd - Cs)*F  + Cd ==> Cd*(F + 1) - Cs*F
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_BLENDFACTOR}    , // 1022: (Cd - Cs)*F  +  0 ==> Cd*F - Cs*F
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 1100: (Cd - Cd)*As + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 1101: (Cd - Cd)*As + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 1102: (Cd - Cd)*As +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 1110: (Cd - Cd)*Ad + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 1111: (Cd - Cd)*Ad + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 1112: (Cd - Cd)*Ad +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 1120: (Cd - Cd)*F  + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 1121: (Cd - Cd)*F  + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 1122: (Cd - Cd)*F  +  0 ==> 0
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_SRCALPHA}       , // 1200: (Cd -  0)*As + Cs ==> Cs + Cd*As
+	{ BLEND_C_CLR                , D3DBLENDOP_ADD         , D3DBLEND_DESTCOLOR      , D3DBLEND_SRCALPHA}       , //#1201: (Cd -  0)*As + Cd ==> Cd*(1 + As) // ffxii main menu background
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_SRCALPHA}       , // 1202: (Cd -  0)*As +  0 ==> Cd*As
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_DESTALPHA}      , // 1210: (Cd -  0)*Ad + Cs ==> Cs + Cd*Ad
+	{ BLEND_C_CLR                , D3DBLENDOP_ADD         , D3DBLEND_DESTCOLOR      , D3DBLEND_DESTALPHA}      , //#1211: (Cd -  0)*Ad + Cd ==> Cd*(1 + Ad)
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_DESTALPHA}      , // 1212: (Cd -  0)*Ad +  0 ==> Cd*Ad
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_BLENDFACTOR}    , // 1220: (Cd -  0)*F  + Cs ==> Cs + Cd*F
+	{ BLEND_C_CLR                , D3DBLENDOP_ADD         , D3DBLEND_DESTCOLOR      , D3DBLEND_BLENDFACTOR}    , //#1221: (Cd -  0)*F  + Cd ==> Cd*(1 + F)
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_BLENDFACTOR}    , // 1222: (Cd -  0)*F  +  0 ==> Cd*F
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_INVSRCALPHA    , D3DBLEND_ZERO}           , // 2000: (0  - Cs)*As + Cs ==> Cs*(1 - As)
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_ONE}            , // 2001: (0  - Cs)*As + Cd ==> Cd - Cs*As
+	{ BLEND_NO_BAR               , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA       , D3DBLEND_ZERO}           , // 2002: (0  - Cs)*As +  0 ==> 0 - Cs*As
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_INVDESTALPHA   , D3DBLEND_ZERO}           , // 2010: (0  - Cs)*Ad + Cs ==> Cs*(1 - Ad)
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_ONE}            , // 2011: (0  - Cs)*Ad + Cd ==> Cd - Cs*Ad
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_DESTALPHA      , D3DBLEND_ZERO}           , // 2012: (0  - Cs)*Ad +  0 ==> 0 - Cs*Ad
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_INVBLENDFACTOR , D3DBLEND_ZERO}           , // 2020: (0  - Cs)*F  + Cs ==> Cs*(1 - F)
+	{ 0                          , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_ONE}            , // 2021: (0  - Cs)*F  + Cd ==> Cd - Cs*F
+	{ BLEND_NO_BAR               , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR    , D3DBLEND_ZERO}           , // 2022: (0  - Cs)*F  +  0 ==> 0 - Cs*F
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_SRCALPHA}       , // 2100: (0  - Cd)*As + Cs ==> Cs - Cd*As
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_INVSRCALPHA}    , // 2101: (0  - Cd)*As + Cd ==> Cd*(1 - As)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_ZERO           , D3DBLEND_SRCALPHA}       , // 2102: (0  - Cd)*As +  0 ==> 0 - Cd*As
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_DESTALPHA}      , // 2110: (0  - Cd)*Ad + Cs ==> Cs - Cd*Ad
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_INVDESTALPHA}   , // 2111: (0  - Cd)*Ad + Cd ==> Cd*(1 - Ad)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_ZERO           , D3DBLEND_DESTALPHA}      , // 2112: (0  - Cd)*Ad +  0 ==> 0 - Cd*Ad (source factor must be ZERO, as in 2102)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_ONE            , D3DBLEND_BLENDFACTOR}    , // 2120: (0  - Cd)*F  + Cs ==> Cs - Cd*F
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_INVBLENDFACTOR} , // 2121: (0  - Cd)*F  + Cd ==> Cd*(1 - F)
+	{ 0                          , D3DBLENDOP_SUBTRACT    , D3DBLEND_ZERO           , D3DBLEND_BLENDFACTOR}    , // 2122: (0  - Cd)*F  +  0 ==> 0 - Cd*F (source factor must be ZERO, as in 2102)
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 2200: (0  -  0)*As + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 2201: (0  -  0)*As + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 2202: (0  -  0)*As +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 2210: (0  -  0)*Ad + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 2211: (0  -  0)*Ad + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 2212: (0  -  0)*Ad +  0 ==> 0
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ONE            , D3DBLEND_ZERO}           , // 2220: (0  -  0)*F  + Cs ==> Cs
+	{ 0                          , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ONE}            , // 2221: (0  -  0)*F  + Cd ==> Cd
+	{ BLEND_NO_BAR               , D3DBLENDOP_ADD         , D3DBLEND_ZERO           , D3DBLEND_ZERO}           , // 2222: (0  -  0)*F  +  0 ==> 0
 };
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@ -30,9 +30,10 @@
 #include "GLState.h"

 // A couple of flag to determine the blending behavior
-#define A_MAX	(0x100)	 // Impossible blending uses coeff bigger than 1
-#define C_CLR	(0x200)	 // Clear color blending (use directly the destination color as blending factor)
-#define NO_BAR  (0x400)  // don't require texture barrier for the blending (because the RT is not used)
+#define BLEND_A_MAX		(0x100) // Impossible blending: the equation needs a coefficient bigger than 1
+#define BLEND_C_CLR		(0x200) // Clear color blending (uses the destination color directly as blending factor)
+#define BLEND_NO_BAR	(0x400) // Doesn't require a texture barrier for the blending (because the RT is not used)
+#define BLEND_ACCU		(0x800) // Allows mixing SW and HW blending to keep the best of both worlds

 #ifdef ENABLE_OGL_DEBUG_MEM_BW
 extern uint64 g_real_texture_upload_byte;
@ -65,14 +66,6 @@ public:
 		if (IsConstant(src) || IsConstant(dst)) m_constant_factor = true;
 	}

-	void RevertOp()
-	{
-		if(m_equation_RGB == GL_FUNC_ADD)
-			m_equation_RGB = GL_FUNC_REVERSE_SUBTRACT;
-		else if(m_equation_RGB == GL_FUNC_REVERSE_SUBTRACT)
-			m_equation_RGB = GL_FUNC_ADD;
-	}
-
 	void EnableBlend() { m_enable = true;}

 	bool IsConstant(GLenum factor) { return ((factor == GL_CONSTANT_COLOR) || (factor == GL_ONE_MINUS_CONSTANT_COLOR)); }
@ -238,13 +231,10 @@ class GSDeviceOGL : public GSDevice
 			uint32 key;
 		};

-		// FIXME is the & useful ?
-		operator uint32() {return key & 0x3f;}
+		operator uint32() {return key;}

 		VSSelector() : key(0) {}
 		VSSelector(uint32 k) : key(k) {}
-
-		static uint32 size() { return 1 << 5; }
 	};

 	struct GSSelector
@ -266,8 +256,6 @@ class GSDeviceOGL : public GSDevice

 		GSSelector() : key(0) {}
 		GSSelector(uint32 k) : key(k) {}
-
-		static uint32 size() { return 1 << 2; }
 	};

 	__aligned(struct, 32) PSConstantBuffer
@ -336,7 +324,7 @@ class GSDeviceOGL : public GSDevice
 				uint32 tcoffsethack:1;
 				//uint32 point_sampler:1; Not tested, so keep the bit for blend
 				uint32 iip:1;
-				uint32 colclip:2;
+				uint32 colclip:1;
 				uint32 atst:3;
 				uint32 tfx:3;
 				uint32 tcc:1;
@ -348,15 +336,15 @@ class GSDeviceOGL : public GSDevice
 				uint32 read_ba:1;
 				uint32 fbmask:1;

-				//uint32 _free1:0;
+				uint32 _free1:1;

 				// Word 2
 				uint32 blend_a:2;
 				uint32 blend_b:2;
 				uint32 blend_c:2;
 				uint32 blend_d:2;
-				uint32 blend_accu:1;
 				uint32 dfmt:2;
+				uint32 hdr:1;

 				uint32 _free2:21;
 			};
@ -386,13 +374,10 @@ class GSDeviceOGL : public GSDevice
 			uint32 key;
 		};

-		// FIXME is the & useful ?
-		operator uint32() {return key & 0x7;}
+		operator uint32() {return key;}

 		PSSamplerSelector() : key(0) {}
 		PSSamplerSelector(uint32 k) : key(k) {}
-
-		static uint32 size() { return 1 << 3; }
 	};

 	struct OMDepthStencilSelector
@ -404,21 +389,18 @@ class GSDeviceOGL : public GSDevice
 				uint32 ztst:2;
 				uint32 zwe:1;
 				uint32 date:1;
-				uint32 alpha_stencil:1;

-				uint32 _free:27;
+				uint32 _free:28;
 			};

 			uint32 key;
 		};

 		// FIXME is the & useful ?
-		operator uint32() {return key & 0x1f;}
+		operator uint32() {return key;}

 		OMDepthStencilSelector() : key(0) {}
 		OMDepthStencilSelector(uint32 k) : key(k) {}
-
-		static uint32 size() { return 1 << 5; }
 	};

 	struct OMColorMaskSelector
@ -461,22 +443,16 @@ class GSDeviceOGL : public GSDevice
 				uint32 b:2;
 				uint32 c:2;
 				uint32 d:2;
-				uint32 negative:1;
-				uint32 accu:1;
-				uint32 ps:1;

-				uint32 _free:20;
+				uint32 _free:23;
 			};

 			struct
 			{
 				uint32 _abe:1;
 				uint32 abcd:8;
-				uint32 _negative:1;
-				uint32 _accu:1;
-				uint32 _ps:1;

-				uint32 _free2:20;
+				uint32 _free2:23;
 			};

 			uint32 key;
@ -524,7 +500,7 @@ class GSDeviceOGL : public GSDevice

 	struct {
 		GLuint vs;		// program object
-		GLuint ps[15];	// program object
+		GLuint ps[16];	// program object
 		GLuint ln;		// sampler object
 		GLuint pt;		// sampler object
 		GSDepthStencilOGL* dss;
@ -560,10 +536,10 @@ class GSDeviceOGL : public GSDevice
 		float bf; // blend factor
 	} m_state;

-	GLuint m_vs[1<<6];
+	GLuint m_vs[1<<5];
 	GLuint m_gs[1<<2];
 	GLuint m_ps_ss[1<<3];
-	GSDepthStencilOGL* m_om_dss[1<<6];
+	GSDepthStencilOGL* m_om_dss[1<<4];
 	hash_map<uint64, GLuint > m_ps;
 	hash_map<uint32, GSBlendStateOGL* > m_om_bs;
 	GLuint m_apitrace;
@ -629,7 +605,7 @@ class GSDeviceOGL : public GSDevice
 	GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0);

 	void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r);
-	void CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r);
+	void CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin);
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader = 0, bool linear = true);
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear = true);
 	void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, GSBlendStateOGL* bs, bool linear = true);
--- a/plugins/GSdx/GSRendererOGL.cpp
+++ b/plugins/GSdx/GSRendererOGL.cpp
@ -315,6 +315,137 @@ bool GSRendererOGL::EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_s
 	return require_barrier;
 }

+bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMBlendSelector& om_bsel, GSDeviceOGL::PSConstantBuffer& ps_cb, float afix, bool DATE_GL42)
+{ // Configures blending for the current draw: fills om_bsel (HW blend unit) and ps_sel/ps_cb (shader "SW" blending). Returns true when the SW blending path requires a texture barrier.
+	const GIFRegALPHA& ALPHA = m_context->ALPHA;
+	bool require_barrier = false; // Return value; only raised on the SW blending path below
+
+	om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; // && binds tighter than ||: ABE, or AA1 on line primitives
+
+	om_bsel.a = ALPHA.A;
+	om_bsel.b = ALPHA.B;
+	om_bsel.c = ALPHA.C;
+	om_bsel.d = ALPHA.D;
+
+	if (m_env.PABE.PABE)
+	{
+#ifdef ENABLE_OGL_DEBUG
+		fprintf(stderr, "env PABE  not supported\n");
+		GL_INS("!!! ENV PABE  not supported !!!");
+#endif
+		// FIXME it could be supported with SW blending!
+		if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
+		{
+			// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
+			// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
+			om_bsel.abe = 0;
+		}
+		else
+		{
+			//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
+			//ASSERT(0);
+		}
+	}
+
+	// No blending, so exit early (require_barrier is still false here)
+	if (!om_bsel.abe)
+		return require_barrier;
+
+	// Compute the blending equation index to detect special cases
+	int blend_sel  = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; // Base-3 index built from the A/B/C/D selectors
+	int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; // The 'bogus' field carries the BLEND_* flags tested below
+	// SW Blend is (nearly) free. Let's use it.
+	bool free_blend = (blend_flag & BLEND_NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO);
+	// We really need SW blending for this one, barely used
+	bool impossible_blend = (blend_flag & BLEND_A_MAX);
+	// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
+	bool accumulation_blend = (blend_flag & BLEND_ACCU);
+
+	bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend);
+
+	// Color clip (only when COLCLAMP.CLAMP is 0)
+	if (m_env.COLCLAMP.CLAMP == 0) {
+		if (accumulation_blend) {
+			ps_sel.hdr = 1;
+			GL_INS("COLCLIP Fast HDR mode ENABLED");
+		} else if (m_sw_blending >= ACC_BLEND_CCLIP_DALPHA || sw_blending_base) {
+			ps_sel.colclip = 1;
+			sw_blending_base = true;
+			GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
+		} else {
+			fprintf(stderr, "Sorry colclip isn't supported\n");
+		}
+	}
+
+	// Note: the option is duplicated, one impacts the blend unit / the other the shader.
+	sw_blending_base |= accumulation_blend;
+
+	// Warning: intentional fall-through (no break), so each accuracy level also applies the tests of the levels below it
+	bool sw_blending_adv = false;
+	switch (m_sw_blending) {
+		case ACC_BLEND_ULTRA:			sw_blending_adv |= true;
+		case ACC_BLEND_FULL:			sw_blending_adv |= !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) );
+		case ACC_BLEND_CCLIP_DALPHA:	sw_blending_adv |= (ALPHA.C == 1);
+		case ACC_BLEND_SPRITE:			sw_blending_adv |= m_vt.m_primclass == GS_SPRITE_CLASS;
+		default:						break;
+	}
+
+	bool sw_blending = sw_blending_base // Free case or Impossible blend
+		|| sw_blending_adv // complex blending case (for special effect)
+		|| ps_sel.fbmask; // accurate fbmask
+
+
+	// SW Blending
+	// GL42 interacts very badly with sw blending. GL42 uses the primitiveID to find the primitive
+	// that writes the bad alpha value. Sw blending will force the draw to run primitive by primitive
+	// (therefore primitiveID will be constant to 1)
+	sw_blending &= !DATE_GL42;
+	// Seriously don't expect me to support this kind of craziness.
+	// No mix of COLCLIP + accumulation_blend + DATE GL42
+	// Neither fbmask and GL42
+	ASSERT(!(ps_sel.hdr && DATE_GL42));
+	ASSERT(!(ps_sel.fbmask && DATE_GL42));
+
+	// For stat to optimize accurate option
+#if 0
+	GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (sw %d)",
+			om_bsel.a, om_bsel.b,  om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending);
+#endif
+	if (sw_blending) {
+		ps_sel.blend_a = om_bsel.a;
+		ps_sel.blend_b = om_bsel.b;
+		ps_sel.blend_c = om_bsel.c;
+		ps_sel.blend_d = om_bsel.d;
+
+		if (accumulation_blend) {
+			// Keep HW blending to do the addition
+			om_bsel.abe = 1;
+			// Remove the addition from the SW blending
+			ps_sel.blend_d = 2; // presumably selector 2 means "0" for the D term — TODO confirm against the shader
+		} else {
+			// Disable HW blending
+			om_bsel.abe = 0;
+		}
+
+		// Requires the fixed alpha value
+		if (ALPHA.C == 2) {
+			ps_cb.AlphaCoeff.a = afix;
+		}
+
+		// A barrier is needed unless the blend is flagged BLEND_NO_BAR or the addition is left to the HW (accumulation)
+		require_barrier |= !(blend_flag & BLEND_NO_BAR) && !accumulation_blend;
+	} else {
+		ps_sel.clr1 = om_bsel.IsCLR1();
+		if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
+			// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
+			om_bsel.c = 2;
+			afix = 1.0f; // NOTE(review): afix is passed by value, so this write is not visible to the caller — confirm whether a reference (float&) was intended
+		}
+	}
+
+	return require_barrier;
+}
+
 GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap()
 {
 	// Either 1 triangle or 1 line or 3 POINTs
@ -370,6 +501,14 @@ GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap()
 	return PRIM_OVERLAP_NO;
 }

+GSVector4i GSRendererOGL::ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize)
+{ // Returns the bounding box built from the m_vt min/max vertex positions, scaled by rtscale and intersected with the (0, 0, rtsize.x, rtsize.y) rectangle.
+	GSVector4 scale = GSVector4(rtscale.x, rtscale.y);
+	GSVector4 offset = GSVector4(-1.0f, 1.0f); // Round value: -1 / +1 margins added to the box below
+	GSVector4 box = m_vt.m_min.p.xyxy(m_vt.m_max.p) + offset.xxyy(); // presumably (min.x, min.y, max.x, max.y) widened by one unit on each side — confirm swizzle semantics
+	return GSVector4i(box * scale.xyxy()).rintersect(GSVector4i(0, 0, rtsize.x, rtsize.y)); // Scale, convert to integers, then restrict to the rtsize rectangle
+}
+
 void GSRendererOGL::SendDraw(bool require_barrier)
 {
 	GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
@ -410,13 +549,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 				tex && tex->m_texture ? tex->m_texture->GetID() : 0,
 				rt ? rt->GetID() : -1, ds->GetID());

-	GSDrawingEnvironment& env = m_env;
-	GSDrawingContext* context = m_context;
+	GSTexture* hdr_rt = NULL;

 	const GSVector2i& rtsize = ds->GetSize();
 	const GSVector2& rtscale = ds->GetScale();

-	bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
+	bool DATE = m_context->TEST.DATE && m_context->FRAME.PSM != PSM_PSMCT24;
 	bool DATE_GL42 = false;
 	bool DATE_GL45 = false;

@ -447,7 +585,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		// Except 2D games, sprites are often use for special post-processing effect
 		m_prim_overlap = PrimitiveOverlap();
 #ifdef ENABLE_OGL_DEBUG
-		if ((m_prim_overlap != PRIM_OVERLAP_NO) && (context->FRAME.Block() == context->TEX0.TBP0) && (m_vertex.next > 2)) {
+		if ((m_prim_overlap != PRIM_OVERLAP_NO) && (m_context->FRAME.Block() == m_context->TEX0.TBP0) && (m_vertex.next > 2)) {
 			GL_INS("ERROR: Source and Target are the same!");
 		}
 #endif
@ -459,15 +597,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	// DATE: selection of the algorithm. Must be done before blending because GL42 is not compatible with blending

-	if (DATE) {
-		if (GLLoader::found_GL_ARB_texture_barrier && (m_prim_overlap == PRIM_OVERLAP_NO)) {
+	if (DATE && GLLoader::found_GL_ARB_texture_barrier) {
+		if (m_prim_overlap == PRIM_OVERLAP_NO) {
+			require_barrier = true;
 			DATE_GL45 = true;
 			DATE = false;
 		} else if (m_accurate_date && om_csel.wa
-				&& (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
+				&& (!m_context->TEST.ATE || m_context->TEST.ATST == ATST_ALWAYS)) {
 			// texture barrier will split the draw call into n draw call. It is very efficient for
 			// few primitive draws. Otherwise it sucks.
-			if (GLLoader::found_GL_ARB_texture_barrier && (m_index.tail < 100)) {
+			if (m_index.tail < 100) {
 				require_barrier = true;
 				DATE_GL45 = true;
 				DATE = false;
@ -479,122 +618,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	// Blend

-	const GIFRegALPHA& ALPHA = context->ALPHA;
-	float afix = (float)context->ALPHA.FIX / 0x80;
+	float afix = (float)m_context->ALPHA.FIX / 0x80;

-	if (!IsOpaque() && rt)
-	{
-		om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
-
-		om_bsel.a = ALPHA.A;
-		om_bsel.b = ALPHA.B;
-		om_bsel.c = ALPHA.C;
-		om_bsel.d = ALPHA.D;
-
-		if (env.PABE.PABE)
-		{
-			// FIXME it could be supported with SW blending!
-			if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
-			{
-				// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
-				// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
-				om_bsel.abe = 0;
-			}
-			else
-			{
-				//Breath of Fire Dragon Quarter triggers this in battles. Graphics are fine though.
-				//ASSERT(0);
-#ifdef ENABLE_OGL_DEBUG
-				fprintf(stderr, "env PABE  not supported\n");
-				GL_INS("!!! ENV PABE  not supported !!!");
-#endif
-			}
-		}
-
-		// Compute the blending equation to detect special case
-		int blend_sel  = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
-		int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
-		// SW Blend is (nearly) free. Let's use it.
-		bool free_blend = (blend_flag & NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO);
-		// We really need SW blending for this one, barely used
-		bool impossible_blend = (blend_flag & A_MAX);
-		// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
-		ps_sel.blend_accu = m_sw_blending && ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1;
-		om_bsel.accu = ps_sel.blend_accu;
-
-		bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend /*|| ps_sel.blend_accu*/);
-
-		// Color clip
-		bool acc_colclip_wrap = false;
-		if (env.COLCLAMP.CLAMP == 0) {
-			// Not supported yet in colclip
-			om_bsel.accu = ps_sel.blend_accu = 0;
-
-			acc_colclip_wrap =  (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base);
-			if (acc_colclip_wrap) {
-				ps_sel.colclip = 3;
-				GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
-			} else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) {
-				// Standard (inaccurate) colclip
-				ps_sel.colclip = 1;
-				GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
-			}
-		} else {
-			sw_blending_base |= m_sw_blending && ps_sel.blend_accu;
-		}
-
-		bool all_blend_sw;
-		switch (m_sw_blending) {
-			case ACC_BLEND_ULTRA:	all_blend_sw = true; break;
-			case ACC_BLEND_FULL:	all_blend_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ); break;
-			case ACC_BLEND_CCLIP:
-			case ACC_BLEND_SPRITE:	all_blend_sw = m_vt.m_primclass == GS_SPRITE_CLASS; break;
-			default:				all_blend_sw = false; break;
-		}
-
-		bool sw_blending = sw_blending_base // Free case or Impossible blend
-			|| all_blend_sw // all blend
-			|| acc_colclip_wrap // accurate colclip
-			|| ps_sel.fbmask; // accurate fbmask
-
-
-		// SW Blending
-		// GL42 interact very badly with sw blending. GL42 uses the primitiveID to find the primitive
-		// that write the bad alpha value. Sw blending will force the draw to run primitive by primitive
-		// (therefore primitiveID will be constant to 1)
-		sw_blending &= !DATE_GL42;
-
-		// For stat to optimize accurate option
-#if 0
-		if (om_bsel.abe)
-			GL_INS("BLEND_INFO: %d/%d/%d/%d. Clamp:%d. Prim:%d number %d (sw %d)",
-					om_bsel.a, om_bsel.b,  om_bsel.c, om_bsel.d, env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending);
-#endif
-		if (sw_blending && om_bsel.abe) {
-			// select a shader that support blending
-			om_bsel.ps = 1;
-			ps_sel.blend_a = om_bsel.a;
-			ps_sel.blend_b = om_bsel.b;
-			ps_sel.blend_c = om_bsel.c;
-			ps_sel.blend_d = om_bsel.d;
-
-			dev->PSSetShaderResource(3, rt);
-
-			// Require the fix alpha vlaue
-			if (ALPHA.C == 2) {
-				ps_cb.AlphaCoeff.a = afix;
-			}
-
-			// No need to flush for every primitive
-			require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu;
-		} else {
-			ps_sel.clr1 = om_bsel.IsCLR1();
-			if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
-				// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
-				om_bsel.c = 2;
-				afix = 1.0f;
-			}
-		}
+	if (!IsOpaque() && rt) {
+		require_barrier |= EmulateBlending(ps_sel, om_bsel, ps_cb, afix, DATE_GL42);
 	}

 	if (ps_sel.dfmt == 1) {
@ -604,30 +631,20 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	// DATE (setup part)

-	if (DATE_GL45) {
-		gl_TextureBarrier();
-		dev->PSSetShaderResource(3, rt);
-	} else if (DATE) {
-		// TODO: do I need to clamp the value (if yes how? rintersect with rt?)
-		GSVector4 si = GSVector4(rtscale.x, rtscale.y);
-		GSVector4 off = GSVector4(-1.0f, 1.0f); // Round value
-		GSVector4 b = m_vt.m_min.p.xyxy(m_vt.m_max.p) + off.xxyy();
-		GSVector4i ri = GSVector4i(b * si.xyxy());
+	if (DATE) {
+		GSVector4i dRect = ComputeBoundingBox(rtscale, rtsize);

 		// Reduce the quantity of clean function
-		glScissor( ri.x, ri.y, ri.width(), ri.height() );
-		GLState::scissor = ri;
+		glScissor( dRect.x, dRect.y, dRect.width(), dRect.height() );
+		GLState::scissor = dRect;

 		// Must be done here to avoid any GL state pertubation (clear function...)
 		// Create an r32ui image that will containt primitive ID
 		if (DATE_GL42) {
 			dev->InitPrimDateTexture(rt);
-			dev->PSSetShaderResource(3, rt);
 		} else {
-			GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
-
-			GSVector4 src = (b * s.xyxy()).sat(off.zzyy());
-			GSVector4 dst = src * 2.0f + off.xxxx();
+			GSVector4 src = GSVector4(dRect) / GSVector4(rtsize.x, rtsize.y).xyxy();
+			GSVector4 dst = src * 2.0f - 1.0f;

 			GSVertexPT1 vertices[] =
 			{
@ -647,10 +664,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	// om

-	if (context->TEST.ZTE)
+	if (m_context->TEST.ZTE)
 	{
-		om_dssel.ztst = context->TEST.ZTST;
-		om_dssel.zwe = !context->ZBUF.ZMSK;
+		om_dssel.ztst = m_context->TEST.ZTST;
+		om_dssel.zwe = !m_context->ZBUF.ZMSK;
 	}
 	else
 	{
@ -669,7 +686,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
 	{
-		if (context->ZBUF.PSM == PSM_PSMZ24)
+		if (m_context->ZBUF.PSM == PSM_PSMZ24)
 		{
 			if (m_vt.m_max.p.z > 0xffffff)
 			{
@ -683,7 +700,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 				}
 			}
 		}
-		else if (context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
+		else if (m_context->ZBUF.PSM == PSM_PSMZ16 || m_context->ZBUF.PSM == PSM_PSMZ16S)
 		{
 			if (m_vt.m_max.p.z > 0xffff)
 			{
@ -702,8 +719,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 	// FIXME Opengl support half pixel center (as dx10). Code could be easier!!!
 	float sx = 2.0f * rtscale.x / (rtsize.x << 4);
 	float sy = 2.0f * rtscale.y / (rtsize.y << 4);
-	float ox = (float)(int)context->XYOFFSET.OFX;
-	float oy = (float)(int)context->XYOFFSET.OFY;
+	float ox = (float)(int)m_context->XYOFFSET.OFX;
+	float oy = (float)(int)m_context->XYOFFSET.OFY;
 	float ox2 = -1.0f / rtsize.x;
 	float oy2 = -1.0f / rtsize.y;

@ -727,30 +744,30 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 	ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP;

 	if (DATE_GL45) {
-		ps_sel.date = 5 + context->TEST.DATM;
+		ps_sel.date = 5 + m_context->TEST.DATM;
 	} else if (DATE) {
 		if (DATE_GL42)
-			ps_sel.date = 1 + context->TEST.DATM;
+			ps_sel.date = 1 + m_context->TEST.DATM;
 		else
 			om_dssel.date = 1;
 	}

-	ps_sel.fba = context->FBA.FBA;
+	ps_sel.fba = m_context->FBA.FBA;

 	if (PRIM->FGE)
 	{
 		ps_sel.fog = 1;

-		ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]);
+		ps_cb.FogColor_AREF = GSVector4::rgba32(m_env.FOGCOL.u32[0]);
 	}

-	if (context->TEST.ATE)
-		ps_sel.atst = context->TEST.ATST;
+	if (m_context->TEST.ATE)
+		ps_sel.atst = m_context->TEST.ATST;
 	else
 		ps_sel.atst = ATST_ALWAYS;

-	if (context->TEST.ATE && context->TEST.ATST > 1)
-		ps_cb.FogColor_AREF.a = (float)context->TEST.AREF;
+	if (m_context->TEST.ATE && m_context->TEST.ATST > 1)
+		ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF;

 	// By default don't use texture
 	ps_sel.tfx = 4;
@ -759,23 +776,23 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	if (tex)
 	{
-		const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM];
-		const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[context->TEX0.CPSM] : psm;
+		const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
+		const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
 		bool bilinear = m_filter == 2 ? m_vt.IsLinear() : m_filter != 0;
-		bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3;
+		bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 3 && m_context->CLAMP.WMT < 3;
 		// Don't force extra filtering on sprite (it creates various upscaling issue)
 		bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear());

-		ps_sel.wms = context->CLAMP.WMS;
-		ps_sel.wmt = context->CLAMP.WMT;
+		ps_sel.wms = m_context->CLAMP.WMS;
+		ps_sel.wmt = m_context->CLAMP.WMT;

 		if (ps_sel.shuffle) {
 			ps_sel.fmt = 0;
 		} else if (tex->m_palette) {
 			ps_sel.fmt = cpsm.fmt | 4;
 			ps_sel.ifmt = !tex->m_target ? 0
-				: (context->TEX0.PSM == PSM_PSMT4HL) ? 2
-				: (context->TEX0.PSM == PSM_PSMT4HH) ? 1
+				: (m_context->TEX0.PSM == PSM_PSMT4HL) ? 2
+				: (m_context->TEX0.PSM == PSM_PSMT4HH) ? 1
 				: 0;

 			// In standard mode palette is only used when alpha channel of the RT is
@ -788,16 +805,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		} else {
 			ps_sel.fmt = cpsm.fmt;
 		}
-		ps_sel.aem = env.TEXA.AEM;
+		ps_sel.aem = m_env.TEXA.AEM;

-		if (context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) {
+		if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) {
 			// Micro optimization that reduces GPU load (removes 5 instructions on the FS program)
 			ps_sel.tfx = TFX_DECAL;
 		} else {
-			ps_sel.tfx = context->TEX0.TFX;
+			ps_sel.tfx = m_context->TEX0.TFX;
 		}

-		ps_sel.tcc = context->TEX0.TCC;
+		ps_sel.tcc = m_context->TEX0.TCC;

 		ps_sel.ltf = bilinear && !simple_sample;
 		spritehack = tex->m_spritehack_t;
@ -808,8 +825,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		int w = tex->m_texture->GetWidth();
 		int h = tex->m_texture->GetHeight();

-		int tw = (int)(1 << context->TEX0.TW);
-		int th = (int)(1 << context->TEX0.TH);
+		int tw = (int)(1 << m_context->TEX0.TW);
+		int th = (int)(1 << m_context->TEX0.TH);

 		GSVector4 WH(tw, th, w, h);

@ -821,20 +838,20 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 		ps_cb.WH = WH;
 		ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
-		ps_cb.MskFix = GSVector4i(context->CLAMP.MINU, context->CLAMP.MINV, context->CLAMP.MAXU, context->CLAMP.MAXV);
+		ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);

 		// TC Offset Hack
 		ps_sel.tcoffsethack = !!UserHacks_TCOffset;
 		ps_cb.TC_OffsetHack = GSVector4(UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy();

 		GSVector4 clamp(ps_cb.MskFix);
-		GSVector4 ta(env.TEXA & GSVector4i::x000000ff());
+		GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());

 		ps_cb.MinMax = clamp / WH.xyxy();
 		ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));

-		ps_ssel.tau = (context->CLAMP.WMS + 3) >> 1;
-		ps_ssel.tav = (context->CLAMP.WMT + 3) >> 1;
+		ps_ssel.tau = (m_context->CLAMP.WMS + 3) >> 1;
+		ps_ssel.tav = (m_context->CLAMP.WMT + 3) >> 1;
 		ps_ssel.ltf = bilinear && simple_sample;

 		// Setup Texture ressources
@ -868,6 +885,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		dev->PSSetShaderResource(1, NULL);
 #endif
 	}
+	// Always bind the RT. This way special effect can use it.
+	dev->PSSetShaderResource(3, rt);
+

 	// GS

@ -894,7 +914,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	// rs

-	GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
+	GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * m_context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());

 	GL_PUSH("IA");
 	SetupIA();
@ -938,34 +958,24 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		GL_POP();
 	}

-	dev->OMSetRenderTargets(rt, ds, &scissor);
+	if (ps_sel.hdr) {
+		hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F);

-	if (context->TEST.DoFirstPass())
-	{
-		SendDraw(require_barrier);
+		dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false);

-		if (ps_sel.colclip == 1)
-		{
-			ASSERT(!om_bsel.ps);
-			GL_PUSH("COLCLIP");
-			GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
-			GSDeviceOGL::PSSelector ps_selneg(ps_sel);
-
-			om_bselneg.negative = 1;
-			ps_selneg.colclip = 2;
-
-			dev->SetupOM(om_dssel, om_bselneg, afix);
-			dev->SetupPS(ps_selneg);
-
-			SendDraw(false);
-			dev->SetupOM(om_dssel, om_bsel, afix);
-			GL_POP();
-		}
+		dev->OMSetRenderTargets(hdr_rt, ds, &scissor);
+	} else {
+		dev->OMSetRenderTargets(rt, ds, &scissor);
 	}

-	if (context->TEST.DoSecondPass())
+	if (m_context->TEST.DoFirstPass())
 	{
-		ASSERT(!env.PABE.PABE);
+		SendDraw(require_barrier);
+	}
+
+	if (m_context->TEST.DoSecondPass())
+	{
+		ASSERT(!m_env.PABE.PABE);

 		static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4};

@ -982,7 +992,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		bool b = om_csel.wb;
 		bool a = om_csel.wa;

-		switch(context->TEST.AFAIL)
+		switch(m_context->TEST.AFAIL)
 		{
 			case AFAIL_KEEP: z = r = g = b = a = false; break; // none
 			case AFAIL_FB_ONLY: z = false; break; // rgba
@ -1003,29 +1013,24 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 			dev->SetupOM(om_dssel, om_bsel, afix);

 			SendDraw(require_barrier);
-
-			if (ps_sel.colclip == 1)
-			{
-				ASSERT(!om_bsel.ps);
-				GL_PUSH("COLCLIP");
-				GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
-				GSDeviceOGL::PSSelector ps_selneg(ps_sel);
-
-				om_bselneg.negative = 1;
-				ps_selneg.colclip = 2;
-
-				dev->SetupOM(om_dssel, om_bselneg, afix);
-				dev->SetupPS(ps_selneg);
-
-				SendDraw(false);
-				GL_POP();
-			}
 		}
 	}
-	if (DATE_GL42)
+
+	if (DATE_GL42) {
 		dev->RecycleDateTexture();
+	}

 	dev->EndScene();

+	// Warning: EndScene must be called before StretchRect otherwise
+	// vertices will be overwritten. Trust me you don't want to do that.
+	if (hdr_rt) {
+		GSVector4 dRect(ComputeBoundingBox(rtscale, rtsize));
+		GSVector4 sRect = dRect / GSVector4(rtsize.x, rtsize.y).xyxy();
+		dev->StretchRect(hdr_rt, sRect, rt, dRect, 4, false);
+
+		dev->Recycle(hdr_rt);
+	}
+
 	GL_POP();
 }
--- a/plugins/GSdx/GSRendererOGL.h
+++ b/plugins/GSdx/GSRendererOGL.h
@ -39,7 +39,7 @@ class GSRendererOGL : public GSRendererHW
 		ACC_BLEND_NONE = 0,
 		ACC_BLEND_FREE = 1,
 		ACC_BLEND_SPRITE = 2,
-		ACC_BLEND_CCLIP = 3,
+		ACC_BLEND_CCLIP_DALPHA = 3,
 		ACC_BLEND_FULL = 4,
 		ACC_BLEND_ULTRA = 5
 	};
@ -54,10 +54,13 @@ class GSRendererOGL : public GSRendererHW

 		PRIM_OVERLAP m_prim_overlap;

+		GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize);
+
 	protected:
 		void EmulateGS();
 		void SetupIA();
 		bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel, GSDeviceOGL::PSConstantBuffer& ps_cb);
+		bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMBlendSelector& om_bsel, GSDeviceOGL::PSConstantBuffer& ps_cb, float afix, bool DATE_GL42);

 	public:
 		GSRendererOGL();
--- a/plugins/GSdx/GSSetting.cpp
+++ b/plugins/GSdx/GSSetting.cpp
@ -123,11 +123,11 @@ const char* dialog_message(int ID, bool* updateText) {
 				"------------------------------------------------------------------\n"
 				"Basic\t: Emulate correctly most of the effects with a limited speed penality. It is the recommended setting.\n"
 				"------------------------------------------------------------------\n"
-				"Medium\t: Add emulation of all sprites. Performance impact remains reasonable in 3D game.\n"
+				"Medium\t: Extend it to all sprites. Performance impact remains reasonable in 3D game.\n"
 				"------------------------------------------------------------------\n"
-				"High\t: Add full emulation of color wrapping. It helps Castlevania games. Be aware that it will half your FPS.\n"
+				"High\t: Extend it to destination alpha blending and color wrapping. (help shadow and fog effect). A good CPU is required\n"
 				"------------------------------------------------------------------\n"
-				"Full\t\t: Except few cases, the blending unit will be fully emulated by the shader. It is very slow! It is intended for debug\n"
+				"Full\t\t: Except few cases, the blending unit will be fully emulated by the shader. It is ultra slow! It is intended for debug\n"
 				"------------------------------------------------------------------\n"
 				"Ultra\t: The blending unit will be completely emulated by the shader. It is ultra slow! It is intended for debug\n";
 #endif
--- a/plugins/GSdx/GSTextureCache.cpp
+++ b/plugins/GSdx/GSTextureCache.cpp
@ -283,7 +283,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int

 				if (type == DepthStencil) {
 					GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
-					int shader = (TEX0.PSM & 1) ? 13 : 12;
+					int shader = 12 + GSLocalMemory::m_psm[TEX0.PSM].fmt;
+					ASSERT(shader <= 14);
 					m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false);
 				} else {
 					GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
@ -861,7 +862,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con

 		if (is_8bits) {
 			GL_INS("Reading RT as a packed-indexed 8 bits format");
-			shader = 14; // ask a conversion to 8 bits format
+			shader = 15; // ask a conversion to 8 bits format
 		}

 #ifdef ENABLE_OGL_DEBUG
--- a/plugins/GSdx/GSTextureFXOGL.cpp
+++ b/plugins/GSdx/GSTextureFXOGL.cpp
@ -40,7 +40,7 @@ void GSDeviceOGL::CreateTextureFX()

 	// Pre compile all Geometry & Vertex Shader
 	// It might cost a seconds at startup but it would reduce benchmark pollution
-	for (uint32 key = 0; key < GSSelector::size(); key++) {
+	for (uint32 key = 0; key < countof(m_gs); key++) {
 		GSSelector sel(key);
 		if (sel.point == sel.sprite)
 			m_gs[key] = 0;
@ -48,7 +48,7 @@ void GSDeviceOGL::CreateTextureFX()
 			m_gs[key] = CompileGS(GSSelector(key));
 	}

-	for (uint32 key = 0; key < VSSelector::size(); key++) {
+	for (uint32 key = 0; key < countof(m_vs); key++) {
 		// wildhack is only useful if both TME and FST are enabled.
 		VSSelector sel(key);
 		if (sel.wildhack && (!sel.tme || !sel.fst))
@ -61,8 +61,9 @@ void GSDeviceOGL::CreateTextureFX()
 	// enough but buffer is polluted with noise. Clear will be limited
 	// to the mask.
 	glStencilMask(0xFF);
-	for (uint32 key = 0; key < OMDepthStencilSelector::size(); key++)
+	for (uint32 key = 0; key < countof(m_om_dss); key++) {
 		m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
+	}

 	// Help to debug FS in apitrace
 	m_apitrace = CompilePS(PSSelector());
@ -77,7 +78,7 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
 	if (dssel.date)
 	{
 		dss->EnableStencil();
-		dss->SetStencil(GL_EQUAL, dssel.alpha_stencil ? GL_ZERO : GL_KEEP);
+		dss->SetStencil(GL_EQUAL, GL_KEEP);
 	}

 	if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
@ -104,30 +105,8 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix)
 	{
 		int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;

-		if (bsel.accu)
-			bs->SetRGB(GL_FUNC_ADD, GL_ONE, GL_ONE);
-		else
-			bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
-
-		if (m_blendMapD3D9[i].bogus & A_MAX) {
-			if (!theApp.GetConfig("accurate_blending_unit", 1)) {
-				bs->EnableBlend();
-				if (bsel.a == 0)
-					bs->SetRGB(m_blendMapD3D9[i].op, GL_ONE, m_blendMapD3D9[i].dst);
-				else
-					bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE);
-			}
-
-			const string afixstr = format("%f", afix);
-			const char *col[3] = {"Cs", "Cd", "0"};
-			const char *alpha[3] = {"As", "Ad", afixstr.c_str()};
-			fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]);
-		} else {
-			bs->EnableBlend();
-		}
-
-		// Not very good but I don't wanna write another 81 row table
-		if(bsel.negative) bs->RevertOp();
+		bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
+		bs->EnableBlend();
 	}

 	return bs;
@ -196,15 +175,6 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, fl

 	OMSetDepthStencilState(dss, 1);

-	if (bsel.ps && !bsel.accu) {
-		if (GLState::blend) {
-			GLState::blend = false;
-			glDisable(GL_BLEND);
-		}
-		// No hardware blending thank
-		return;
-	}
-
 	// *************************************************************
 	// Static
 	// *************************************************************
--- a/plugins/GSdx/GSdx.cpp
+++ b/plugins/GSdx/GSdx.cpp
@ -187,9 +187,9 @@ GSdxApp::GSdxApp()
 	m_gs_crc_level.push_back(GSSetting(4 , "Aggressive", ""));

 	m_gs_acc_blend_level.push_back(GSSetting(0, "None", "Fastest"));
-	m_gs_acc_blend_level.push_back(GSSetting(1, "Basic", "Recommended"));
+	m_gs_acc_blend_level.push_back(GSSetting(1, "Basic", "Recommended low-end PC"));
 	m_gs_acc_blend_level.push_back(GSSetting(2, "Medium", ""));
-	m_gs_acc_blend_level.push_back(GSSetting(3, "High", "Slow"));
+	m_gs_acc_blend_level.push_back(GSSetting(3, "High", "Recommended high-end PC"));
 	m_gs_acc_blend_level.push_back(GSSetting(4, "Full", "Very Slow"));
 	m_gs_acc_blend_level.push_back(GSSetting(5, "Ultra", "Ultra Slow"));

--- a/plugins/GSdx/res/glsl/convert.glsl
+++ b/plugins/GSdx/res/glsl/convert.glsl
@ -194,17 +194,30 @@ void ps_main12()
 //out float gl_FragDepth;
 void ps_main13()
 {
-	// Same as above but without the alpha channel
+	// Same as above but without the alpha channel (24 bits Z)

 	// Convert a RRGBA texture into a float depth texture
 	// FIXME: I'm afraid of the accuracy
-	const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0);
-	gl_FragDepth = dot(sample_c(), bitSh);
+	const vec3 bitSh = vec3(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0) * vec3(255.0/256.0);
+	gl_FragDepth = dot(sample_c().rgb, bitSh);
 }
 #endif

 #ifdef ps_main14
+//out float gl_FragDepth;
 void ps_main14()
+{
+	// Same as above but without the A/B channels (16 bits Z)
+
+	// Convert a RRGBA texture into a float depth texture
+	// FIXME: I'm afraid of the accuracy
+	const vec2 bitSh = vec2(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0)) * vec2(255.0/256.0);
+	gl_FragDepth = dot(sample_c().rg, bitSh);
+}
+#endif
+
+#ifdef ps_main15
+void ps_main15()
 {

    // Potential speed optimization. There is a high probability that
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@ -352,17 +352,6 @@ void atst(vec4 C)
 #endif
 }

-void colclip(inout vec4 C)
-{
-#if (PS_COLCLIP == 2)
-	C.rgb = 256.0f - C.rgb;
-#endif
-#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)
-	bvec3 factor = lessThan(C.rgb, vec3(128.0f));
-	C.rgb *= vec3(factor);
-#endif
-}
-
 void fog(inout vec4 C, float f)
 {
 #if PS_FOG != 0
@ -384,8 +373,6 @@ vec4 ps_color()

 	fog(C, PSin_t.z);

-	colclip(C);
-
 #if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
 	C.rgb = vec3(255.0f);
 #endif
@ -453,9 +440,6 @@ void ps_blend(inout vec4 Color, float As)

 #if PS_BLEND_A == PS_BLEND_B
    Color.rgb = D;
-#elif PS_BLEND_ACCU == 1
-	// The D addition will be done in the blending unit
-	Color.rgb = trunc(A * C);
 #else
    Color.rgb = trunc((A - B) * C + D);
 #endif
@ -463,7 +447,7 @@ void ps_blend(inout vec4 Color, float As)
 	// FIXME dithering

 	// Correct the Color value based on the output format
-#if PS_COLCLIP != 3
+#if PS_COLCLIP == 0
 	// Standard Clamp
 	Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));
 #endif
@ -478,7 +462,7 @@ void ps_blend(inout vec4 Color, float As)
 	// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania

 	Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));
-#elif PS_COLCLIP == 3
+#elif PS_COLCLIP == 1
 	Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));
 #endif

@ -612,6 +596,12 @@ void ps_main()

 	ps_fbmask(C);

+#if PS_HDR == 1
+	// Use negative value to avoid overflow of the texture (in accumulation mode)
+	if (any(greaterThan(C.rgb, vec3(128.0f)))) {
+		C.rgb = (C.rgb - 256.0f);
+	}
+#endif
 	SV_Target0 = C / 255.0f;
 	SV_Target1 = vec4(alpha_blend);
 }
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -219,18 +219,31 @@ static const char* convert_glsl =
 	"//out float gl_FragDepth;\n"
 	"void ps_main13()\n"
 	"{\n"
-	"	// Same as above but without the alpha channel\n"
+	"	// Same as above but without the alpha channel (24 bits Z)\n"
 	"\n"
 	"	// Convert a RRGBA texture into a float depth texture\n"
 	"	// FIXME: I'm afraid of the accuracy\n"
-	"	const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0);\n"
-	"	gl_FragDepth = dot(sample_c(), bitSh);\n"
+	"	const vec3 bitSh = vec3(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0) * vec3(255.0/256.0);\n"
+	"	gl_FragDepth = dot(sample_c().rgb, bitSh);\n"
 	"}\n"
 	"#endif\n"
 	"\n"
 	"#ifdef ps_main14\n"
+	"//out float gl_FragDepth;\n"
 	"void ps_main14()\n"
 	"{\n"
+	"	// Same as above but without the A/B channels (16 bits Z)\n"
+	"\n"
+	"	// Convert a RRGBA texture into a float depth texture\n"
+	"	// FIXME: I'm afraid of the accuracy\n"
+	"	const vec2 bitSh = vec2(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0)) * vec2(255.0/256.0);\n"
+	"	gl_FragDepth = dot(sample_c().rg, bitSh);\n"
+	"}\n"
+	"#endif\n"
+	"\n"
+	"#ifdef ps_main15\n"
+	"void ps_main15()\n"
+	"{\n"
 	"\n"
 	"    // Potential speed optimization. There is a high probability that\n"
 	"    // game only want to extract a single channel (blue). It will allow\n"
@ -535,6 +548,9 @@ static const char* shadeboost_glsl =
 	"** Contrast, saturation, brightness\n"
 	"** Code of this function is from TGM's shader pack\n"
 	"** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057\n"
+	"** TGM's author comment about the license (included in the previous link)\n"
+	"** \"do with it, what you want! its total free!\n"
+	"** (but would be nice, if you say that you used my shaders  :wink: ) but not necessary\"\n"
 	"*/\n"
 	"\n"
 	"struct vertex_basic\n"
@ -1211,17 +1227,6 @@ static const char* tfx_fs_all_glsl =
 	"#endif\n"
 	"}\n"
 	"\n"
-	"void colclip(inout vec4 C)\n"
-	"{\n"
-	"#if (PS_COLCLIP == 2)\n"
-	"	C.rgb = 256.0f - C.rgb;\n"
-	"#endif\n"
-	"#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)\n"
-	"	bvec3 factor = lessThan(C.rgb, vec3(128.0f));\n"
-	"	C.rgb *= vec3(factor);\n"
-	"#endif\n"
-	"}\n"
-	"\n"
 	"void fog(inout vec4 C, float f)\n"
 	"{\n"
 	"#if PS_FOG != 0\n"
@ -1243,8 +1248,6 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"	fog(C, PSin_t.z);\n"
 	"\n"
-	"	colclip(C);\n"
-	"\n"
 	"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
 	"	C.rgb = vec3(255.0f);\n"
 	"#endif\n"
@ -1312,9 +1315,6 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"#if PS_BLEND_A == PS_BLEND_B\n"
 	"    Color.rgb = D;\n"
-	"#elif PS_BLEND_ACCU == 1\n"
-	"	// The D addition will be done in the blending unit\n"
-	"	Color.rgb = trunc(A * C);\n"
 	"#else\n"
 	"    Color.rgb = trunc((A - B) * C + D);\n"
 	"#endif\n"
@ -1322,7 +1322,7 @@ static const char* tfx_fs_all_glsl =
 	"	// FIXME dithering\n"
 	"\n"
 	"	// Correct the Color value based on the output format\n"
-	"#if PS_COLCLIP != 3\n"
+	"#if PS_COLCLIP == 0\n"
 	"	// Standard Clamp\n"
 	"	Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n"
 	"#endif\n"
@ -1337,7 +1337,7 @@ static const char* tfx_fs_all_glsl =
 	"	// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
 	"\n"
 	"	Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n"
-	"#elif PS_COLCLIP == 3\n"
+	"#elif PS_COLCLIP == 1\n"
 	"	Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n"
 	"#endif\n"
 	"\n"
@ -1471,6 +1471,12 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"	ps_fbmask(C);\n"
 	"\n"
+	"#if PS_HDR == 1\n"
+	"	// Use negative value to avoid overflow of the texture (in accumulation mode)\n"
+	"	if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n"
+	"		C.rgb = (C.rgb - 256.0f);\n"
+	"	}\n"
+	"#endif\n"
 	"	SV_Target0 = C / 255.0f;\n"
 	"	SV_Target1 = vec4(alpha_blend);\n"
 	"}\n"