diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h
index 031f3d10d9..8dac1fb563 100644
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@@ -191,10 +191,9 @@ class GSDeviceOGL : public GSDevice
 	{
 		GSVector4 FogColor_AREF;
 		GSVector4 WH;
-		GSVector4 MinF_TA;
+		GSVector4 TA_Af;
 		GSVector4i MskFix;
 		GSVector4i FbMask;
-		GSVector4 AlphaCoeff;
 
 		GSVector4 HalfTexel;
 		GSVector4 MinMax;
@@ -206,9 +205,7 @@ class GSDeviceOGL : public GSDevice
 			HalfTexel     = GSVector4::zero();
 			WH            = GSVector4::zero();
 			MinMax        = GSVector4::zero();
-			MinF_TA       = GSVector4::zero();
 			MskFix        = GSVector4i::zero();
-			AlphaCoeff    = GSVector4::zero();
 			TC_OH_TS      = GSVector4::zero();
 			FbMask        = GSVector4i::zero();
 		}
@@ -220,7 +217,7 @@ class GSDeviceOGL : public GSDevice
 
 			// if WH matches both HalfTexel and TC_OH_TS do too
 			// MinMax depends on WH and MskFix so no need to check it too
-			if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue())
+			if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])).alltrue())
 			{
 				// Note previous check uses SSE already, a plain copy will be faster than any memcpy
 				a[0] = b[0];
diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp
index fd39c435c7..347e21ea20 100644
--- a/plugins/GSdx/GSRendererOGL.cpp
+++ b/plugins/GSdx/GSRendererOGL.cpp
@@ -460,7 +460,7 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G
 
 		// Require the fix alpha vlaue
 		if (ALPHA.C == 2) {
-			ps_cb.AlphaCoeff.a = (float)ALPHA.FIX / 128.0f;
+			ps_cb.TA_Af.a = (float)ALPHA.FIX / 128.0f;
 		}
 
 		// No need to flush for every primitive
@@ -901,8 +901,12 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 			ps_sel.aem     = m_env.TEXA.AEM;
 			ASSERT(tex->m_target);
 
+			// Shuffle is a 16-bit format, so AEM is always required
 			GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
-			ps_cb.MinF_TA = ta.xyxy() / 255.0f;
+			ta /= 255.0f;
+			// FIXME: only x/y are stored because TA_Af.a holds Af; relies on the compiler to optimize the two scalar stores
+			ps_cb.TA_Af.x = ta.x;
+			ps_cb.TA_Af.y = ta.y;
 
 			// FIXME: it is likely a bad idea to do the bilinear interpolation here
 			// bilinear &= m_vt.IsLinear();
@@ -915,8 +919,14 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 			ps_sel.tex_fmt = cpsm.fmt;
 			ps_sel.aem     = m_env.TEXA.AEM;
 
-			GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
-			ps_cb.MinF_TA = ta.xyxy() / 255.0f;
+			// Skip the TA upload when the texture format is 32-bit (cpsm.fmt == 0)
+			if (cpsm.fmt) {
+				GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
+				ta /= 255.0f;
+				// FIXME: only x/y are stored because TA_Af.a holds Af; relies on the compiler to optimize the two scalar stores
+				ps_cb.TA_Af.x = ta.x;
+				ps_cb.TA_Af.y = ta.y;
+			}
 
 			// Select the index format
 			if (tex->m_palette) {
diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl
index d56e43433b..91642a8e22 100644
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@@ -82,16 +82,14 @@ layout(std140, binding = 21) uniform cb21
 
     vec4 WH;
 
-    vec2 _pad0;
     vec2 TA;
+    float _pad0;
+    float Af;
 
     uvec4 MskFix;
 
     uvec4 FbMask;
 
-    vec3 _pad1;
-    float Af;
-
     vec4 HalfTexel;
 
     vec4 MinMax;
diff --git a/plugins/GSdx/res/glsl/tfx_vgs.glsl b/plugins/GSdx/res/glsl/tfx_vgs.glsl
index 4f28e90b56..c89720d644 100644
--- a/plugins/GSdx/res/glsl/tfx_vgs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_vgs.glsl
@@ -16,16 +16,14 @@ layout(std140, binding = 21) uniform cb21
 
     vec4 WH;
 
-    vec2 _pad0;
     vec2 TA;
+    float _pad0;
+    float Af;
 
     uvec4 MskFix;
 
     uvec4 FbMask;
 
-    vec3 _pad1;
-    float Af;
-
     vec4 HalfTexel;
 
     vec4 MinMax;
diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h
index 9626b217c8..dc04e09aa1 100644
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@@ -641,16 +641,14 @@ static const char* tfx_vgs_glsl =
 	"\n"
 	"    vec4 WH;\n"
 	"\n"
-	"    vec2 _pad0;\n"
 	"    vec2 TA;\n"
+	"    float _pad0;\n"
+	"    float Af;\n"
 	"\n"
 	"    uvec4 MskFix;\n"
 	"\n"
 	"    uvec4 FbMask;\n"
 	"\n"
-	"    vec3 _pad1;\n"
-	"    float Af;\n"
-	"\n"
 	"    vec4 HalfTexel;\n"
 	"\n"
 	"    vec4 MinMax;\n"
@@ -975,16 +973,14 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"    vec4 WH;\n"
 	"\n"
-	"    vec2 _pad0;\n"
 	"    vec2 TA;\n"
+	"    float _pad0;\n"
+	"    float Af;\n"
 	"\n"
 	"    uvec4 MskFix;\n"
 	"\n"
 	"    uvec4 FbMask;\n"
 	"\n"
-	"    vec3 _pad1;\n"
-	"    float Af;\n"
-	"\n"
 	"    vec4 HalfTexel;\n"
 	"\n"
 	"    vec4 MinMax;\n"