diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp
index 7230316708..84c9915af6 100644
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@@ -626,6 +626,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
 		+ format("#define PS_WMS %d\n", sel.wms)
 		+ format("#define PS_WMT %d\n", sel.wmt)
 		+ format("#define PS_FMT %d\n", sel.fmt)
+		+ format("#define PS_IFMT %d\n", sel.ifmt)
 		+ format("#define PS_AEM %d\n", sel.aem)
 		+ format("#define PS_TFX %d\n", sel.tfx)
 		+ format("#define PS_TCC %d\n", sel.tcc)
diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h
index be659cf713..c46df0dbb9 100644
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@@ -315,8 +315,9 @@ class GSDeviceOGL : public GSDevice
 				uint32 wms:2;
 				uint32 wmt:2;
 				uint32 ltf:1;
+				uint32 ifmt:2;
 
-				uint32 _free1:4;
+				uint32 _free1:2;
 
 				// Word 2
 				uint32 blend:8;
diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp
index 99c2156fae..a64643f246 100644
--- a/plugins/GSdx/GSRendererOGL.cpp
+++ b/plugins/GSdx/GSRendererOGL.cpp
@@ -506,7 +506,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 
 		ps_sel.wms = context->CLAMP.WMS;
 		ps_sel.wmt = context->CLAMP.WMT;
-		ps_sel.fmt = tex->m_palette ? cpsm.fmt | 4 : cpsm.fmt;
+		if (tex->m_palette) {
+			ps_sel.fmt = cpsm.fmt | 4;
+			ps_sel.ifmt = (context->TEX0.PSM == 0x1B) ? 3
+				: (context->TEX0.PSM == 0x24) ? 2
+				: (context->TEX0.PSM == 0x2C) ? 1
+				: 0;
+			GL_INS("Use palette with format %d and index format %d", ps_sel.fmt, ps_sel.ifmt);
+		} else {
+			ps_sel.fmt = cpsm.fmt;
+		}
 		ps_sel.aem = env.TEXA.AEM;
 		ps_sel.tfx = context->TEX0.TFX;
 		ps_sel.tcc = context->TEX0.TCC;
@@ -573,6 +582,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 #ifdef ENABLE_OGL_DEBUG
 		// Unattach texture to avoid noise in debugger
 		dev->PSSetShaderResource(0, NULL);
+		dev->PSSetShaderResource(1, NULL);
 #endif
 	}
 
diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl
index c9f6d20a39..a36fcb1444 100644
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@@ -99,11 +99,9 @@ vec4 sample_c(vec2 uv)
 	return texture(TextureSampler, uv);
 }
 
-vec4 sample_p(float u)
+vec4 sample_p(uint idx) // idx: integer palette entry index
 {
-	//FIXME do we need a 1D sampler. Big impact on opengl to find 1 dim
-	// So for the moment cheat with 0.0f dunno if it work
-	return texture(PaletteSampler, vec2(u, 0.0f));
+	return texelFetch(PaletteSampler, ivec2(idx, 0u), 0); // palette texel (idx, 0), LOD 0
 }
 
 vec4 wrapuv(vec4 uv)
@@ -168,21 +166,47 @@ mat4 sample_4c(vec4 uv)
 	return c;
 }
 
-vec4 sample_4a(vec4 uv)
+uvec4 sample_4_index(vec4 uv)
 {
 	vec4 c;
 
-	// Dx used the alpha channel.
-	// Opengl is only 8 bits on red channel.
+	// Returns the integer palette index of each of the 4 texels addressed by uv.
+	// PS_IFMT == 0: GSdx sent a single channel texture, so the index is in
+	// the red channel (whereas Dx uses alpha).
+	// PS_IFMT != 0: old RT (ie RGBA8) that contains index (4/8) in the alpha channel
+
+#if PS_IFMT == 0
+	// Single channel texture
 	c.x = sample_c(uv.xy).r;
 	c.y = sample_c(uv.zy).r;
 	c.z = sample_c(uv.xw).r;
 	c.w = sample_c(uv.zw).r;
+	// c is still a sampled float here; it is converted to an integer below
+#else
+	// 4 channels texture
+	c.x = sample_c(uv.xy).a;
+	c.y = sample_c(uv.zy).a;
+	c.z = sample_c(uv.xw).a;
+	c.w = sample_c(uv.zw).a;
+#endif
+
+	uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value (+0.5 rounds to nearest)
+	// The 4-bit formats below keep only one nibble of that 8-bit value
+
+#if PS_IFMT == 1
+	// 4HH alpha: index is the high nibble
+	return i >> 4u;
+#elif PS_IFMT == 2
+	// 4HL alpha: index is the low nibble, so mask with 0xF (not 16, a single bit)
+	return i & 0xFu;
+#else
+	// 8 bits alpha or red: the whole value is the index
+	return i;
+#endif
 
-	return c * 255.0/256.0 + 0.5/256.0;
 }
 
-mat4 sample_4p(vec4 u)
+mat4 sample_4p(uvec4 u)
 {
 	mat4 c;
 
@@ -231,7 +255,7 @@ vec4 sample_color(vec2 st, float q)
 
 	if((PS_FMT & FMT_PAL) != 0)
 	{
-		c = sample_4p(sample_4a(uv));
+		c = sample_4p(sample_4_index(uv));
 	}
 	else
 	{
diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h
index 3f5083eefa..8a1fa11d27 100644
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@@ -852,11 +852,9 @@ static const char* tfx_fs_all_glsl =
 	"	return texture(TextureSampler, uv);\n"
 	"}\n"
 	"\n"
-	"vec4 sample_p(float u)\n"
+	"vec4 sample_p(uint idx)\n" // GLSL helper: idx is an integer palette entry index
 	"{\n"
-	"	//FIXME do we need a 1D sampler. Big impact on opengl to find 1 dim\n"
-	"	// So for the moment cheat with 0.0f dunno if it work\n"
-	"	return texture(PaletteSampler, vec2(u, 0.0f));\n"
+	"	return texelFetch(PaletteSampler, ivec2(idx, 0u), 0);\n" // palette texel (idx, 0), LOD 0
 	"}\n"
 	"\n"
 	"vec4 wrapuv(vec4 uv)\n"
@@ -921,21 +919,47 @@ static const char* tfx_fs_all_glsl =
 	"	return c;\n"
 	"}\n"
 	"\n"
-	"vec4 sample_4a(vec4 uv)\n"
+	"uvec4 sample_4_index(vec4 uv)\n" // GLSL helper: integer palette index of each of the 4 texels in uv
 	"{\n"
 	"	vec4 c;\n"
 	"\n"
-	"	// Dx used the alpha channel.\n"
-	"	// Opengl is only 8 bits on red channel.\n"
+	"	// Either GSdx will send a texture that contains a single channel\n"
+	"	// in this case we must use the red channel (whereas Dx uses alpha)\n"
+	"	//\n"
+	"	// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n"
+	"\n"
+	"#if PS_IFMT == 0\n"
+	"	// Single channel texture\n"
 	"	c.x = sample_c(uv.xy).r;\n"
 	"	c.y = sample_c(uv.zy).r;\n"
 	"	c.z = sample_c(uv.xw).r;\n"
 	"	c.w = sample_c(uv.zw).r;\n"
+	"	//return c * 255.0/256.0 + 0.5/256.0;\n"
+	"#else\n"
+	"	// 4 channels texture\n"
+	"	c.x = sample_c(uv.xy).a;\n"
+	"	c.y = sample_c(uv.zy).a;\n"
+	"	c.z = sample_c(uv.xw).a;\n"
+	"	c.w = sample_c(uv.zw).a;\n"
+	"#endif\n"
+	"\n"
+	"	uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n"
+	"	//return (i/uint(16)) & uint(0xF);\n"
+	"\n"
+	"#if PS_IFMT == 1\n"
+	"	// 4HH alpha\n"
+	"	return i >> 4u;\n" // 4HH: index is the high nibble
+	"#elif PS_IFMT == 2\n"
+	"	// 4HL alpha\n"
+	"	return i & 0xFu;\n" // 4HL: index is the low nibble, so the mask is 0xF (not 16, a single bit)
+	"#else\n"
+	"	// 8 bits alpha or red\n"
+	"	return i;\n"
+	"#endif\n"
 	"\n"
-	"	return c * 255.0/256.0 + 0.5/256.0;\n"
 	"}\n"
 	"\n"
-	"mat4 sample_4p(vec4 u)\n"
+	"mat4 sample_4p(uvec4 u)\n"
 	"{\n"
 	"	mat4 c;\n"
 	"\n"
@@ -984,7 +1008,7 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"	if((PS_FMT & FMT_PAL) != 0)\n"
 	"	{\n"
-	"		c = sample_4p(sample_4a(uv));\n"
+	"		c = sample_4p(sample_4_index(uv));\n"
 	"	}\n"
 	"	else\n"
 	"	{\n"