diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp
index 0a43a81c94..73a84fb948 100644
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@@ -584,7 +584,7 @@ void GSDeviceOGL::DebugOutput()
 	} else {
 		if (m_state.rtv != NULL) m_state.rtv->Save(format("/tmp/out_f%d__d%d__tex.bmp", g_frame_count, g_draw_count));
 	}
-	//if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_out_%d.bmp", g_draw_count));
+	if (m_state.dsv != NULL) m_state.dsv->Save(format("/tmp/ds_out_%d.bmp", g_draw_count));
 
 	fprintf(stderr, "\n");
 	//DebugBB();
@@ -668,7 +668,17 @@ void GSDeviceOGL::ClearDepth(GSTexture* t, float c)
 	OMSetFBO(m_fbo);
 	static_cast<GSTextureOGL*>(t)->Attach(GL_DEPTH_STENCIL_ATTACHMENT);
 	// FIXME can you clean depth and stencil separately
-	glClearBufferfv(GL_DEPTH, 0, &c);
+	// XXX: glClear* depends on the scissor test!!! Disable it because the viewport
+	// could be smaller than the texture and we really want to clear all pixels.
+	glDisable(GL_SCISSOR_TEST);
+	if (m_state.dss != NULL && m_state.dss->IsMaskEnable()) {
+		glClearBufferfv(GL_DEPTH, 0, &c);
+	} else {
+		glDepthMask(true);
+		glClearBufferfv(GL_DEPTH, 0, &c);
+		glDepthMask(false);
+	}
+	glEnable(GL_SCISSOR_TEST);
 	OMSetFBO(fbo_old);
 }
 
diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h
index 07b8bd1486..8ad6b4554e 100644
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@@ -228,6 +228,8 @@ public:
 		if (!m_stencil_enable) return;
 		fprintf(stderr, "Stencil %s. Both pass op %s\n", NameOfParam(m_stencil_func), NameOfParam(m_stencil_spass_dpass_op));
 	}
+
+	bool IsMaskEnable() const { return m_depth_mask; } // read-only accessor for m_depth_mask
 };
 
 class GSDeviceOGL : public GSDevice
diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp
index 1eb11535a6..6b08be207b 100644
--- a/plugins/GSdx/GSTextureFXOGL.cpp
+++ b/plugins/GSdx/GSTextureFXOGL.cpp
@@ -46,21 +46,6 @@ void GSDeviceOGL::CreateTextureFX()
 	glSamplerParameteri(m_rt_ss, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
 	// FIXME: need ogl extension sd.MaxAnisotropy = 16;
 
-	//{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
-	//{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
-	//float2 t : TEXCOORD0;
-	//float q : TEXCOORD1;
-	//
-	//{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
-	//{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
-	//uint2 p : POSITION0;
-	//uint z : POSITION1;
-	//
-	//{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
-	//{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
-	//float4 c : COLOR0;
-	//float4 f : COLOR1;
-
 	GSInputLayoutOGL vert_format[] =
 	{
 		// FIXME
@@ -74,6 +59,13 @@ void GSDeviceOGL::CreateTextureFX()
 		{6 , 4 , GL_UNSIGNED_BYTE  , GL_TRUE  , sizeof(GSVertex) , (const GLvoid*)(28) } ,
 	};
 	m_vb = new GSVertexBufferStateOGL(sizeof(GSVertex), vert_format, countof(vert_format));
+
+	// Compile some dummy shaders to allow modification inside Apitrace for debug
+	GLuint dummy;
+	std::string macro = "";
+	CompileShaderFromSource("tfx.glsl", "vs_main", GL_VERTEX_SHADER, &dummy, macro);
+	CompileShaderFromSource("tfx.glsl", "gs_main", GL_GEOMETRY_SHADER, &dummy, macro);
+	CompileShaderFromSource("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, &dummy, macro);
 }
 
 void GSDeviceOGL::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp
index 73aef4a5d9..0cc0b6a8b2 100644
--- a/plugins/GSdx/GSTextureOGL.cpp
+++ b/plugins/GSdx/GSTextureOGL.cpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <limits.h>
 #include "GSTextureOGL.h"
 static int g_state_texture_unit = -1;
 static int g_state_texture_id = -1;
@@ -372,16 +373,22 @@ void GSTextureOGL::Save(const string& fn, const void* image, uint32 pitch)
 
 	for(int h = m_size.y; h > 0; h--, data -= pitch)
 	{
-		if (IsDss()) {
+		if (false && IsDss()) {
 			// Only get the depth and convert it to an integer
 			uint8* better_data = data;
 			for (int w = m_size.x; w > 0; w--, better_data += 8) {
 				float* input = (float*)better_data;
 				// FIXME how to dump 32 bits value into 8bits component color
-				uint32 depth = (uint32)ldexpf(*input, 32);
-				uint8 small_depth = depth >> 24;
-				uint8 better_data[4] = {small_depth, small_depth, small_depth, 0 };
-				fwrite(&better_data, 1, 4, fp);
+				// Scale in double precision: (float)UINT_MAX rounds up to 2^32, so a
+				// depth of 1.0 would not fit in a GLuint after the conversion.
+				GLuint depth_integer = (GLuint)((double)*input * (double)UINT_MAX);
+				uint8 r = (depth_integer >>  0) & 0xFF;
+				uint8 g = (depth_integer >>  8) & 0xFF;
+				uint8 b = (depth_integer >> 16) & 0xFF;
+				uint8 a = (depth_integer >> 24) & 0xFF;
+
+				uint8 bytes[4] = { r, g, b, a }; // least significant byte first
+				fwrite(bytes, 1, 4, fp);
 			}
 		} else {
 			// swap red and blue
@@ -402,7 +409,6 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
 {
 	// Collect the texture data
 	uint32 pitch = 4 * m_size.x;
-	if (IsDss()) pitch *= 2;
 	char* image = (char*)malloc(pitch * m_size.y);
 	bool status = true;
 
@@ -413,8 +419,14 @@ bool GSTextureOGL::Save(const string& fn, bool dds)
 		//glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
 		glReadPixels(0, 0, m_size.x, m_size.y, GL_RGBA, GL_UNSIGNED_BYTE, image);
 	} else if(IsDss()) {
-		EnableUnit(0);
-		glGetTexImage(m_texture_target, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, image);
+		// Attach this texture as the depth attachment of m_fbo_read and read it back
+		glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
+		//EnableUnit(0);
+		glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, m_texture_target, m_texture_id, 0);
+		// Depth only, returned as one GL_UNSIGNED_INT per pixel
+		glReadPixels(0, 0, m_size.x, m_size.y, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, image);
+
+		glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
 	} else {
 		glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
 
diff --git a/plugins/GSdx/res/tfx.glsl b/plugins/GSdx/res/tfx.glsl
index dda0f1b7b2..7df9c2226b 100644
--- a/plugins/GSdx/res/tfx.glsl
+++ b/plugins/GSdx/res/tfx.glsl
@@ -11,6 +11,7 @@
 #define VS_BPPZ 0
 #define VS_TME 1
 #define VS_FST 1
+#define VS_LOGZ 0
 #endif
 
 #ifndef GS_IIP
@@ -57,74 +58,74 @@ layout(location = 6) in vec4  i_f;
 layout(location = 0) out vertex VSout;
 
 out gl_PerVertex {
-    vec4 gl_Position;
+    invariant vec4 gl_Position;
     float gl_PointSize;
     float gl_ClipDistance[];
 };
 
 layout(std140, binding = 4) uniform cb0
 {
-	vec4 VertexScale;
-	vec4 VertexOffset;
-	vec2 TextureScale;
+    vec4 VertexScale;
+    vec4 VertexOffset;
+    vec2 TextureScale;
 };
 
 void vs_main()
 {
     uint z;
-	if(VS_BPPZ == 1) // 24
-		z = i_z & uint(0xffffff);
-	else if(VS_BPPZ == 2) // 16
-		z = i_z & uint(0xffff);
+    if(VS_BPPZ == 1) // 24
+        z = i_z & uint(0xffffff);
+    else if(VS_BPPZ == 2) // 16
+        z = i_z & uint(0xffff);
     else
         z = i_z;
 
-	// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
-	// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
-	// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
-	// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
+    // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
+    // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
+    // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
+    // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
 
-	vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); 
-	vec4 final_p = p * VertexScale - VertexOffset;
+    vec4 p = vec4(i_p, z, 0) - vec4(0.05f, 0.05f, 0, 0); 
+    vec4 final_p = p * VertexScale - VertexOffset;
     // FIXME
     // FLIP vertically
     final_p.y *= -1.0f;
 
-	if(VS_LOGZ == 1)
-	{
-		final_p.z = log2(1.0f + float(z)) / 32.0f;
-	}
+    if(VS_LOGZ == 1)
+    {
+        final_p.z = log2(1.0f + float(z)) / 32.0f;
+    }
 
-	VSout.p = final_p;
+    VSout.p = final_p;
     gl_Position = final_p; // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position
 #if VS_RTCOPY
-	VSout.tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;
+    VSout.tp = final_p * vec4(0.5, -0.5, 0, 0) + 0.5;
 #endif
 
 
-	if(VS_TME != 0)
-	{
-		if(VS_FST != 0)
-		{
-			//VSout.t.xy = i_t * TextureScale;
-			VSout.t.xy = i_uv * TextureScale;
-			VSout.t.w = 1.0f;
-		}
-		else
-		{
-			//VSout.t.xy = i_t;
-			VSout.t.xy = i_st;
-			VSout.t.w = i_q;
-		}
-	}
-	else
-	{
-		VSout.t.xy = vec2(0.0f, 0.0f);
-		VSout.t.w = 1.0f;
-	}
+    if(VS_TME != 0)
+    {
+        if(VS_FST != 0)
+        {
+            //VSout.t.xy = i_t * TextureScale;
+            VSout.t.xy = i_uv * TextureScale;
+            VSout.t.w = 1.0f;
+        }
+        else
+        {
+            //VSout.t.xy = i_t;
+            VSout.t.xy = i_st;
+            VSout.t.w = i_q;
+        }
+    }
+    else
+    {
+        VSout.t.xy = vec2(0.0f, 0.0f);
+        VSout.t.w = 1.0f;
+    }
 
-	VSout.c = i_c;
-	VSout.t.z = i_f.r;
+    VSout.c = i_c;
+    VSout.t.z = i_f.r;
 }
 
 #endif
@@ -212,17 +213,17 @@ void gs_main()
 
     lt.p.z = rb.p.z;
     lt.t.zw = rb.t.zw;
-	#if GS_IIP == 0
-	lt.c = rb.c;
-	#endif
+#if GS_IIP == 0
+    lt.c = rb.c;
+#endif
 
     vertex lb = rb;
-	lb.p.x = lt.p.x;
-	lb.t.x = lt.t.x;
+    lb.p.x = lt.p.x;
+    lb.t.x = lt.t.x;
 
     vertex rt = rb;
-	rt.p.y = lt.p.y;
-	rt.t.y = lt.t.y;
+    rt.p.y = lt.p.y;
+    rt.t.y = lt.t.y;
 
     // Triangle 1
     gl_Position = lt.p;
@@ -237,9 +238,9 @@ void gs_main()
     GSout = rt;
     EmitVertex();
 
-	EndPrimitive();
+    EndPrimitive();
 
-	// Triangle 2
+    // Triangle 2
     gl_Position = lb.p;
     GSout = lb;
     EmitVertex();
@@ -273,369 +274,366 @@ layout(binding = 2) uniform sampler2D RTCopySampler;
 
 layout(std140, binding = 5) uniform cb1
 {
-	vec3 FogColor;
-	float AREF;
-	vec4 HalfTexel;
-	vec4 WH;
-	vec4 MinMax;
-	vec2 MinF;
-	vec2 TA;
-	uvec4 MskFix;
+    vec3 FogColor;
+    float AREF;
+    vec4 HalfTexel;
+    vec4 WH;
+    vec4 MinMax;
+    vec2 MinF;
+    vec2 TA;
+    uvec4 MskFix;
 };
 
 vec4 sample_c(vec2 uv)
 {
     // FIXME I'm not sure it is a good solution to flip texture
-	return texture(TextureSampler, uv);
+    return texture(TextureSampler, uv);
     //FIXME another way to FLIP vertically
-	//return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );
+    //return texture(TextureSampler, vec2(uv.x, 1.0f-uv.y) );
 }
 
 vec4 sample_p(float u)
 {
     //FIXME do we need a 1D sampler. Big impact on opengl to find 1 dim
     // So for the moment cheat with 0.0f dunno if it work
-	return texture(PaletteSampler, vec2(u, 0.0f));
+    return texture(PaletteSampler, vec2(u, 0.0f));
 }
 
 vec4 sample_rt(vec2 uv)
 {
-	return texture(RTCopySampler, uv);
+    return texture(RTCopySampler, uv);
 }
 
 vec4 wrapuv(vec4 uv)
 {
     vec4 uv_out = uv;
 
-	if(PS_WMS == PS_WMT)
-	{
-		if(PS_WMS == 2)
-		{
-			uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);
-		}
-		else if(PS_WMS == 3)
-		{
-			uv_out = vec4(((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy);
-		}
-	}
-	else
-	{
-		if(PS_WMS == 2)
-		{
-			uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
-		}
-		else if(PS_WMS == 3)
-		{
-			uv_out.xz = vec2(((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx);
-		}
-		if(PS_WMT == 2)
-		{
-			uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
-		}
-		else if(PS_WMT == 3)
-		{
-			uv_out.yw = vec2(((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy);
-		}
-	}
-	
-	return uv_out;
+    if(PS_WMS == PS_WMT)
+    {
+        if(PS_WMS == 2)
+        {
+            uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);
+        }
+        else if(PS_WMS == 3)
+        {
+            uv_out = vec4(((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy);
+        }
+    }
+    else
+    {
+        if(PS_WMS == 2)
+        {
+            uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);
+        }
+        else if(PS_WMS == 3)
+        {
+            uv_out.xz = vec2(((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx);
+        }
+        if(PS_WMT == 2)
+        {
+            uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);
+        }
+        else if(PS_WMT == 3)
+        {
+            uv_out.yw = vec2(((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy);
+        }
+    }
+
+    return uv_out;
 }
 
 vec2 clampuv(vec2 uv)
 {
     vec2 uv_out = uv;
 
-	if(PS_WMS == 2 && PS_WMT == 2) 
-	{
-		uv_out = clamp(uv, MinF, MinMax.zw);
-	}
-	else if(PS_WMS == 2)
-	{
-		uv_out.x = clamp(uv.x, MinF.x, MinMax.z);
-	}
-	else if(PS_WMT == 2)
-	{
-		uv_out.y = clamp(uv.y, MinF.y, MinMax.w);
-	}
-	
-	return uv_out;
+    if(PS_WMS == 2 && PS_WMT == 2) 
+    {
+        uv_out = clamp(uv, MinF, MinMax.zw);
+    }
+    else if(PS_WMS == 2)
+    {
+        uv_out.x = clamp(uv.x, MinF.x, MinMax.z);
+    }
+    else if(PS_WMT == 2)
+    {
+        uv_out.y = clamp(uv.y, MinF.y, MinMax.w);
+    }
+
+    return uv_out;
 }
 
 mat4 sample_4c(vec4 uv)
 {
-	mat4 c;
-	
-	c[0] = sample_c(uv.xy);
-	c[1] = sample_c(uv.zy);
-	c[2] = sample_c(uv.xw);
-	c[3] = sample_c(uv.zw);
+    mat4 c;
 
-	return c;
+    c[0] = sample_c(uv.xy);
+    c[1] = sample_c(uv.zy);
+    c[2] = sample_c(uv.xw);
+    c[3] = sample_c(uv.zw);
+
+    return c;
 }
 
 vec4 sample_4a(vec4 uv)
 {
-	vec4 c;
+    vec4 c;
 
-	c.x = sample_c(uv.xy).a;
-	c.y = sample_c(uv.zy).a;
-	c.z = sample_c(uv.xw).a;
-	c.w = sample_c(uv.zw).a;
-	
-	return c;
+    c.x = sample_c(uv.xy).a;
+    c.y = sample_c(uv.zy).a;
+    c.z = sample_c(uv.xw).a;
+    c.w = sample_c(uv.zw).a;
+
+    return c;
 }
 
 mat4 sample_4p(vec4 u)
 {
-	mat4 c;
-	
-	c[0] = sample_p(u.x);
-	c[1] = sample_p(u.y);
-	c[2] = sample_p(u.z);
-	c[3] = sample_p(u.w);
+    mat4 c;
 
-	return c;
+    c[0] = sample_p(u.x);
+    c[1] = sample_p(u.y);
+    c[2] = sample_p(u.z);
+    c[3] = sample_p(u.w);
+
+    return c;
 }
 
 vec4 sample_color(vec2 st, float q)
 {
-	if(PS_FST == 0)
-	{
-		st /= q;
-	}
-	
-	vec4 t;
-	if((PS_FMT <= FMT_16) && (PS_WMS < 3) && (PS_WMT < 3))
-	{
-		t = sample_c(clampuv(st));
-	}
-	else
-	{
-		vec4 uv;
-		vec2 dd;
-		
-		if(PS_LTF != 0)
-		{
-			uv = st.xyxy + HalfTexel;
-			dd = fract(uv.xy * WH.zw); 
-		}
-		else
-		{
-			uv = st.xyxy;
-		}
-		
-		uv = wrapuv(uv);
+    if(PS_FST == 0)
+    {
+        st /= q;
+    }
 
-		mat4 c;
+    vec4 t;
+    if((PS_FMT <= FMT_16) && (PS_WMS < 3) && (PS_WMT < 3))
+    {
+        t = sample_c(clampuv(st));
+    }
+    else
+    {
+        vec4 uv;
+        vec2 dd;
 
-		if((PS_FMT & FMT_PAL) != 0)
-			c = sample_4p(sample_4a(uv));
-		else
-			c = sample_4c(uv);
+        if(PS_LTF != 0)
+        {
+            uv = st.xyxy + HalfTexel;
+            dd = fract(uv.xy * WH.zw); 
+        }
+        else
+        {
+            uv = st.xyxy;
+        }
 
-		// PERF: see the impact of the exansion before/after the interpolation
-		for (int i = 0; i < 4; i++) {
-			if((PS_FMT & ~FMT_PAL) == FMT_16)
-			{
-				// FIXME GLSL any only support bvec so try to mix it with notEqual
-				bvec3 rgb_check = notEqual( t.rgb, vec3(0.0f, 0.0f, 0.0f) );
-				t.a = t.a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(rgb_check) ) ? TA.x : 0.0f; 
-			}
-		}
+        uv = wrapuv(uv);
 
-		if(PS_LTF != 0)
-		{
-			t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);
-		}
-		else
-		{
-			t = c[0];
-		}
-	}
-	
-	if(PS_FMT == FMT_24)
-	{
+        mat4 c;
+
+        if((PS_FMT & FMT_PAL) != 0)
+            c = sample_4p(sample_4a(uv));
+        else
+            c = sample_4c(uv);
+
+        // PERF: see the impact of the expansion before/after the interpolation
+        for (int i = 0; i < 4; i++) {
+            if((PS_FMT & ~FMT_PAL) == FMT_16)
+            {
+                // FIXME GLSL any() only supports bvec, so mix it with notEqual. NOTE(review): 't' is not assigned yet on this path and 'i' is unused; presumably c[i] was intended - confirm
+                bvec3 rgb_check = notEqual( t.rgb, vec3(0.0f, 0.0f, 0.0f) );
+                t.a = t.a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(rgb_check) ) ? TA.x : 0.0f; 
+            }
+        }
+
+        if(PS_LTF != 0)
+        {
+            t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);
+        }
+        else
+        {
+            t = c[0];
+        }
+    }
+
+    if(PS_FMT == FMT_24)
+    {
         // FIXME GLSL any only support bvec so try to mix it with notEqual
         bvec3 rgb_check = notEqual( t.rgb, vec3(0.0f, 0.0f, 0.0f) );
-		t.a = ( (PS_AEM == 0) || any(rgb_check)  ) ? TA.x : 0.0f;
-	}
-	else if(PS_FMT == FMT_16)
-	{
-		// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
+        t.a = ( (PS_AEM == 0) || any(rgb_check)  ) ? TA.x : 0.0f;
+    }
+    else if(PS_FMT == FMT_16)
+    {
+        // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
         // FIXME GLSL any only support bvec so try to mix it with notEqual
         bvec3 rgb_check = notEqual( t.rgb, vec3(0.0f, 0.0f, 0.0f) );
-		t.a = t.a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(rgb_check) ) ? TA.x : 0.0f; 
-	}
-	
-	return t;
+        t.a = t.a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(rgb_check) ) ? TA.x : 0.0f; 
+    }
+
+    return t;
 }
 
 vec4 tfx(vec4 t, vec4 c)
 {
     vec4 c_out = c;
-	if(PS_TFX == 0)
-	{
-		if(PS_TCC != 0) 
-		{
-			c_out = c * t * 255.0f / 128;
-		}
-		else
-		{
-			c_out.rgb = c.rgb * t.rgb * 255.0f / 128;
-		}
-	}
-	else if(PS_TFX == 1)
-	{
-		if(PS_TCC != 0) 
-		{
-			c_out = t;
-		}
-		else
-		{
-			c_out.rgb = t.rgb;
-		}
-	}
-	else if(PS_TFX == 2)
-	{
-		c_out.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
+    if(PS_TFX == 0)
+    {
+        if(PS_TCC != 0) 
+        {
+            c_out = c * t * 255.0f / 128;
+        }
+        else
+        {
+            c_out.rgb = c.rgb * t.rgb * 255.0f / 128;
+        }
+    }
+    else if(PS_TFX == 1)
+    {
+        if(PS_TCC != 0) 
+        {
+            c_out = t;
+        }
+        else
+        {
+            c_out.rgb = t.rgb;
+        }
+    }
+    else if(PS_TFX == 2)
+    {
+        c_out.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
 
-		if(PS_TCC != 0) 
-		{
-			c_out.a += t.a;
-		}
-	}
-	else if(PS_TFX == 3)
-	{
-		c_out.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
+        if(PS_TCC != 0) 
+        {
+            c_out.a += t.a;
+        }
+    }
+    else if(PS_TFX == 3)
+    {
+        c_out.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
 
-		if(PS_TCC != 0) 
-		{
-			c_out.a = t.a;
-		}
-	}
-	
-	return clamp(c_out, vec4(0.0f, 0.0f, 0.0f, 0.0f), vec4(1.0f, 1.0f, 1.0f, 1.0f));
+        if(PS_TCC != 0) 
+        {
+            c_out.a = t.a;
+        }
+    }
+
+    return clamp(c_out, vec4(0.0f, 0.0f, 0.0f, 0.0f), vec4(1.0f, 1.0f, 1.0f, 1.0f));
 }
 
 void datst()
 {
 #if PS_DATE > 0
-	float alpha = sample_rt(PSin.tp.xy).a;
-	float alpha0x80 = 128. / 255;
+    float alpha = sample_rt(PSin.tp.xy).a;
+    float alpha0x80 = 128. / 255;
 
-	if (PS_DATE == 1 && alpha >= alpha0x80)
-		discard;
-	else if (PS_DATE == 2 && alpha < alpha0x80)
-		discard;
+    if (PS_DATE == 1 && alpha >= alpha0x80)
+        discard;
+    else if (PS_DATE == 2 && alpha < alpha0x80)
+        discard;
 #endif
 }
 
 void atst(vec4 c)
 {
-	float a = trunc(c.a * 255);
-	
-	if(PS_ATST == 0) // never
-	{
-		discard;
-	}
-	else if(PS_ATST == 1) // always
-	{
-		// nothing to do
-	}
-	else if(PS_ATST == 2)
-	{
-	}
-	else if(PS_ATST == 2 ) // l
-	{
-		if (PS_SPRITEHACK == 0)
-			if ((AREF - a) < 0.0f)
-				discard;
-	}
-	else if(PS_ATST == 2 ) // le
-	{
-		if ((AREF - a) < 0.0f)
+    float a = trunc(c.a * 255);
+
+    if(PS_ATST == 0) // never
+    {
+        discard;
+    }
+    else if(PS_ATST == 1) // always
+    {
+        // nothing to do
+    }
+    else if(PS_ATST == 2 ) // l
+    {
+        if (PS_SPRITEHACK == 0)
+            if ((AREF - a) < 0.0f)
+                discard;
+    }
+    else if(PS_ATST == 3 ) // le
+    {
+        if ((AREF - a) < 0.0f)
             discard;
-	}
-	else if(PS_ATST == 4) // e
-	{
+    }
+    else if(PS_ATST == 4) // e
+    {
         if ((0.5f - abs(a - AREF)) < 0.0f)
             discard;
-	}
-	else if(PS_ATST == 5 || PS_ATST == 6) // ge, g
-	{
+    }
+    else if(PS_ATST == 5 || PS_ATST == 6) // ge, g
+    {
         if ((a-AREF) < 0.0f)
             discard;
-	}
-	else if(PS_ATST == 7) // ne
-	{
-		if ((abs(a - AREF) - 0.5f) < 0.0f)
+    }
+    else if(PS_ATST == 7) // ne
+    {
+        if ((abs(a - AREF) - 0.5f) < 0.0f)
             discard;
-	}
+    }
 }
 
 vec4 fog(vec4 c, float f)
 {
     vec4 c_out = c;
-	if(PS_FOG != 0)
-	{
-		c_out.rgb = mix(FogColor, c.rgb, f);
-	}
+    if(PS_FOG != 0)
+    {
+        c_out.rgb = mix(FogColor, c.rgb, f);
+    }
 
-	return c_out;
+    return c_out;
 }
 
 vec4 ps_color()
 {
-	datst();
+    datst();
 
-	vec4 t = sample_color(PSin.t.xy, PSin.t.w);
+    vec4 t = sample_color(PSin.t.xy, PSin.t.w);
 
-	vec4 c = tfx(t, PSin.c);
+    vec4 c = tfx(t, PSin.c);
 
-	atst(c);
+    atst(c);
 
-	c = fog(c, PSin.t.z);
+    c = fog(c, PSin.t.z);
 
-	if (PS_COLCLIP == 2)
-	{
-		c.rgb = 256.0f/255.0f - c.rgb;
-	}
-	if (PS_COLCLIP > 0)
-	{
+    if (PS_COLCLIP == 2)
+    {
+        c.rgb = 256.0f/255.0f - c.rgb;
+    }
+    if (PS_COLCLIP > 0)
+    {
         // FIXME !!!!
-		//c.rgb *= c.rgb < 128./255;
-		bvec3 factor = bvec3(128.0f/255.0f, 128.0f/255.0f, 128.0f/255.0f);
-		c.rgb *= vec3(factor);
-	}
+        // Keep only channels below 128/255 (HLSL: c.rgb *= c.rgb < 128./255); lessThan() yields the per-channel mask
+        bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));
+        c.rgb *= vec3(factor);
+    }
 
-	if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
-	{
-		c.rgb = vec3(1.0f, 1.0f, 1.0f); 
-	}
+    if(PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
+    {
+        c.rgb = vec3(1.0f, 1.0f, 1.0f); 
+    }
 
-	return c;
+    return c;
 }
 
 void ps_main()
 {
-	//FIXME
-	vec4 c = ps_color();
+    //FIXME
+    vec4 c = ps_color();
 
     // FIXME: I'm not sure about the value of others field
-	// output.c1 = c.a * 2; // used for alpha blending
+    // output.c1 = c.a * 2; // used for alpha blending
 
-	float alpha = c.a * 2;
+    float alpha = c.a * 2;
 
-	if(PS_AOUT != 0) // 16 bit output
-	{
-		float a = 128.0f / 255; // alpha output will be 0x80
-		
-		c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
-	}
-	else if(PS_FBA != 0)
-	{
-		if(c.a < 0.5) c.a += 0.5;
-	}
+    if(PS_AOUT != 0) // 16 bit output
+    {
+        float a = 128.0f / 255; // alpha output will be 0x80
+
+        c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
+    }
+    else if(PS_FBA != 0)
+    {
+        if(c.a < 0.5) c.a += 0.5;
+    }
 
     SV_Target0 = c;
     SV_Target1 = vec4(alpha, alpha, alpha, alpha);