gsdx-ogl: move depth conversion shader

Add 2 new shaders: * ps_main12: cast a 16 bit depth to a RGB5A1 color * ps_main16: cast a RGB5A1 color to a 16 bit depth. The shaders might be used in a future commit as it seems Silent Hill uses this kind of format. Fix tab/indentation too
2015-09-08 11:19:07 +02:00 · 2015-09-08 11:19:07 +02:00 · ca9b5ce11d
parent 4eed4ca3a1
commit ca9b5ce11d
5 changed files with 181 additions and 145 deletions
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@ -950,7 +950,8 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
 		return;
 	}

-	bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13] || ps == m_convert.ps[14]);
+	bool draw_in_depth = (ps == m_convert.ps[13] || ps == m_convert.ps[14] ||
+		ps == m_convert.ps[15] || ps == m_convert.ps[16]);

 	// Performance optimization. It might be faster to use a framebuffer blit for standard case
 	// instead to emulate it with shader
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@ -411,7 +411,7 @@ class GSDeviceOGL : public GSDevice

 	struct {
 		GLuint vs;		// program object
-		GLuint ps[16];	// program object
+		GLuint ps[18];	// program object
 		GLuint ln;		// sampler object
 		GLuint pt;		// sampler object
 		GSDepthStencilOGL* dss;
--- a/plugins/GSdx/GSTextureCache.cpp
+++ b/plugins/GSdx/GSTextureCache.cpp
@ -291,8 +291,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int

 				if (type == DepthStencil) {
 					GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
-					int shader = 12 + GSLocalMemory::m_psm[TEX0.PSM].fmt;
-					ASSERT(shader <= 14);
+					int shader = 13 + GSLocalMemory::m_psm[TEX0.PSM].fmt;
 					m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false);
 				} else {
 					GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
@ -870,7 +869,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con

 		if (is_8bits) {
 			GL_INS("Reading RT as a packed-indexed 8 bits format");
-			shader = 15; // ask a conversion to 8 bits format
+			shader = 17; // ask a conversion to 8 bits format
 		}

 #ifdef ENABLE_OGL_DEBUG
--- a/plugins/GSdx/res/glsl/convert.glsl
+++ b/plugins/GSdx/res/glsl/convert.glsl
@ -88,17 +88,6 @@ vec4 ps_crt(uint i)
    return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);
 }

-vec4 ps_scanlines(uint i)
-{
-	vec4 mask[2] =
-	{
-		vec4(1, 1, 1, 0),
-		vec4(0, 0, 0, 0)
-	};
-
-	return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);
-}
-
 #ifdef ps_main0
 void ps_main0()
 {
@ -143,7 +132,7 @@ void ps_main1()

    highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));

-	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
+    // bit field operation requires GL4 HW.
    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
 #endif

@ -155,20 +144,17 @@ void ps_main1()
 void ps_main10()
 {
    // Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture
-	vec4 c = sample_c();
-	const float exp2_32 = exp2(32.0f);
-	SV_Target1 = uint(exp2_32 * c.r);
+    SV_Target1 = uint(exp2(32.0f) * sample_c().r);
 }
 #endif

 #ifdef ps_main11
 void ps_main11()
 {
-	const float exp2_32 = exp2(32.0f);
-	const vec4 bitSh = vec4(256.0*256.0*256.0, 256.0*256.0, 256.0, 1.0);
+    // Convert a GL_FLOAT32 depth texture into a RGBA color texture
+    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);

-	// Convert a GL_FLOAT32 depth texture into a RGBA texture
    vec4 res = fract(vec4(sample_c().r) * bitSh);

    SV_Target0 = (res - res.xxyz * bitMsk) * 256.0f/255.0f;
@ -176,44 +162,65 @@ void ps_main11()
 #endif

 #ifdef ps_main12
-//out float gl_FragDepth;
 void ps_main12()
 {
-	// Convert a RRGBA texture into a float depth texture
-	// FIXME: I'm afraid of the accuracy
-	const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 1.0) * vec4(255.0/256.0);
-	gl_FragDepth = dot(sample_c(), bitSh);
+    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
+    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
+    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);
+    uvec4 color = uvec4(vec4(sample_c().r) * bitSh) & bitMsk;
+
+    SV_Target0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f);
 }
 #endif

 #ifdef ps_main13
-//out float gl_FragDepth;
 void ps_main13()
+{
+    // Convert a RRGBA texture into a float depth texture
+    // FIXME: I'm afraid of the accuracy
+    const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0);
+    gl_FragDepth = dot(sample_c(), bitSh);
+}
+#endif
+
+#ifdef ps_main14
+void ps_main14()
 {
    // Same as above but without the alpha channel (24 bits Z)

    // Convert a RRGBA texture into a float depth texture
    // FIXME: I'm afraid of the accuracy
-	const vec3 bitSh = vec3(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0) * vec3(255.0/256.0);
+    const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0);
    gl_FragDepth = dot(sample_c().rgb, bitSh);
 }
 #endif

-#ifdef ps_main14
-//out float gl_FragDepth;
-void ps_main14()
-{
-	// Same as above but without the A/B channels (16 bits Z)
-
-	// Convert a RRGBA texture into a float depth texture
-	// FIXME: I'm afraid of the accuracy
-	const vec2 bitSh = vec2(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0)) * vec2(255.0/256.0);
-	gl_FragDepth = dot(sample_c().rg, bitSh);
-}
-#endif
-
 #ifdef ps_main15
 void ps_main15()
+{
+    // Same as above but without the A/B channels (16 bits Z)
+
+    // Convert a RRGBA texture into a float depth texture
+    // FIXME: I'm afraid of the accuracy
+    const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0);
+    gl_FragDepth = dot(sample_c().rg, bitSh);
+}
+#endif
+
+#ifdef ps_main16
+void ps_main16()
+{
+    // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z
+    // FIXME: I'm afraid of the accuracy
+    const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));
+    // Trunc color to drop useless lsb
+    vec4 color = trunc(sample_c() * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f));
+    gl_FragDepth = dot(vec4(color), bitSh);
+}
+#endif
+
+#ifdef ps_main17
+void ps_main17()
 {

    // Potential speed optimization. There is a high probability that
@ -222,7 +229,7 @@ void ps_main15()
    // boost on MGS3
    //
    // Hypothesis wrong in Prince of Persia ... Seriously WTF !
-//#define ONLY_BLUE;
+    //#define ONLY_BLUE;

    // Convert a RGBA texture into a 8 bits packed texture
    // Input column: 8x2 RGBA pixels
@ -236,7 +243,7 @@ void ps_main15()
    float c;

    uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);
-    ivec2 tb  = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u);
+    ivec2 tb  = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1);

    int ty   = tb.y | (int(gl_FragCoord.y) & 1);
    int txN  = tb.x | (int(gl_FragCoord.x) & 7);
@ -306,6 +313,17 @@ void ps_main7()
 #endif

 #ifdef ps_main5
+vec4 ps_scanlines(uint i)
+{
+    vec4 mask[2] =
+    {
+        vec4(1, 1, 1, 0),
+        vec4(0, 0, 0, 0)
+    };
+
+    return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);
+}
+
 void ps_main5() // scanlines
 {
    highp uvec4 p = uvec4(PSin_p);
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -113,17 +113,6 @@ static const char* convert_glsl =
 	"    return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
 	"}\n"
 	"\n"
-	"vec4 ps_scanlines(uint i)\n"
-	"{\n"
-	"	vec4 mask[2] =\n"
-	"	{\n"
-	"		vec4(1, 1, 1, 0),\n"
-	"		vec4(0, 0, 0, 0)\n"
-	"	};\n"
-	"\n"
-	"	return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
-	"}\n"
-	"\n"
 	"#ifdef ps_main0\n"
 	"void ps_main0()\n"
 	"{\n"
@ -168,7 +157,7 @@ static const char* convert_glsl =
 	"\n"
 	"    highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
 	"\n"
-	"	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
+	"    // bit field operation requires GL4 HW.\n"
 	"    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
 	"#endif\n"
 	"\n"
@ -180,20 +169,17 @@ static const char* convert_glsl =
 	"void ps_main10()\n"
 	"{\n"
 	"    // Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture\n"
-	"	vec4 c = sample_c();\n"
-	"	const float exp2_32 = exp2(32.0f);\n"
-	"	SV_Target1 = uint(exp2_32 * c.r);\n"
+	"    SV_Target1 = uint(exp2(32.0f) * sample_c().r);\n"
 	"}\n"
 	"#endif\n"
 	"\n"
 	"#ifdef ps_main11\n"
 	"void ps_main11()\n"
 	"{\n"
-	"	const float exp2_32 = exp2(32.0f);\n"
-	"	const vec4 bitSh = vec4(256.0*256.0*256.0, 256.0*256.0, 256.0, 1.0);\n"
+	"    // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n"
+	"    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n"
 	"    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n"
 	"\n"
-	"	// Convert a GL_FLOAT32 depth texture into a RGBA texture\n"
 	"    vec4 res = fract(vec4(sample_c().r) * bitSh);\n"
 	"\n"
 	"    SV_Target0 = (res - res.xxyz * bitMsk) * 256.0f/255.0f;\n"
@ -201,45 +187,66 @@ static const char* convert_glsl =
 	"#endif\n"
 	"\n"
 	"#ifdef ps_main12\n"
-	"//out float gl_FragDepth;\n"
 	"void ps_main12()\n"
 	"{\n"
-	"	// Convert a RRGBA texture into a float depth texture\n"
-	"	// FIXME: I'm afraid of the accuracy\n"
-	"	const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 1.0) * vec4(255.0/256.0);\n"
-	"	gl_FragDepth = dot(sample_c(), bitSh);\n"
+	"    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n"
+	"    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n"
+	"    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n"
+	"    uvec4 color = uvec4(vec4(sample_c().r) * bitSh) & bitMsk;\n"
+	"\n"
+	"    SV_Target0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f);\n"
 	"}\n"
 	"#endif\n"
 	"\n"
 	"#ifdef ps_main13\n"
-	"//out float gl_FragDepth;\n"
 	"void ps_main13()\n"
 	"{\n"
+	"    // Convert a RRGBA texture into a float depth texture\n"
+	"    // FIXME: I'm afraid of the accuracy\n"
+	"    const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0);\n"
+	"    gl_FragDepth = dot(sample_c(), bitSh);\n"
+	"}\n"
+	"#endif\n"
+	"\n"
+	"#ifdef ps_main14\n"
+	"void ps_main14()\n"
+	"{\n"
 	"    // Same as above but without the alpha channel (24 bits Z)\n"
 	"\n"
 	"    // Convert a RRGBA texture into a float depth texture\n"
 	"    // FIXME: I'm afraid of the accuracy\n"
-	"	const vec3 bitSh = vec3(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0) * vec3(255.0/256.0);\n"
+	"    const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0);\n"
 	"    gl_FragDepth = dot(sample_c().rgb, bitSh);\n"
 	"}\n"
 	"#endif\n"
 	"\n"
-	"#ifdef ps_main14\n"
-	"//out float gl_FragDepth;\n"
-	"void ps_main14()\n"
-	"{\n"
-	"	// Same as above but without the A/B channels (16 bits Z)\n"
-	"\n"
-	"	// Convert a RRGBA texture into a float depth texture\n"
-	"	// FIXME: I'm afraid of the accuracy\n"
-	"	const vec2 bitSh = vec2(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0)) * vec2(255.0/256.0);\n"
-	"	gl_FragDepth = dot(sample_c().rg, bitSh);\n"
-	"}\n"
-	"#endif\n"
-	"\n"
 	"#ifdef ps_main15\n"
 	"void ps_main15()\n"
 	"{\n"
+	"    // Same as above but without the A/B channels (16 bits Z)\n"
+	"\n"
+	"    // Convert a RRGBA texture into a float depth texture\n"
+	"    // FIXME: I'm afraid of the accuracy\n"
+	"    const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0);\n"
+	"    gl_FragDepth = dot(sample_c().rg, bitSh);\n"
+	"}\n"
+	"#endif\n"
+	"\n"
+	"#ifdef ps_main16\n"
+	"void ps_main16()\n"
+	"{\n"
+	"    // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z\n"
+	"    // FIXME: I'm afraid of the accuracy\n"
+	"    const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));\n"
+	"    // Trunc color to drop useless lsb\n"
+	"    vec4 color = trunc(sample_c() * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f));\n"
+	"    gl_FragDepth = dot(vec4(color), bitSh);\n"
+	"}\n"
+	"#endif\n"
+	"#endif\n"
+	"\n"
+	"#ifdef ps_main17\n"
+	"void ps_main17()\n"
+	"{\n"
 	"\n"
 	"    // Potential speed optimization. There is a high probability that\n"
 	"    // game only want to extract a single channel (blue). It will allow\n"
@ -247,7 +254,7 @@ static const char* convert_glsl =
 	"    // boost on MGS3\n"
 	"    //\n"
 	"    // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n"
-	"//#define ONLY_BLUE;\n"
+	"    //#define ONLY_BLUE;\n"
 	"\n"
 	"    // Convert a RGBA texture into a 8 bits packed texture\n"
 	"    // Input column: 8x2 RGBA pixels\n"
@ -261,7 +268,7 @@ static const char* convert_glsl =
 	"    float c;\n"
 	"\n"
 	"    uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
-	"    ivec2 tb  = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u);\n"
+	"    ivec2 tb  = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1);\n"
 	"\n"
 	"    int ty   = tb.y | (int(gl_FragCoord.y) & 1);\n"
 	"    int txN  = tb.x | (int(gl_FragCoord.x) & 7);\n"
@ -331,6 +338,17 @@ static const char* convert_glsl =
 	"#endif\n"
 	"\n"
 	"#ifdef ps_main5\n"
+	"vec4 ps_scanlines(uint i)\n"
+	"{\n"
+	"    vec4 mask[2] =\n"
+	"    {\n"
+	"        vec4(1, 1, 1, 0),\n"
+	"        vec4(0, 0, 0, 0)\n"
+	"    };\n"
+	"\n"
+	"    return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
+	"}\n"
+	"\n"
 	"void ps_main5() // scanlines\n"
 	"{\n"
 	"    highp uvec4 p = uvec4(PSin_p);\n"
@ -369,11 +387,11 @@ static const char* convert_glsl =
 	"\n"
 	"    const float PI = 3.14159265359f;\n"
 	"\n"
-	"	vec2 texdim = vec2(textureSize(TextureSampler, 0)); \n"
+	"    vec2 texdim = vec2(textureSize(TextureSampler, 0));\n"
 	"\n"
 	"    vec4 c;\n"
 	"    if (dFdy(PSin_t.y) * PSin_t.y > 0.5f) {\n"
-	"        c = sample_c(); \n"
+	"        c = sample_c();\n"
 	"    } else {\n"
 	"        float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin_t.y * texdim.y));\n"
 	"        c =  factor * texture(TextureSampler, vec2(PSin_t.x, (floor(PSin_t.y * texdim.y) + 0.5f) / texdim.y));\n"