Merge pull request #555 from PCSX2/real-fb-format

GSdx: better framebuffer format
2015-06-01 11:48:07 +02:00 · 2015-06-01 11:48:07 +02:00 · 2cbde89084
parent f81cf360bc c43ddaec4f
commit 2cbde89084
7 changed files with 106 additions and 37 deletions
--- a/plugins/GSdx/GSDeviceOGL.cpp
+++ b/plugins/GSdx/GSDeviceOGL.cpp
@ -627,6 +627,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
 		+ format("#define PS_WMT %d\n", sel.wmt)
 		+ format("#define PS_FMT %d\n", sel.fmt)
 		+ format("#define PS_IFMT %d\n", sel.ifmt)
+		+ format("#define PS_DFMT %d\n", sel.dfmt)
 		+ format("#define PS_AEM %d\n", sel.aem)
 		+ format("#define PS_TFX %d\n", sel.tfx)
 		+ format("#define PS_TCC %d\n", sel.tcc)
--- a/plugins/GSdx/GSDeviceOGL.h
+++ b/plugins/GSdx/GSDeviceOGL.h
@ -321,7 +321,9 @@ class GSDeviceOGL : public GSDevice

 				// Word 2
 				uint32 blend:8;
-				uint32 _free2:24;
+				uint32 dfmt:2;
+
+				uint32 _free2:22;
 			};

 			uint64 key;
@ -617,7 +619,7 @@ class GSDeviceOGL : public GSDevice
 	GLuint CreateSampler(bool bilinear, bool tau, bool tav);
 	GLuint CreateSampler(PSSamplerSelector sel);
 	GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
-	GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, uint8 afix);
+	GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix);


 	void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
@ -626,7 +628,7 @@ class GSDeviceOGL : public GSDevice
 	void SetupPS(PSSelector sel);
 	void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
 	void SetupSampler(PSSamplerSelector ssel);
-	void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending =  false);
+	void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending =  false);
 	GLuint GetSamplerID(PSSamplerSelector ssel);
 	GLuint GetPaletteSamplerID();

--- a/plugins/GSdx/GSRendererOGL.cpp
+++ b/plugins/GSdx/GSRendererOGL.cpp
@ -249,16 +249,22 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 	GSDeviceOGL::OMColorMaskSelector om_csel;
 	GSDeviceOGL::OMDepthStencilSelector om_dssel;

+	// Format of the output
+	ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
+
+	GIFRegALPHA ALPHA = context->ALPHA;
+	float afix = (float)context->ALPHA.FIX / 0x80;
+
 	// Blend

 	if (!IsOpaque())
 	{
 		om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;

-		om_bsel.a = context->ALPHA.A;
-		om_bsel.b = context->ALPHA.B;
-		om_bsel.c = context->ALPHA.C;
-		om_bsel.d = context->ALPHA.D;
+		om_bsel.a = ALPHA.A;
+		om_bsel.b = ALPHA.B;
+		om_bsel.c = ALPHA.C;
+		om_bsel.d = ALPHA.D;

 		if (env.PABE.PABE)
 		{
@ -282,6 +288,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 	}

 	om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
+	if (ps_sel.dfmt == 1) {
+		// A 24-bit format has no alpha channel, so use a fixed factor of 1.0f as the equivalent
+		ALPHA.C = 2;
+		afix = 1.0f;
+		// Disable writing of the alpha channel
+		om_csel.wa = 0;
+	}

 	if (DATE) {
 		if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) {
@ -436,25 +449,25 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour

 	bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST && !m_accurate_colclip;
 	bool acc_colclip_wrap = env.COLCLAMP.CLAMP == 0 && m_accurate_colclip;
-	if (context->ALPHA.A == context->ALPHA.B) { // Optimize-away colclip
+	if (ALPHA.A == ALPHA.B) { // Optimize-away colclip
 		// No addition neither substraction so no risk of overflow the [0:255] range.
 		colclip_wrap = false;
 		acc_colclip_wrap = false;
 #ifdef ENABLE_OGL_DEBUG
 		if (colclip_wrap || acc_colclip_wrap) {
 			const char *col[3] = {"Cs", "Cd", "0"};
-			GL_INS("COLCLIP: DISABLED: blending is a plain copy of %s", col[context->ALPHA.D]);
+			GL_INS("COLCLIP: DISABLED: blending is a plain copy of %s", col[ALPHA.D]);
 		}
 #endif
 	}
 	if (colclip_wrap) {
 		ps_sel.colclip = 1;
-		GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
+		GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
 	} else if (acc_colclip_wrap) {
 		ps_sel.colclip = 3;
-			GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
-	} else if (env.COLCLAMP.CLAMP == 0 && (context->ALPHA.A != context->ALPHA.B)) {
-			GL_INS("COLCLIP NOT SUPPORTED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
+		GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
+	} else if (env.COLCLAMP.CLAMP == 0 && (ALPHA.A != ALPHA.B)) {
+		GL_INS("COLCLIP NOT SUPPORTED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
 	}

 	ps_sel.fba = context->FBA.FBA;
@ -611,8 +624,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		dev->PSSetShaderResource(3, rt);

 		// Require the fix alpha vlaue
-		if (context->ALPHA.C == 2) {
-			ps_cb.AlphaCoeff = GSVector4((float)(int)context->ALPHA.FIX / 0x80);
+		if (ALPHA.C == 2) {
+			ps_cb.AlphaCoeff = GSVector4(afix);
 		}

 		// No need to flush for every primitive
@ -630,7 +643,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 	dev->SetupPS(ps_sel);

 	// rs
-	uint8 afix = context->ALPHA.FIX;

 	GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());

--- a/plugins/GSdx/GSState.cpp
+++ b/plugins/GSdx/GSState.cpp
@ -4315,6 +4315,18 @@ bool GSC_Castlevania(const GSFrameInfo& fi, int& skip)
 {
 	if(skip == 0)
 	{
+		// This hack removes the shadows and the globally darker image.
+		// I think there are 2 issues in GSdx:
+		//
+		// 1/ colclip is potentially not correctly supported.
+		//
+		// 2/ a 32-bit format is used to emulate a 16-bit format.
+		// For example, if you blend the value 4 onto a dark destination pixel 64 times:
+		//
+		// FMT32: 4*64 = 256, i.e. a white pixel
+		//
+		// FMT16: the output of blending will always be 0 because the 3 LSBs of each color are dropped.
+		//		  Therefore the pixel remains dark !!!
 		if(fi.TME && fi.FBP == 0 && fi.TBP0 && fi.TPSM == 10 && fi.FBMSK == 0xFFFFFF)
 		{
 			skip = 2;
--- a/plugins/GSdx/GSTextureFXOGL.cpp
+++ b/plugins/GSdx/GSTextureFXOGL.cpp
@ -100,7 +100,7 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
 	return dss;
 }

-GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
+GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix)
 {
 	GSBlendStateOGL* bs = new GSBlendStateOGL();

@ -119,7 +119,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
 					bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE);
 			}

-			const string afixstr = format("%d >> 7", afix);
+			const string afixstr = format("%f", afix);
 			const char *col[3] = {"Cs", "Cd", "0"};
 			const char *alpha[3] = {"As", "Ad", afixstr.c_str()};
 			fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]);
@ -235,7 +235,7 @@ GLuint GSDeviceOGL::GetPaletteSamplerID()
 	return m_palette_ss;
 }

-void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending)
+void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending)
 {
 	GSDepthStencilOGL* dss = m_om_dss[dssel];

@ -267,5 +267,5 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, ui
 	// *************************************************************
 	// Dynamic
 	// *************************************************************
-	OMSetBlendState(bs, (float)(int)afix / 0x80);
+	OMSetBlendState(bs, afix);
 }
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@ -404,8 +404,13 @@ vec4 ps_color()
 void ps_blend(inout vec4 c, in float As)
 {
 	vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
+#if PS_DFMT == FMT_24
+	float Ad = 1.0f;
+#else
+	// FIXME FMT_16 case
 	// FIXME Ad or Ad * 2?
-	float Ad = rt.a;
+	float Ad = rt.a * 255.0f / 128.0f;
+#endif
 	// Let the compiler do its jobs !
 	vec3 Cd = rt.rgb;
 	vec3 Cs = c.rgb;
@ -640,12 +645,26 @@ void ps_blend(inout vec4 c, in float As)

 #endif

-#if PS_COLCLIP == 3
+	// FIXME dithering
+
+	// Correct the Color value based on the output format
+#if PS_COLCLIP != 3
+	// Standard Clamp
+	c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));
+#endif
+
+#if PS_DFMT == FMT_16
+	// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
+
+	// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode
+	c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;
+#elif PS_COLCLIP == 3
+	// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode
 	c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;
+#endif

 	// Don't compile => unable to find compatible overloaded function "mod(vec3)"
 	//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
-#endif
 }

 void ps_main()
@ -700,14 +719,16 @@ void ps_main()
 	c.a = 0.5f;
 #endif

-	float alpha = c.a * 2.0;
+	// Must be done before alpha correction
+	float alpha = c.a * 255.0f / 128.0f;

-#if (PS_AOUT != 0) // 16 bit output
+	// Correct the ALPHA value based on the output format
+	// FIXME add support of alpha mask to replace properly PS_AOUT
+#if (PS_DFMT == FMT_16) || (PS_AOUT)
 	float a = 128.0f / 255.0; // alpha output will be 0x80
-
 	c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
-#elif (PS_FBA != 0)
-	if(c.a < 0.5) c.a += 0.5;
+#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)
+	if(c.a < 0.5) c.a += 128.0f/255.0f;
 #endif

 	// Get first primitive that will write a failling alpha value
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -1157,8 +1157,13 @@ static const char* tfx_fs_all_glsl =
 	"void ps_blend(inout vec4 c, in float As)\n"
 	"{\n"
 	"	vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
+	"#if PS_DFMT == FMT_24\n"
+	"	float Ad = 1.0f;\n"
+	"#else\n"
+	"	// FIXME FMT_16 case\n"
 	"	// FIXME Ad or Ad * 2?\n"
-	"	float Ad = rt.a;\n"
+	"	float Ad = rt.a * 255.0f / 128.0f;\n"
+	"#endif\n"
 	"	// Let the compiler do its jobs !\n"
 	"	vec3 Cd = rt.rgb;\n"
 	"	vec3 Cs = c.rgb;\n"
@ -1393,12 +1398,26 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"#endif\n"
 	"\n"
-	"#if PS_COLCLIP == 3\n"
+	"	// FIXME dithering\n"
+	"\n"
+	"	// Correct the Color value based on the output format\n"
+	"#if PS_COLCLIP != 3\n"
+	"	// Standard Clamp\n"
+	"	c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n"
+	"#endif\n"
+	"\n"
+	"#if PS_DFMT == FMT_16\n"
+	"	// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
+	"\n"
+	"	// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n"
+	"	c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;\n"
+	"#elif PS_COLCLIP == 3\n"
+	"	// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n"
 	"	c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n"
+	"#endif\n"
 	"\n"
 	"	// Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
 	"	//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
-	"#endif\n"
 	"}\n"
 	"\n"
 	"void ps_main()\n"
@ -1453,14 +1472,16 @@ static const char* tfx_fs_all_glsl =
 	"	c.a = 0.5f;\n"
 	"#endif\n"
 	"\n"
-	"	float alpha = c.a * 2.0;\n"
+	"	// Must be done before alpha correction\n"
+	"	float alpha = c.a * 255.0f / 128.0f;\n"
 	"\n"
-	"#if (PS_AOUT != 0) // 16 bit output\n"
+	"	// Correct the ALPHA value based on the output format\n"
+	"	// FIXME add support of alpha mask to replace properly PS_AOUT\n"
+	"#if (PS_DFMT == FMT_16) || (PS_AOUT)\n"
 	"	float a = 128.0f / 255.0; // alpha output will be 0x80\n"
-	"\n"
 	"	c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n"
-	"#elif (PS_FBA != 0)\n"
-	"	if(c.a < 0.5) c.a += 0.5;\n"
+	"#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n"
+	"	if(c.a < 0.5) c.a += 128.0f/255.0f;\n"
 	"#endif\n"
 	"\n"
 	"	// Get first primitive that will write a failling alpha value\n"