From d31bd97d5952e115f430bacc47c405b7ce9fb5b5 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 26 May 2015 14:59:07 +0200 Subject: [PATCH 1/4] gsdx-ogl: add a variable to select FB output Either 32bits/24bits/16bits --- plugins/GSdx/GSDeviceOGL.cpp | 1 + plugins/GSdx/GSDeviceOGL.h | 4 +++- plugins/GSdx/GSRendererOGL.cpp | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 6b5044603b..23c13de24a 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -627,6 +627,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_WMT %d\n", sel.wmt) + format("#define PS_FMT %d\n", sel.fmt) + format("#define PS_IFMT %d\n", sel.ifmt) + + format("#define PS_DFMT %d\n", sel.dfmt) + format("#define PS_AEM %d\n", sel.aem) + format("#define PS_TFX %d\n", sel.tfx) + format("#define PS_TCC %d\n", sel.tcc) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index c46df0dbb9..88b7a25231 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -321,7 +321,9 @@ class GSDeviceOGL : public GSDevice // Word 2 uint32 blend:8; - uint32 _free2:24; + uint32 dfmt:2; + + uint32 _free2:22; }; uint64 key; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 0cffc776cc..1d1bf27721 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -249,6 +249,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSDeviceOGL::OMColorMaskSelector om_csel; GSDeviceOGL::OMDepthStencilSelector om_dssel; + // Format of the output + ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; + // Blend if (!IsOpaque()) From 9ee3a173d0a51d1401bd2775293c7b755c417730 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 26 May 2015 15:36:48 +0200 Subject: [PATCH 2/4] gsdx-ogl: use a local ALPHA register It would allow to easy tune the parameter to support 24 bits 
format --- plugins/GSdx/GSDeviceOGL.h | 4 ++-- plugins/GSdx/GSRendererOGL.cpp | 30 ++++++++++++++++-------------- plugins/GSdx/GSTextureFXOGL.cpp | 8 ++++---- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 88b7a25231..64f42e9f80 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -619,7 +619,7 @@ class GSDeviceOGL : public GSDevice GLuint CreateSampler(bool bilinear, bool tau, bool tav); GLuint CreateSampler(PSSamplerSelector sel); GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel); - GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, uint8 afix); + GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix); void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim); @@ -628,7 +628,7 @@ class GSDeviceOGL : public GSDevice void SetupPS(PSSelector sel); void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb); void SetupSampler(PSSamplerSelector ssel); - void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending = false); + void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending = false); GLuint GetSamplerID(PSSamplerSelector ssel); GLuint GetPaletteSamplerID(); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 1d1bf27721..42b5f4261f 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -252,16 +252,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Format of the output ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; + GIFRegALPHA ALPHA = context->ALPHA; + float afix = (float)context->ALPHA.FIX / 0x80; + // Blend if (!IsOpaque()) { om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS; - om_bsel.a = context->ALPHA.A; - om_bsel.b = context->ALPHA.B; - om_bsel.c = context->ALPHA.C; - 
om_bsel.d = context->ALPHA.D; + om_bsel.a = ALPHA.A; + om_bsel.b = ALPHA.B; + om_bsel.c = ALPHA.C; + om_bsel.d = ALPHA.D; if (env.PABE.PABE) { @@ -439,25 +442,25 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST && !m_accurate_colclip; bool acc_colclip_wrap = env.COLCLAMP.CLAMP == 0 && m_accurate_colclip; - if (context->ALPHA.A == context->ALPHA.B) { // Optimize-away colclip + if (ALPHA.A == ALPHA.B) { // Optimize-away colclip // No addition neither substraction so no risk of overflow the [0:255] range. colclip_wrap = false; acc_colclip_wrap = false; #ifdef ENABLE_OGL_DEBUG if (colclip_wrap || acc_colclip_wrap) { const char *col[3] = {"Cs", "Cd", "0"}; - GL_INS("COLCLIP: DISABLED: blending is a plain copy of %s", col[context->ALPHA.D]); + GL_INS("COLCLIP: DISABLED: blending is a plain copy of %s", col[ALPHA.D]); } #endif } if (colclip_wrap) { ps_sel.colclip = 1; - GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D); + GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); } else if (acc_colclip_wrap) { - ps_sel.colclip = 3; - GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D); - } else if (env.COLCLAMP.CLAMP == 0 && (context->ALPHA.A != context->ALPHA.B)) { - GL_INS("COLCLIP NOT SUPPORTED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D); + ps_sel.colclip = 3; + GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); + } else if (env.COLCLAMP.CLAMP == 0 && (ALPHA.A != ALPHA.B)) { + GL_INS("COLCLIP NOT SUPPORTED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D); } ps_sel.fba = context->FBA.FBA; @@ -613,8 +616,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, 
GSTextureCache::Sour dev->PSSetShaderResource(3, rt); // Require the fix alpha vlaue - if (context->ALPHA.C == 2) { - ps_cb.AlphaCoeff = GSVector4((float)(int)context->ALPHA.FIX / 0x80); + if (ALPHA.C == 2) { + ps_cb.AlphaCoeff = GSVector4(afix); } // No need to flush for every primitive @@ -632,7 +635,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour dev->SetupPS(ps_sel); // rs - uint8 afix = context->ALPHA.FIX; GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy()); diff --git a/plugins/GSdx/GSTextureFXOGL.cpp b/plugins/GSdx/GSTextureFXOGL.cpp index 4f0fb08247..7e3ddb09a7 100644 --- a/plugins/GSdx/GSTextureFXOGL.cpp +++ b/plugins/GSdx/GSTextureFXOGL.cpp @@ -100,7 +100,7 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel) return dss; } -GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix) +GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix) { GSBlendStateOGL* bs = new GSBlendStateOGL(); @@ -119,7 +119,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix) bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE); } - const string afixstr = format("%d >> 7", afix); + const string afixstr = format("%f", afix); const char *col[3] = {"Cs", "Cd", "0"}; const char *alpha[3] = {"As", "Ad", afixstr.c_str()}; fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]); @@ -235,7 +235,7 @@ GLuint GSDeviceOGL::GetPaletteSamplerID() return m_palette_ss; } -void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending) +void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending) { GSDepthStencilOGL* dss = m_om_dss[dssel]; @@ -267,5 +267,5 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, 
ui // ************************************************************* // Dynamic // ************************************************************* - OMSetBlendState(bs, (float)(int)afix / 0x80); + OMSetBlendState(bs, afix); } From 419dfe054464eeacadd1fb725c1ca8a1687571d4 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 26 May 2015 16:16:36 +0200 Subject: [PATCH 3/4] glsl: redo color/alpha management correction Please test it! GS supports 3 formats for the output: 32 bits: normal case => no change 24 bits: like 32 bits but without alpha channel => mask alpha channel (i.e. don't write it anymore) => Always uses 1.0f as blending coefficient 16 bits: RGB5A1, emulated by a 32 bits OpenGL texture. I think it would be more correct to use a real 16 bits GL texture. Unfortunately it would cost several (slow) target conversions. Anyway, as a current solution => apply a mask of 0xF8 on color when SW blending is used (improves Castlevania shadows). Unfortunately the normal blending mode still uses the full range of colors! This commit also corrects a couple of blending factors. 128/255 is equivalent to 1.0f on the PS2, whereas the GPU uses 255/255 as 1.0f. So the blending factor must be 255/128 instead of 2. Note: disable the CRC hack and enable accurate_colclip to see the Castlevania shadows ^^ (issue #380). Note2: SW renderer is darker on Castlevania. 
I don't know why maybe linked to the 16 bits format poorly emulated --- plugins/GSdx/GSRendererOGL.cpp | 7 ++++++ plugins/GSdx/res/glsl/tfx_fs.glsl | 37 ++++++++++++++++++++++++------- plugins/GSdx/res/glsl_source.h | 37 ++++++++++++++++++++++++------- 3 files changed, 65 insertions(+), 16 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 42b5f4261f..030961b540 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -288,6 +288,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + if (ps_sel.dfmt == 1) { + // 24 bits no alpha channel so use 1.0f fix factor as equivalent + ALPHA.C = 2; + afix = 1.0f; + // Disable writing of the alpha channel + om_csel.wa = 0; + } if (DATE) { if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) { diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index a36fcb1444..c0f8e00056 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -404,8 +404,13 @@ vec4 ps_color() void ps_blend(inout vec4 c, in float As) { vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); +#if PS_DFMT == FMT_24 + float Ad = 1.0f; +#else + // FIXME FMT_16 case // FIXME Ad or Ad * 2? - float Ad = rt.a; + float Ad = rt.a * 255.0f / 128.0f; +#endif // Let the compiler do its jobs ! vec3 Cd = rt.rgb; vec3 Cs = c.rgb; @@ -640,12 +645,26 @@ void ps_blend(inout vec4 c, in float As) #endif -#if PS_COLCLIP == 3 + // FIXME dithering + + // Correct the Color value based on the output format +#if PS_COLCLIP != 3 + // Standard Clamp + c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f)); +#endif + +#if PS_DFMT == FMT_16 + // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania + + // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode + c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f; +#elif PS_COLCLIP == 3 + // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f; +#endif // Don't compile => unable to find compatible overloaded function "mod(vec3)" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f; -#endif } void ps_main() @@ -700,14 +719,16 @@ void ps_main() c.a = 0.5f; #endif - float alpha = c.a * 2.0; + // Must be done before alpha correction + float alpha = c.a * 255.0f / 128.0f; -#if (PS_AOUT != 0) // 16 bit output + // Correct the ALPHA value based on the output format + // FIXME add support of alpha mask to replace properly PS_AOUT +#if (PS_DFMT == FMT_16) || (PS_AOUT) float a = 128.0f / 255.0; // alpha output will be 0x80 - c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a; -#elif (PS_FBA != 0) - if(c.a < 0.5) c.a += 0.5; +#elif (PS_DFMT == FMT_32) && (PS_FBA != 0) + if(c.a < 0.5) c.a += 128.0f/255.0f; #endif // Get first primitive that will write a failling alpha value diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 8a1fa11d27..1a6cc4dacd 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1157,8 +1157,13 @@ static const char* tfx_fs_all_glsl = "void ps_blend(inout vec4 c, in float As)\n" "{\n" " vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n" + "#if PS_DFMT == FMT_24\n" + " float Ad = 1.0f;\n" + "#else\n" + " // FIXME FMT_16 case\n" " // FIXME Ad or Ad * 2?\n" - " float Ad = rt.a;\n" + " float Ad = rt.a * 255.0f / 128.0f;\n" + "#endif\n" " // Let the compiler do its jobs !\n" " vec3 Cd = rt.rgb;\n" " vec3 Cs = c.rgb;\n" @@ -1393,12 +1398,26 @@ static const char* tfx_fs_all_glsl = "\n" "#endif\n" "\n" - "#if PS_COLCLIP == 3\n" + " // FIXME dithering\n" + "\n" + " // Correct the Color value based on 
the output format\n" + "#if PS_COLCLIP != 3\n" + " // Standard Clamp\n" + " c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n" + "#endif\n" + "\n" + "#if PS_DFMT == FMT_16\n" + " // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n" + "\n" + " // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n" + " c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;\n" + "#elif PS_COLCLIP == 3\n" + " // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n" " c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n" + "#endif\n" "\n" " // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n" " //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n" - "#endif\n" "}\n" "\n" "void ps_main()\n" @@ -1453,14 +1472,16 @@ static const char* tfx_fs_all_glsl = " c.a = 0.5f;\n" "#endif\n" "\n" - " float alpha = c.a * 2.0;\n" + " // Must be done before alpha correction\n" + " float alpha = c.a * 255.0f / 128.0f;\n" "\n" - "#if (PS_AOUT != 0) // 16 bit output\n" + " // Correct the ALPHA value based on the output format\n" + " // FIXME add support of alpha mask to replace properly PS_AOUT\n" + "#if (PS_DFMT == FMT_16) || (PS_AOUT)\n" " float a = 128.0f / 255.0; // alpha output will be 0x80\n" - "\n" " c.a = (PS_FBA != 0) ? 
a : step(0.5, c.a) * a;\n" - "#elif (PS_FBA != 0)\n" - " if(c.a < 0.5) c.a += 0.5;\n" + "#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n" + " if(c.a < 0.5) c.a += 128.0f/255.0f;\n" "#endif\n" "\n" " // Get first primitive that will write a failling alpha value\n" From c43ddaec4f765bddd2775a2d64571a32c52c6ad8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 26 May 2015 17:03:13 +0200 Subject: [PATCH 4/4] gsdx: add Castlevania hack explanation --- plugins/GSdx/GSState.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 16fbd40b49..c55bdf73f3 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -4314,6 +4314,18 @@ bool GSC_Castlevania(const GSFrameInfo& fi, int& skip) { if(skip == 0) { + // This hack removes the shadows and globally darker image + // I think there are 2 issues on GSdx + // + // 1/ potential not correctly supported colclip. + // + // 2/ use of a 32 bits format to emulate a 16 bit formats + // For example, if you blend 64 time the value 4 on a dark destination pixels + // + // FMT32: 4*64 = 256 <= white pixels + // + // FMT16: output of blending will always be 0 because the 3 lsb of color is dropped. + // Therefore the pixel remains dark !!! if(fi.TME && fi.FBP == 0 && fi.TBP0 && fi.TPSM == 10 && fi.FBMSK == 0xFFFFFF) { skip = 2;