glsl: redo color/alpha management correction

Please test it!

GS supports 3 formats for the output:

32 bits: normal case
=> no change

24 bits: like 32 bits but without the alpha channel
=> mask the alpha channel (i.e. don't write it anymore)
=> always use 1.0f as the blending coefficient

16 bits: RGB5A1, emulated by a 32-bit OpenGL texture. I think it would be more correct to use
a real 16-bit GL texture. Unfortunately, that would cost several (slow) target conversions.
Anyway, as the current solution:
=> apply a mask of 0xF8 on the color when SW blending is used (improves the Castlevania shadow)
Unfortunately, the normal blending mode still uses the full range of colors!

This commit also corrects a couple of blending factors. On the PS2, an alpha of 128/255 is equivalent to 1.0f, whereas on the GPU 255/255 is 1.0f. So the normalized alpha must be scaled by 255/128 instead of 2 to get the blending factor.

Note: disable the CRC hack and enable accurate_colclip to see the Castlevania shadow ^^
(issue #380).
Note 2: the SW renderer is darker on Castlevania. I don't know why; maybe it is linked to the poorly emulated 16-bit format.
This commit is contained in:
Gregory Hainaut 2015-05-26 16:16:36 +02:00
parent 9ee3a173d0
commit 419dfe0544
3 changed files with 65 additions and 16 deletions

View File

@ -288,6 +288,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
} }
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
if (ps_sel.dfmt == 1) {
// 24 bits no alpha channel so use 1.0f fix factor as equivalent
ALPHA.C = 2;
afix = 1.0f;
// Disable writing of the alpha channel
om_csel.wa = 0;
}
if (DATE) { if (DATE) {
if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) { if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) {

View File

@ -404,8 +404,13 @@ vec4 ps_color()
void ps_blend(inout vec4 c, in float As) void ps_blend(inout vec4 c, in float As)
{ {
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
#if PS_DFMT == FMT_24
float Ad = 1.0f;
#else
// FIXME FMT_16 case
// FIXME Ad or Ad * 2? // FIXME Ad or Ad * 2?
float Ad = rt.a; float Ad = rt.a * 255.0f / 128.0f;
#endif
// Let the compiler do its jobs ! // Let the compiler do its jobs !
vec3 Cd = rt.rgb; vec3 Cd = rt.rgb;
vec3 Cs = c.rgb; vec3 Cs = c.rgb;
@ -640,12 +645,26 @@ void ps_blend(inout vec4 c, in float As)
#endif #endif
#if PS_COLCLIP == 3 // FIXME dithering
// Correct the Color value based on the output format
#if PS_COLCLIP != 3
// Standard Clamp
c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));
#endif
#if PS_DFMT == FMT_16
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;
#elif PS_COLCLIP == 3
// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f; c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;
#endif
// Don't compile => unable to find compatible overloaded function "mod(vec3)" // Don't compile => unable to find compatible overloaded function "mod(vec3)"
//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f; //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
#endif
} }
void ps_main() void ps_main()
@ -700,14 +719,16 @@ void ps_main()
c.a = 0.5f; c.a = 0.5f;
#endif #endif
float alpha = c.a * 2.0; // Must be done before alpha correction
float alpha = c.a * 255.0f / 128.0f;
#if (PS_AOUT != 0) // 16 bit output // Correct the ALPHA value based on the output format
// FIXME add support of alpha mask to replace properly PS_AOUT
#if (PS_DFMT == FMT_16) || (PS_AOUT)
float a = 128.0f / 255.0; // alpha output will be 0x80 float a = 128.0f / 255.0; // alpha output will be 0x80
c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a; c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
#elif (PS_FBA != 0) #elif (PS_DFMT == FMT_32) && (PS_FBA != 0)
if(c.a < 0.5) c.a += 0.5; if(c.a < 0.5) c.a += 128.0f/255.0f;
#endif #endif
// Get first primitive that will write a failling alpha value // Get first primitive that will write a failling alpha value

View File

@ -1157,8 +1157,13 @@ static const char* tfx_fs_all_glsl =
"void ps_blend(inout vec4 c, in float As)\n" "void ps_blend(inout vec4 c, in float As)\n"
"{\n" "{\n"
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n" " vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
"#if PS_DFMT == FMT_24\n"
" float Ad = 1.0f;\n"
"#else\n"
" // FIXME FMT_16 case\n"
" // FIXME Ad or Ad * 2?\n" " // FIXME Ad or Ad * 2?\n"
" float Ad = rt.a;\n" " float Ad = rt.a * 255.0f / 128.0f;\n"
"#endif\n"
" // Let the compiler do its jobs !\n" " // Let the compiler do its jobs !\n"
" vec3 Cd = rt.rgb;\n" " vec3 Cd = rt.rgb;\n"
" vec3 Cs = c.rgb;\n" " vec3 Cs = c.rgb;\n"
@ -1393,12 +1398,26 @@ static const char* tfx_fs_all_glsl =
"\n" "\n"
"#endif\n" "#endif\n"
"\n" "\n"
"#if PS_COLCLIP == 3\n" " // FIXME dithering\n"
"\n"
" // Correct the Color value based on the output format\n"
"#if PS_COLCLIP != 3\n"
" // Standard Clamp\n"
" c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n"
"#endif\n"
"\n"
"#if PS_DFMT == FMT_16\n"
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
"\n"
" // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;\n"
"#elif PS_COLCLIP == 3\n"
" // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n" " c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n"
"#endif\n"
"\n" "\n"
" // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n" " // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n" " //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
"#endif\n"
"}\n" "}\n"
"\n" "\n"
"void ps_main()\n" "void ps_main()\n"
@ -1453,14 +1472,16 @@ static const char* tfx_fs_all_glsl =
" c.a = 0.5f;\n" " c.a = 0.5f;\n"
"#endif\n" "#endif\n"
"\n" "\n"
" float alpha = c.a * 2.0;\n" " // Must be done before alpha correction\n"
" float alpha = c.a * 255.0f / 128.0f;\n"
"\n" "\n"
"#if (PS_AOUT != 0) // 16 bit output\n" " // Correct the ALPHA value based on the output format\n"
" // FIXME add support of alpha mask to replace properly PS_AOUT\n"
"#if (PS_DFMT == FMT_16) || (PS_AOUT)\n"
" float a = 128.0f / 255.0; // alpha output will be 0x80\n" " float a = 128.0f / 255.0; // alpha output will be 0x80\n"
"\n"
" c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n" " c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n"
"#elif (PS_FBA != 0)\n" "#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n"
" if(c.a < 0.5) c.a += 0.5;\n" " if(c.a < 0.5) c.a += 128.0f/255.0f;\n"
"#endif\n" "#endif\n"
"\n" "\n"
" // Get first primitive that will write a failling alpha value\n" " // Get first primitive that will write a failling alpha value\n"