Merge pull request #555 from PCSX2/real-fb-format

GSdx: better framebuffer format
This commit is contained in:
Gregory Hainaut 2015-06-01 11:48:07 +02:00
commit 2cbde89084
7 changed files with 106 additions and 37 deletions

View File

@ -627,6 +627,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
+ format("#define PS_WMT %d\n", sel.wmt)
+ format("#define PS_FMT %d\n", sel.fmt)
+ format("#define PS_IFMT %d\n", sel.ifmt)
+ format("#define PS_DFMT %d\n", sel.dfmt)
+ format("#define PS_AEM %d\n", sel.aem)
+ format("#define PS_TFX %d\n", sel.tfx)
+ format("#define PS_TCC %d\n", sel.tcc)

View File

@ -321,7 +321,9 @@ class GSDeviceOGL : public GSDevice
// Word 2
uint32 blend:8;
uint32 _free2:24;
uint32 dfmt:2;
uint32 _free2:22;
};
uint64 key;
@ -617,7 +619,7 @@ class GSDeviceOGL : public GSDevice
GLuint CreateSampler(bool bilinear, bool tau, bool tav);
GLuint CreateSampler(PSSamplerSelector sel);
GSDepthStencilOGL* CreateDepthStencil(OMDepthStencilSelector dssel);
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, uint8 afix);
GSBlendStateOGL* CreateBlend(OMBlendSelector bsel, float afix);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
@ -626,7 +628,7 @@ class GSDeviceOGL : public GSDevice
void SetupPS(PSSelector sel);
void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
void SetupSampler(PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending = false);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending = false);
GLuint GetSamplerID(PSSamplerSelector ssel);
GLuint GetPaletteSamplerID();

View File

@ -249,16 +249,22 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
GSDeviceOGL::OMColorMaskSelector om_csel;
GSDeviceOGL::OMDepthStencilSelector om_dssel;
// Format of the output
ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
GIFRegALPHA ALPHA = context->ALPHA;
float afix = (float)context->ALPHA.FIX / 0x80;
// Blend
if (!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
om_bsel.a = ALPHA.A;
om_bsel.b = ALPHA.B;
om_bsel.c = ALPHA.C;
om_bsel.d = ALPHA.D;
if (env.PABE.PABE)
{
@ -282,6 +288,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
if (ps_sel.dfmt == 1) {
// The 24-bit format has no alpha channel, so use a fixed factor of 1.0f as the equivalent
ALPHA.C = 2;
afix = 1.0f;
// Disable writing of the alpha channel
om_csel.wa = 0;
}
if (DATE) {
if (GLLoader::found_GL_ARB_texture_barrier && !PrimitiveOverlap()) {
@ -436,25 +449,25 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST && !m_accurate_colclip;
bool acc_colclip_wrap = env.COLCLAMP.CLAMP == 0 && m_accurate_colclip;
if (context->ALPHA.A == context->ALPHA.B) { // Optimize-away colclip
if (ALPHA.A == ALPHA.B) { // Optimize-away colclip
// Neither addition nor subtraction, so there is no risk of overflowing the [0:255] range.
colclip_wrap = false;
acc_colclip_wrap = false;
#ifdef ENABLE_OGL_DEBUG
if (colclip_wrap || acc_colclip_wrap) {
const char *col[3] = {"Cs", "Cd", "0"};
GL_INS("COLCLIP: DISABLED: blending is a plain copy of %s", col[context->ALPHA.D]);
GL_INS("COLCLIP: DISABLED: blending is a plain copy of %s", col[ALPHA.D]);
}
#endif
}
if (colclip_wrap) {
ps_sel.colclip = 1;
GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
} else if (acc_colclip_wrap) {
ps_sel.colclip = 3;
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
} else if (env.COLCLAMP.CLAMP == 0 && (context->ALPHA.A != context->ALPHA.B)) {
GL_INS("COLCLIP NOT SUPPORTED (blending is %d/%d/%d/%d)", context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
} else if (env.COLCLAMP.CLAMP == 0 && (ALPHA.A != ALPHA.B)) {
GL_INS("COLCLIP NOT SUPPORTED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
}
ps_sel.fba = context->FBA.FBA;
@ -611,8 +624,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->PSSetShaderResource(3, rt);
// Requires the fixed alpha value
if (context->ALPHA.C == 2) {
ps_cb.AlphaCoeff = GSVector4((float)(int)context->ALPHA.FIX / 0x80);
if (ALPHA.C == 2) {
ps_cb.AlphaCoeff = GSVector4(afix);
}
// No need to flush for every primitive
@ -630,7 +643,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->SetupPS(ps_sel);
// rs
uint8 afix = context->ALPHA.FIX;
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());

View File

@ -4315,6 +4315,18 @@ bool GSC_Castlevania(const GSFrameInfo& fi, int& skip)
{
if(skip == 0)
{
// This hack removes the shadows and the globally darker image.
// I think there are 2 issues in GSdx:
//
// 1/ colclip is potentially not correctly supported.
//
// 2/ a 32-bit format is used to emulate a 16-bit format.
// For example, if you blend the value 4 onto a dark destination pixel 64 times:
//
// FMT32: 4*64 = 256 <= white pixel
//
// FMT16: the output of blending will always be 0 because the 3 LSBs of the color are dropped.
// Therefore the pixel remains dark !!!
if(fi.TME && fi.FBP == 0 && fi.TBP0 && fi.TPSM == 10 && fi.FBMSK == 0xFFFFFF)
{
skip = 2;

View File

@ -100,7 +100,7 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
return dss;
}
GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix)
{
GSBlendStateOGL* bs = new GSBlendStateOGL();
@ -119,7 +119,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, GL_ONE);
}
const string afixstr = format("%d >> 7", afix);
const string afixstr = format("%f", afix);
const char *col[3] = {"Cs", "Cd", "0"};
const char *alpha[3] = {"As", "Ad", afixstr.c_str()};
fprintf(stderr, "Impossible blend for D3D: (%s - %s) * %s + %s\n", col[bsel.a], col[bsel.b], alpha[bsel.c], col[bsel.d]);
@ -235,7 +235,7 @@ GLuint GSDeviceOGL::GetPaletteSamplerID()
return m_palette_ss;
}
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending)
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending)
{
GSDepthStencilOGL* dss = m_om_dss[dssel];
@ -267,5 +267,5 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, ui
// *************************************************************
// Dynamic
// *************************************************************
OMSetBlendState(bs, (float)(int)afix / 0x80);
OMSetBlendState(bs, afix);
}

View File

@ -404,8 +404,13 @@ vec4 ps_color()
void ps_blend(inout vec4 c, in float As)
{
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
#if PS_DFMT == FMT_24
float Ad = 1.0f;
#else
// FIXME FMT_16 case
// FIXME Ad or Ad * 2?
float Ad = rt.a;
float Ad = rt.a * 255.0f / 128.0f;
#endif
// Let the compiler do its jobs !
vec3 Cd = rt.rgb;
vec3 Cs = c.rgb;
@ -640,12 +645,26 @@ void ps_blend(inout vec4 c, in float As)
#endif
#if PS_COLCLIP == 3
// FIXME dithering
// Correct the Color value based on the output format
#if PS_COLCLIP != 3
// Standard Clamp
c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));
#endif
#if PS_DFMT == FMT_16
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;
#elif PS_COLCLIP == 3
// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;
#endif
// Don't compile => unable to find compatible overloaded function "mod(vec3)"
//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
#endif
}
void ps_main()
@ -700,14 +719,16 @@ void ps_main()
c.a = 0.5f;
#endif
float alpha = c.a * 2.0;
// Must be done before alpha correction
float alpha = c.a * 255.0f / 128.0f;
#if (PS_AOUT != 0) // 16 bit output
// Correct the ALPHA value based on the output format
// FIXME add support of alpha mask to replace properly PS_AOUT
#if (PS_DFMT == FMT_16) || (PS_AOUT)
float a = 128.0f / 255.0; // alpha output will be 0x80
c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
#elif (PS_FBA != 0)
if(c.a < 0.5) c.a += 0.5;
#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)
if(c.a < 0.5) c.a += 128.0f/255.0f;
#endif
// Get the first primitive that will write a failing alpha value

View File

@ -1157,8 +1157,13 @@ static const char* tfx_fs_all_glsl =
"void ps_blend(inout vec4 c, in float As)\n"
"{\n"
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
"#if PS_DFMT == FMT_24\n"
" float Ad = 1.0f;\n"
"#else\n"
" // FIXME FMT_16 case\n"
" // FIXME Ad or Ad * 2?\n"
" float Ad = rt.a;\n"
" float Ad = rt.a * 255.0f / 128.0f;\n"
"#endif\n"
" // Let the compiler do its jobs !\n"
" vec3 Cd = rt.rgb;\n"
" vec3 Cs = c.rgb;\n"
@ -1393,12 +1398,26 @@ static const char* tfx_fs_all_glsl =
"\n"
"#endif\n"
"\n"
"#if PS_COLCLIP == 3\n"
" // FIXME dithering\n"
"\n"
" // Correct the Color value based on the output format\n"
"#if PS_COLCLIP != 3\n"
" // Standard Clamp\n"
" c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n"
"#endif\n"
"\n"
"#if PS_DFMT == FMT_16\n"
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
"\n"
" // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;\n"
"#elif PS_COLCLIP == 3\n"
" // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n"
"#endif\n"
"\n"
" // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
"#endif\n"
"}\n"
"\n"
"void ps_main()\n"
@ -1453,14 +1472,16 @@ static const char* tfx_fs_all_glsl =
" c.a = 0.5f;\n"
"#endif\n"
"\n"
" float alpha = c.a * 2.0;\n"
" // Must be done before alpha correction\n"
" float alpha = c.a * 255.0f / 128.0f;\n"
"\n"
"#if (PS_AOUT != 0) // 16 bit output\n"
" // Correct the ALPHA value based on the output format\n"
" // FIXME add support of alpha mask to replace properly PS_AOUT\n"
"#if (PS_DFMT == FMT_16) || (PS_AOUT)\n"
" float a = 128.0f / 255.0; // alpha output will be 0x80\n"
"\n"
" c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n"
"#elif (PS_FBA != 0)\n"
" if(c.a < 0.5) c.a += 0.5;\n"
"#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n"
" if(c.a < 0.5) c.a += 128.0f/255.0f;\n"
"#endif\n"
"\n"
" // Get first primitive that will write a failling alpha value\n"