gsdx-ogl: more blend rework to support accurate_colclip

So far only a few blending equations are implemented in the PS. This is
only for testing the behavior on GoW.
This commit is contained in:
Gregory Hainaut 2015-05-20 00:51:37 +02:00
parent c5341a2711
commit 8d3e3e6c5b
7 changed files with 115 additions and 68 deletions

View File

@ -1135,6 +1135,9 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel)
void GSDeviceOGL::OMSetBlendState(GSBlendStateOGL* bs, float bf)
{
// SW date might change the enable state without updating the object
// Time to remove this micro-optimization
#if 0
// State is checkd inside the object but worst case is 8 comparaisons
if (m_state.bs != bs || m_state.bf != bf)
{
@ -1143,6 +1146,9 @@ void GSDeviceOGL::OMSetBlendState(GSBlendStateOGL* bs, float bf)
bs->SetupBlend(bf);
}
#else
bs->SetupBlend(bf);
#endif
}
void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor)

View File

@ -47,7 +47,6 @@ class GSBlendStateOGL {
GLenum m_func_sRGB;
GLenum m_func_dRGB;
bool m_constant_factor;
int m_bogus;
public:
@ -56,7 +55,6 @@ public:
, m_func_sRGB(0)
, m_func_dRGB(0)
, m_constant_factor(false)
, m_bogus(0)
{}
void SetRGB(GLenum op, GLenum src, GLenum dst)
@ -67,10 +65,6 @@ public:
if (IsConstant(src) || IsConstant(dst)) m_constant_factor = true;
}
void SetBogus(int bogus) { m_bogus = bogus; }
int GetBogus() { return m_bogus; }
void RevertOp()
{
if(m_equation_RGB == GL_FUNC_ADD)
@ -95,11 +89,6 @@ public:
glDisable(GL_BLEND);
}
#ifdef ENABLE_OGL_DEBUG
if (m_bogus & A_MAX) {
GL_INS("!!! Bogus blending effect used (%d) !!!", m_bogus);
}
#endif
if (m_enable) {
if (HasConstantFactor()) {
if (GLState::bf != factor) {
@ -327,14 +316,18 @@ class GSDeviceOGL : public GSDevice
uint32 wmt:2;
uint32 ltf:1;
uint32 blend:4;
uint32 _free1:4;
// Word 2
uint32 blend:8;
uint32 _free2:24;
};
uint32 key;
uint64 key;
};
// FIXME is the & useful ?
operator uint32() {return key & 0xffffffff;}
operator uint64() {return key;}
PSSelector() : key(0) {}
};
@ -528,7 +521,7 @@ class GSDeviceOGL : public GSDevice
GLuint m_gs;
GLuint m_ps_ss[1<<3];
GSDepthStencilOGL* m_om_dss[1<<6];
hash_map<uint32, GLuint > m_ps;
hash_map<uint64, GLuint > m_ps;
hash_map<uint32, GSBlendStateOGL* > m_om_bs;
GLuint m_apitrace;
@ -632,7 +625,7 @@ class GSDeviceOGL : public GSDevice
void SetupPS(PSSelector sel);
void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
void SetupSampler(PSSamplerSelector ssel);
int SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending = false);
GLuint GetSamplerID(PSSamplerSelector ssel);
GLuint GetPaletteSamplerID();

View File

@ -31,6 +31,7 @@ GSRendererOGL::GSRendererOGL()
m_accurate_blend = theApp.GetConfig("accurate_blend", 0);
m_accurate_date = theApp.GetConfig("accurate_date", 0);
m_accurate_colclip = theApp.GetConfig("accurate_colclip", 0);
UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0);
UserHacks_AlphaStencil = theApp.GetConfig("UserHacks_AlphaStencil", 0);
@ -51,7 +52,7 @@ GSRendererOGL::GSRendererOGL()
bool GSRendererOGL::CreateDevice(GSDevice* dev)
{
if(!GSRenderer::CreateDevice(dev))
if (!GSRenderer::CreateDevice(dev))
return false;
return true;
@ -70,7 +71,7 @@ void GSRendererOGL::EmulateGS()
// assume vertices are tightly packed and sequentially indexed (it should be the case)
if(m_vertex.next >= 2)
if (m_vertex.next >= 2)
{
size_t count = m_vertex.next;
@ -245,7 +246,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Blend
if(!IsOpaque())
if (!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
@ -254,9 +255,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
om_bsel.c = context->ALPHA.C;
om_bsel.d = context->ALPHA.D;
if(env.PABE.PABE)
if (env.PABE.PABE)
{
if(om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
if (om_bsel.a == 0 && om_bsel.b == 1 && om_bsel.c == 0 && om_bsel.d == 1)
{
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
@ -340,7 +341,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// om
if(context->TEST.ZTE)
if (context->TEST.ZTE)
{
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
@ -360,11 +361,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
if(om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
if (context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt.m_max.p.z > 0xffffff)
if (m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
@ -376,9 +377,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
else if (context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt.m_max.p.z > 0xffff)
if (m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
@ -406,7 +407,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
//The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges.
if(rt->LikelyOffset)
if (rt->LikelyOffset)
{
ox2 *= rt->OffsetHack_modx;
oy2 *= rt->OffsetHack_mody;
@ -421,44 +422,53 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if (DATE_GL45) {
ps_sel.date = 5 + context->TEST.DATM;
} else if(DATE) {
} else if (DATE) {
if (DATE_GL42)
ps_sel.date = 1 + context->TEST.DATM;
else
om_dssel.date = 1;
}
bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST;
if(colclip_wrap)
{
#ifdef ENABLE_OGL_DEBUG
const char *col[3] = {"Cs", "Cd", "0"};
#endif
if (context->ALPHA.A == context->ALPHA.B) {
bool colclip_wrap = env.COLCLAMP.CLAMP == 0 && !tex && PRIM->PRIM != GS_POINTLIST && !m_accurate_colclip;
bool acc_colclip_wrap = env.COLCLAMP.CLAMP == 0 && m_accurate_colclip;
if (context->ALPHA.A == context->ALPHA.B) { // Optimize-away colclip
if (colclip_wrap || acc_colclip_wrap) {
// No addition neither substraction so no risk of overflow the [0:255] range.
GL_INS("Disable COLCLIP wrap: blending is a plain copy of %s", col[context->ALPHA.D]);
colclip_wrap = false;
} else {
GL_INS("Enable COLCLIP wrap (blending is %d/%d/%d/%d)",
context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
ps_sel.colclip = 1;
acc_colclip_wrap = false;
#ifdef ENABLE_OGL_DEBUG
const char *col[3] = {"Cs", "Cd", "0"};
GL_INS("Disable COLCLIP wrap: blending is a plain copy of %s", col[context->ALPHA.D]);
#endif
}
}
if (colclip_wrap) {
ps_sel.colclip = 1;
GL_INS("Enable COLCLIP wrap (blending is %d/%d/%d/%d)",
context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
} else if (acc_colclip_wrap) {
ps_sel.colclip = 3;
GL_INS("Enable accurate COLCLIP wrap (blending is %d/%d/%d/%d)",
context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
} else if (env.COLCLAMP.CLAMP == 0) {
GL_INS("COLCLIP wrap not supported (blending is %d/%d/%d/%d)",
context->ALPHA.A, context->ALPHA.B, context->ALPHA.C, context->ALPHA.D);
}
ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = context->FBA.FBA;
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
if(UserHacks_AlphaHack) ps_sel.aout = 1;
if (UserHacks_AlphaHack) ps_sel.aout = 1;
if(PRIM->FGE)
if (PRIM->FGE)
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)
if (context->TEST.ATE)
ps_sel.atst = context->TEST.ATST;
else
ps_sel.atst = ATST_ALWAYS;
@ -489,7 +499,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bool spritehack = false;
int atst = ps_sel.atst;
if(tex)
if (tex)
{
const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[context->TEX0.PSM];
const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[context->TEX0.CPSM] : psm;
@ -519,7 +529,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
if (PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
ps_sel.fst = 1;
@ -565,6 +575,28 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
}
// Compute the blending equation to detect special case
int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
int bogus_blend = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || (acc_colclip_wrap);
if (sw_blending) {
GL_INS("!!! SW blending effect used (0x%x) !!!", bogus_blend);
// select a shader that support blending
ps_sel.blend = bogus_blend & 0xFF;
dev->PSSetShaderResource(3, rt);
// Require the fix alpha vlaue
if (context->ALPHA.C == 2) {
ps_cb.AlphaCoeff = GSVector4((float)(int)context->ALPHA.FIX / 0x80);
}
// No need to flush for every primitive
require_barrier = !(bogus_blend & NO_BAR);
}
// WARNING: setup of the program must be done first. So you can setup
// 1/ subroutine uniform
// 2/ bindless texture uniform
@ -574,29 +606,16 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->SetupPS(ps_sel);
// rs
uint8 afix = context->ALPHA.FIX;
GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
uint8 afix = context->ALPHA.FIX;
GL_PUSH("IA");
SetupIA();
GL_POP();
dev->OMSetColorMaskState(om_csel);
// Handle blending with care
int bogus_blend = dev->SetupOM(om_dssel, om_bsel, afix);
if (m_accurate_blend && (bogus_blend & A_MAX)) {
ps_sel.blend = bogus_blend & 0xF;
dev->SetupPS(ps_sel);
dev->PSSetShaderResource(3, rt);
if (context->ALPHA.C == 2) {
ps_cb.AlphaCoeff = GSVector4((float)(int)afix / 0x80);
}
require_barrier = !(bogus_blend & NO_BAR);
}
dev->SetupOM(om_dssel, om_bsel, afix, sw_blending);
dev->SetupCB(&vs_cb, &ps_cb);
@ -634,7 +653,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
dev->OMSetRenderTargets(rt, ds, &scissor);
if(context->TEST.DoFirstPass())
if (context->TEST.DoFirstPass())
{
SendDraw(require_barrier);
@ -657,7 +676,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
}
if(context->TEST.DoSecondPass())
if (context->TEST.DoSecondPass())
{
ASSERT(!env.PABE.PABE);
@ -685,7 +704,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
default: __assume(0);
}
if(z || r || g || b || a)
if (z || r || g || b || a)
{
om_dssel.zwe = z;
om_csel.wr = r;

View File

@ -33,6 +33,7 @@ class GSRendererOGL : public GSRendererHW
GSVector2 m_pixelcenter;
bool m_accurate_blend;
bool m_accurate_date;
bool m_accurate_colclip;
bool UserHacks_AlphaHack;
bool UserHacks_AlphaStencil;

View File

@ -109,7 +109,6 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, uint8 afix)
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
bs->SetBogus(m_blendMapD3D9[i].bogus);
if (m_blendMapD3D9[i].bogus & A_MAX) {
if (!theApp.GetConfig("accurate_blend", 0)) {
@ -236,12 +235,21 @@ GLuint GSDeviceOGL::GetPaletteSamplerID()
return m_palette_ss;
}
int GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix, bool sw_blending)
{
GSDepthStencilOGL* dss = m_om_dss[dssel];
OMSetDepthStencilState(dss, 1);
if (sw_blending) {
if (GLState::blend) {
GLState::blend = false;
glDisable(GL_BLEND);
}
// No hardware blending thank
return;
}
// *************************************************************
// Static
// *************************************************************
@ -260,6 +268,4 @@ int GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uin
// Dynamic
// *************************************************************
OMSetBlendState(bs, (float)(int)afix / 0x80);
return bs->GetBogus();
}

View File

@ -366,7 +366,9 @@ vec4 ps_color()
fog(c, PSin_t.z);
#if (PS_COLCLIP < 3)
colclip(c);
#endif
#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
c.rgb = vec3(1.0f, 1.0f, 1.0f);
@ -408,6 +410,15 @@ void ps_blend(inout vec4 c, in float As)
#elif PS_BLEND == 12
// { A_MAX | 12 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F
c.rgb = rt.rgb * (Af.x + 1.0f) - c.rgb * Af.x;
#elif PS_BLEND == 45
// { NO_BAR | 45 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F
c.rgb = - c.rgb * Af.x;
#elif PS_BLEND > 0
error not yet implemented;
#endif
#if PS_COLCLIP == 3
c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;
#endif
}

View File

@ -1108,7 +1108,9 @@ static const char* tfx_fs_all_glsl =
"\n"
" fog(c, PSin_t.z);\n"
"\n"
"#if (PS_COLCLIP < 3)\n"
" colclip(c);\n"
"#endif\n"
"\n"
"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
" c.rgb = vec3(1.0f, 1.0f, 1.0f);\n"
@ -1150,6 +1152,15 @@ static const char* tfx_fs_all_glsl =
"#elif PS_BLEND == 12\n"
" // { A_MAX | 12 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , //*1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F\n"
" c.rgb = rt.rgb * (Af.x + 1.0f) - c.rgb * Af.x;\n"
"#elif PS_BLEND == 45\n"
" // { NO_BAR | 45 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F\n"
" c.rgb = - c.rgb * Af.x;\n"
"#elif PS_BLEND > 0\n"
" error not yet implemented;\n"
"#endif\n"
"\n"
"#if PS_COLCLIP == 3\n"
" c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n"
"#endif\n"
"}\n"
"\n"