mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: optimize Cs * As + Cd and Cs * Af + Cd blending
Basically, the code does the alpha multiplication in the shader, so the blend unit only does a pure addition. This way the multiplication is accurate, and accurate_blending doesn't require a costly barrier. This code also avoids variable duplication to make the code more separated. Hopefully blending can be done in a separate function. It is preliminary work to support fast color clipping with HDR. v2: fix assertion compilation failure. v3: fix regression in non-accurate mode. v3: Cs * As/Af is not an accumulation; those cases don't need the Cd addition and were already optimized anyway. Fixes a regression on GoW2.
This commit is contained in:
parent
12fdc37599
commit
ae8df002af
|
@ -666,6 +666,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||||
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
|
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
|
||||||
+ format("#define PS_READ_BA %d\n", sel.read_ba)
|
+ format("#define PS_READ_BA %d\n", sel.read_ba)
|
||||||
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
||||||
|
+ format("#define PS_BLEND_ACCU %d\n", sel.blend_accu)
|
||||||
;
|
;
|
||||||
|
|
||||||
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
||||||
|
|
|
@ -355,6 +355,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 blend_b:2;
|
uint32 blend_b:2;
|
||||||
uint32 blend_c:2;
|
uint32 blend_c:2;
|
||||||
uint32 blend_d:2;
|
uint32 blend_d:2;
|
||||||
|
uint32 blend_accu:1;
|
||||||
uint32 dfmt:2;
|
uint32 dfmt:2;
|
||||||
|
|
||||||
uint32 _free2:21;
|
uint32 _free2:21;
|
||||||
|
@ -461,8 +462,10 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 c:2;
|
uint32 c:2;
|
||||||
uint32 d:2;
|
uint32 d:2;
|
||||||
uint32 negative:1;
|
uint32 negative:1;
|
||||||
|
uint32 accu:1;
|
||||||
|
uint32 ps:1;
|
||||||
|
|
||||||
uint32 _free:22;
|
uint32 _free:20;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
|
@ -470,15 +473,16 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 _abe:1;
|
uint32 _abe:1;
|
||||||
uint32 abcd:8;
|
uint32 abcd:8;
|
||||||
uint32 _negative:1;
|
uint32 _negative:1;
|
||||||
|
uint32 _accu:1;
|
||||||
|
uint32 _ps:1;
|
||||||
|
|
||||||
uint32 _free2:22;
|
uint32 _free2:20;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
uint32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME is the & useful ?
|
operator uint32() {return key;}
|
||||||
operator uint32() {return key & 0x3ff;}
|
|
||||||
|
|
||||||
OMBlendSelector() : key(0) {}
|
OMBlendSelector() : key(0) {}
|
||||||
|
|
||||||
|
@ -666,7 +670,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
void SetupPS(PSSelector sel);
|
void SetupPS(PSSelector sel);
|
||||||
void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
|
void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
|
||||||
void SetupSampler(PSSamplerSelector ssel);
|
void SetupSampler(PSSamplerSelector ssel);
|
||||||
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending = false);
|
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix);
|
||||||
GLuint GetSamplerID(PSSamplerSelector ssel);
|
GLuint GetSamplerID(PSSamplerSelector ssel);
|
||||||
GLuint GetPaletteSamplerID();
|
GLuint GetPaletteSamplerID();
|
||||||
|
|
||||||
|
|
|
@ -452,8 +452,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
|
|
||||||
const GIFRegALPHA& ALPHA = context->ALPHA;
|
const GIFRegALPHA& ALPHA = context->ALPHA;
|
||||||
float afix = (float)context->ALPHA.FIX / 0x80;
|
float afix = (float)context->ALPHA.FIX / 0x80;
|
||||||
bool sw_blending = false;
|
|
||||||
bool colclip_wrap = false;
|
|
||||||
|
|
||||||
if (!IsOpaque() && rt)
|
if (!IsOpaque() && rt)
|
||||||
{
|
{
|
||||||
|
@ -471,7 +469,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
{
|
{
|
||||||
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
|
// this works because with PABE alpha blending is on when alpha >= 0x80, but since the pixel shader
|
||||||
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
|
// cannot output anything over 0x80 (== 1.0) blending with 0x80 or turning it off gives the same result
|
||||||
|
|
||||||
om_bsel.abe = 0;
|
om_bsel.abe = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -489,24 +486,34 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
|
int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
|
||||||
int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
|
int blend_flag = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
|
||||||
// SW Blend is (nearly) free. Let's use it.
|
// SW Blend is (nearly) free. Let's use it.
|
||||||
int free_blend = m_sw_blending && ((blend_flag & NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO));
|
bool free_blend = (blend_flag & NO_BAR) || (m_prim_overlap == PRIM_OVERLAP_NO);
|
||||||
|
// We really need SW blending for this one, barely used
|
||||||
|
bool impossible_blend = (blend_flag & A_MAX);
|
||||||
|
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
||||||
|
ps_sel.blend_accu = m_sw_blending && ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1;
|
||||||
|
om_bsel.accu = ps_sel.blend_accu;
|
||||||
|
|
||||||
|
bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend /*|| ps_sel.blend_accu*/);
|
||||||
|
|
||||||
// Color clip
|
// Color clip
|
||||||
bool acc_colclip_wrap = false;
|
bool acc_colclip_wrap = false;
|
||||||
if (env.COLCLAMP.CLAMP == 0) {
|
if (env.COLCLAMP.CLAMP == 0) {
|
||||||
colclip_wrap = !tex && PRIM->PRIM != GS_POINTLIST;
|
// Not supported yet in colclip
|
||||||
acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || free_blend);
|
om_bsel.accu = ps_sel.blend_accu = 0;
|
||||||
|
|
||||||
|
acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base);
|
||||||
if (acc_colclip_wrap) {
|
if (acc_colclip_wrap) {
|
||||||
colclip_wrap = false;
|
|
||||||
ps_sel.colclip = 3;
|
ps_sel.colclip = 3;
|
||||||
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
||||||
} else if (colclip_wrap) {
|
} else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) {
|
||||||
|
// Standard (inaccurate) colclip
|
||||||
ps_sel.colclip = 1;
|
ps_sel.colclip = 1;
|
||||||
GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
sw_blending_base |= m_sw_blending && ps_sel.blend_accu;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool impossible_blend = m_sw_blending && (blend_flag & A_MAX);
|
|
||||||
bool all_blend_sw;
|
bool all_blend_sw;
|
||||||
switch (m_sw_blending) {
|
switch (m_sw_blending) {
|
||||||
case ACC_BLEND_ULTRA: all_blend_sw = true; break;
|
case ACC_BLEND_ULTRA: all_blend_sw = true; break;
|
||||||
|
@ -516,8 +523,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
default: all_blend_sw = false; break;
|
default: all_blend_sw = false; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
sw_blending = free_blend // Free case
|
bool sw_blending = sw_blending_base // Free case or Impossible blend
|
||||||
|| impossible_blend || all_blend_sw // Impossible blend or all
|
|| all_blend_sw // all blend
|
||||||
|| acc_colclip_wrap // accurate colclip
|
|| acc_colclip_wrap // accurate colclip
|
||||||
|| ps_sel.fbmask; // accurate fbmask
|
|| ps_sel.fbmask; // accurate fbmask
|
||||||
|
|
||||||
|
@ -536,6 +543,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
#endif
|
#endif
|
||||||
if (sw_blending && om_bsel.abe) {
|
if (sw_blending && om_bsel.abe) {
|
||||||
// select a shader that support blending
|
// select a shader that support blending
|
||||||
|
om_bsel.ps = 1;
|
||||||
ps_sel.blend_a = om_bsel.a;
|
ps_sel.blend_a = om_bsel.a;
|
||||||
ps_sel.blend_b = om_bsel.b;
|
ps_sel.blend_b = om_bsel.b;
|
||||||
ps_sel.blend_c = om_bsel.c;
|
ps_sel.blend_c = om_bsel.c;
|
||||||
|
@ -549,18 +557,18 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
}
|
}
|
||||||
|
|
||||||
// No need to flush for every primitive
|
// No need to flush for every primitive
|
||||||
require_barrier |= !(blend_flag & NO_BAR);
|
require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu;
|
||||||
} else {
|
} else {
|
||||||
ps_sel.clr1 = om_bsel.IsCLR1();
|
ps_sel.clr1 = om_bsel.IsCLR1();
|
||||||
}
|
if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
|
||||||
}
|
|
||||||
|
|
||||||
if (ps_sel.dfmt == 1) {
|
|
||||||
if (ALPHA.C == 1 && !sw_blending) {
|
|
||||||
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
|
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
|
||||||
om_bsel.c = 2;
|
om_bsel.c = 2;
|
||||||
afix = 1.0f;
|
afix = 1.0f;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ps_sel.dfmt == 1) {
|
||||||
// Disable writing of the alpha channel
|
// Disable writing of the alpha channel
|
||||||
om_csel.wa = 0;
|
om_csel.wa = 0;
|
||||||
}
|
}
|
||||||
|
@ -864,7 +872,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
GL_POP();
|
GL_POP();
|
||||||
|
|
||||||
dev->OMSetColorMaskState(om_csel);
|
dev->OMSetColorMaskState(om_csel);
|
||||||
dev->SetupOM(om_dssel, om_bsel, afix, sw_blending);
|
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||||
|
|
||||||
dev->SetupCB(&vs_cb, &ps_cb);
|
dev->SetupCB(&vs_cb, &ps_cb);
|
||||||
|
|
||||||
|
@ -907,9 +915,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
{
|
{
|
||||||
SendDraw(require_barrier);
|
SendDraw(require_barrier);
|
||||||
|
|
||||||
if (colclip_wrap)
|
if (ps_sel.colclip == 1)
|
||||||
{
|
{
|
||||||
ASSERT(!sw_blending);
|
ASSERT(!om_bsel.ps);
|
||||||
GL_PUSH("COLCLIP");
|
GL_PUSH("COLCLIP");
|
||||||
GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
|
GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
|
||||||
GSDeviceOGL::PSSelector ps_selneg(ps_sel);
|
GSDeviceOGL::PSSelector ps_selneg(ps_sel);
|
||||||
|
@ -963,13 +971,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
om_csel.wa = a;
|
om_csel.wa = a;
|
||||||
|
|
||||||
dev->OMSetColorMaskState(om_csel);
|
dev->OMSetColorMaskState(om_csel);
|
||||||
dev->SetupOM(om_dssel, om_bsel, afix, sw_blending);
|
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||||
|
|
||||||
SendDraw(require_barrier);
|
SendDraw(require_barrier);
|
||||||
|
|
||||||
if (colclip_wrap)
|
if (ps_sel.colclip == 1)
|
||||||
{
|
{
|
||||||
ASSERT(!sw_blending);
|
ASSERT(!om_bsel.ps);
|
||||||
GL_PUSH("COLCLIP");
|
GL_PUSH("COLCLIP");
|
||||||
GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
|
GSDeviceOGL::OMBlendSelector om_bselneg(om_bsel);
|
||||||
GSDeviceOGL::PSSelector ps_selneg(ps_sel);
|
GSDeviceOGL::PSSelector ps_selneg(ps_sel);
|
||||||
|
|
|
@ -104,6 +104,9 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix)
|
||||||
{
|
{
|
||||||
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
|
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
|
||||||
|
|
||||||
|
if (bsel.accu)
|
||||||
|
bs->SetRGB(GL_FUNC_ADD, GL_ONE, GL_ONE);
|
||||||
|
else
|
||||||
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
|
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
|
||||||
|
|
||||||
if (m_blendMapD3D9[i].bogus & A_MAX) {
|
if (m_blendMapD3D9[i].bogus & A_MAX) {
|
||||||
|
@ -187,13 +190,13 @@ GLuint GSDeviceOGL::GetPaletteSamplerID()
|
||||||
return m_palette_ss;
|
return m_palette_ss;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix, bool sw_blending)
|
void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float afix)
|
||||||
{
|
{
|
||||||
GSDepthStencilOGL* dss = m_om_dss[dssel];
|
GSDepthStencilOGL* dss = m_om_dss[dssel];
|
||||||
|
|
||||||
OMSetDepthStencilState(dss, 1);
|
OMSetDepthStencilState(dss, 1);
|
||||||
|
|
||||||
if (sw_blending) {
|
if (bsel.ps && !bsel.accu) {
|
||||||
if (GLState::blend) {
|
if (GLState::blend) {
|
||||||
GLState::blend = false;
|
GLState::blend = false;
|
||||||
glDisable(GL_BLEND);
|
glDisable(GL_BLEND);
|
||||||
|
|
|
@ -453,6 +453,9 @@ void ps_blend(inout vec4 Color, float As)
|
||||||
|
|
||||||
#if PS_BLEND_A == PS_BLEND_B
|
#if PS_BLEND_A == PS_BLEND_B
|
||||||
Color.rgb = D;
|
Color.rgb = D;
|
||||||
|
#elif PS_BLEND_ACCU == 1
|
||||||
|
// The D addition will be done in the blending unit
|
||||||
|
Color.rgb = trunc(A * C);
|
||||||
#else
|
#else
|
||||||
Color.rgb = trunc((A - B) * C + D);
|
Color.rgb = trunc((A - B) * C + D);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1316,6 +1316,9 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
"#if PS_BLEND_A == PS_BLEND_B\n"
|
"#if PS_BLEND_A == PS_BLEND_B\n"
|
||||||
" Color.rgb = D;\n"
|
" Color.rgb = D;\n"
|
||||||
|
"#elif PS_BLEND_ACCU == 1\n"
|
||||||
|
" // The D addition will be done in the blending unit\n"
|
||||||
|
" Color.rgb = trunc(A * C);\n"
|
||||||
"#else\n"
|
"#else\n"
|
||||||
" Color.rgb = trunc((A - B) * C + D);\n"
|
" Color.rgb = trunc((A - B) * C + D);\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
|
|
Loading…
Reference in New Issue