mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: replace old colclip algo with the HDR algo
Similar speed but more accurate. It also allows the code to be cleaned up.
This commit is contained in:
parent
83f874db93
commit
93c47feb7c
|
@ -667,7 +667,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||||
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
|
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
|
||||||
+ format("#define PS_READ_BA %d\n", sel.read_ba)
|
+ format("#define PS_READ_BA %d\n", sel.read_ba)
|
||||||
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
||||||
+ format("#define PS_BLEND_ACCU %d\n", sel.blend_accu)
|
+ format("#define PS_HDR %d\n", sel.hdr)
|
||||||
;
|
;
|
||||||
|
|
||||||
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
||||||
|
@ -1551,13 +1551,13 @@ const GSDeviceOGL::D3D9Blend GSDeviceOGL::m_blendMapD3D9[3*3*3*3] =
|
||||||
{ 17 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)
|
{ 17 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_INVBLENDFACTOR} , // 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)
|
||||||
{ 18 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F
|
{ 18 , D3DBLENDOP_SUBTRACT , D3DBLEND_BLENDFACTOR , D3DBLEND_BLENDFACTOR} , // 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F
|
||||||
{ NO_BAR | A_MAX | 7 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)
|
{ NO_BAR | A_MAX | 7 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , //*0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)
|
||||||
{ 19 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
|
{ BLEND_ACCU | 19 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
|
||||||
{ NO_BAR | 20 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As
|
{ NO_BAR | 20 , D3DBLENDOP_ADD , D3DBLEND_SRCALPHA , D3DBLEND_ZERO} , // 0202: (Cs - 0)*As + 0 ==> Cs*As
|
||||||
{ A_MAX | 8 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1)
|
{ A_MAX | 8 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , //*0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1)
|
||||||
{ 21 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
|
{ 21 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ONE} , // 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
|
||||||
{ 22 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
|
{ 22 , D3DBLENDOP_ADD , D3DBLEND_DESTALPHA , D3DBLEND_ZERO} , // 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
|
||||||
{ NO_BAR| A_MAX | 9 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
|
{ NO_BAR| A_MAX | 9 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , //*0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
|
||||||
{ 23 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd
|
{ BLEND_ACCU | 23 , D3DBLENDOP_ADD , D3DBLEND_ONE , D3DBLEND_ONE} , // 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd
|
||||||
{ NO_BAR | 24 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F
|
{ NO_BAR | 24 , D3DBLENDOP_ADD , D3DBLEND_BLENDFACTOR , D3DBLEND_ZERO} , // 0222: (Cs - 0)*F + 0 ==> Cs*F
|
||||||
{ 25 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)
|
{ 25 , D3DBLENDOP_ADD , D3DBLEND_INVSRCALPHA , D3DBLEND_SRCALPHA} , // 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)
|
||||||
{ A_MAX | 10 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As
|
{ A_MAX | 10 , D3DBLENDOP_REVSUBTRACT , D3DBLEND_SRCALPHA , D3DBLEND_SRCALPHA} , //*1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As
|
||||||
|
|
|
@ -33,6 +33,7 @@
|
||||||
#define A_MAX (0x100) // Impossible blending uses coeff bigger than 1
|
#define A_MAX (0x100) // Impossible blending uses coeff bigger than 1
|
||||||
#define C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor)
|
#define C_CLR (0x200) // Clear color blending (use directly the destination color as blending factor)
|
||||||
#define NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used)
|
#define NO_BAR (0x400) // don't require texture barrier for the blending (because the RT is not used)
|
||||||
|
#define BLEND_ACCU (0x800) // Allow to use a mix of SW and HW blending to keep the best of the 2 worlds
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
extern uint64 g_real_texture_upload_byte;
|
extern uint64 g_real_texture_upload_byte;
|
||||||
|
@ -350,8 +351,8 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 blend_b:2;
|
uint32 blend_b:2;
|
||||||
uint32 blend_c:2;
|
uint32 blend_c:2;
|
||||||
uint32 blend_d:2;
|
uint32 blend_d:2;
|
||||||
uint32 blend_accu:1;
|
|
||||||
uint32 dfmt:2;
|
uint32 dfmt:2;
|
||||||
|
uint32 hdr:1;
|
||||||
|
|
||||||
uint32 _free2:21;
|
uint32 _free2:21;
|
||||||
};
|
};
|
||||||
|
@ -451,9 +452,8 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 c:2;
|
uint32 c:2;
|
||||||
uint32 d:2;
|
uint32 d:2;
|
||||||
uint32 negative:1;
|
uint32 negative:1;
|
||||||
uint32 accu:1;
|
|
||||||
|
|
||||||
uint32 _free:21;
|
uint32 _free:22;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
|
|
|
@ -357,30 +357,26 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL
|
||||||
// We really need SW blending for this one, barely used
|
// We really need SW blending for this one, barely used
|
||||||
bool impossible_blend = (blend_flag & A_MAX);
|
bool impossible_blend = (blend_flag & A_MAX);
|
||||||
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
||||||
bool accumulation_blend = (ALPHA.A == 0 && ALPHA.B == 2 && ALPHA.C != 1 && ALPHA.D == 1);
|
bool accumulation_blend = (blend_flag & BLEND_ACCU);
|
||||||
|
|
||||||
bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend || ps_sel.blend_accu);
|
bool sw_blending_base = m_sw_blending && (free_blend || impossible_blend);
|
||||||
|
|
||||||
// Color clip
|
// Color clip
|
||||||
bool acc_colclip_wrap = false;
|
|
||||||
if (m_env.COLCLAMP.CLAMP == 0) {
|
if (m_env.COLCLAMP.CLAMP == 0) {
|
||||||
acc_colclip_wrap = (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base);
|
if (accumulation_blend) {
|
||||||
if (acc_colclip_wrap) {
|
ps_sel.hdr = 1;
|
||||||
|
GL_INS("COLCLIP Fast HDR mode ENABLED");
|
||||||
|
} else if (m_sw_blending >= ACC_BLEND_CCLIP || sw_blending_base) {
|
||||||
ps_sel.colclip = 3;
|
ps_sel.colclip = 3;
|
||||||
|
sw_blending_base = true;
|
||||||
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
GL_INS("COLCLIP SW ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
||||||
} else if (!PRIM->TME && PRIM->PRIM != GS_POINTLIST) {
|
} else {
|
||||||
// Standard (inaccurate) colclip
|
fprintf(stderr, "Sorry colclip isn't supported\n");
|
||||||
ps_sel.colclip = 1;
|
|
||||||
accumulation_blend = false;
|
|
||||||
GL_INS("COLCLIP ENABLED (blending is %d/%d/%d/%d)", ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: Option is duplicated, one impact the blend unit / the other the shader.
|
// Note: Option is duplicated, one impact the blend unit / the other the shader.
|
||||||
if (accumulation_blend && m_sw_blending) {
|
sw_blending_base |= accumulation_blend;
|
||||||
om_bsel.accu = ps_sel.blend_accu = 1;
|
|
||||||
sw_blending_base = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool all_blend_sw;
|
bool all_blend_sw;
|
||||||
switch (m_sw_blending) {
|
switch (m_sw_blending) {
|
||||||
|
@ -393,7 +389,6 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL
|
||||||
|
|
||||||
bool sw_blending = sw_blending_base // Free case or Impossible blend
|
bool sw_blending = sw_blending_base // Free case or Impossible blend
|
||||||
|| all_blend_sw // all blend
|
|| all_blend_sw // all blend
|
||||||
|| acc_colclip_wrap // accurate colclip
|
|
||||||
|| ps_sel.fbmask; // accurate fbmask
|
|| ps_sel.fbmask; // accurate fbmask
|
||||||
|
|
||||||
|
|
||||||
|
@ -402,6 +397,9 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL
|
||||||
// that write the bad alpha value. Sw blending will force the draw to run primitive by primitive
|
// that write the bad alpha value. Sw blending will force the draw to run primitive by primitive
|
||||||
// (therefore primitiveID will be constant to 1)
|
// (therefore primitiveID will be constant to 1)
|
||||||
sw_blending &= !DATE_GL42;
|
sw_blending &= !DATE_GL42;
|
||||||
|
// Seriously don't expect me to support this kind of craziness.
|
||||||
|
// No mix of COLCLIP + accumulation_blend + DATE GL42
|
||||||
|
ASSERT(!(ps_sel.hdr && DATE_GL42));
|
||||||
|
|
||||||
// For stat to optimize accurate option
|
// For stat to optimize accurate option
|
||||||
#if 0
|
#if 0
|
||||||
|
@ -409,20 +407,28 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL
|
||||||
om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending);
|
om_bsel.a, om_bsel.b, om_bsel.c, om_bsel.d, m_env.COLCLAMP.CLAMP, m_vt.m_primclass, m_vertex.next, sw_blending);
|
||||||
#endif
|
#endif
|
||||||
if (sw_blending) {
|
if (sw_blending) {
|
||||||
// Disable HW blending except in accu mode
|
|
||||||
om_bsel.abe = ps_sel.blend_accu;
|
|
||||||
ps_sel.blend_a = om_bsel.a;
|
ps_sel.blend_a = om_bsel.a;
|
||||||
ps_sel.blend_b = om_bsel.b;
|
ps_sel.blend_b = om_bsel.b;
|
||||||
ps_sel.blend_c = om_bsel.c;
|
ps_sel.blend_c = om_bsel.c;
|
||||||
ps_sel.blend_d = om_bsel.d;
|
ps_sel.blend_d = om_bsel.d;
|
||||||
|
|
||||||
|
if (accumulation_blend) {
|
||||||
|
// Keep HW blending to do the addition
|
||||||
|
om_bsel.abe = 1;
|
||||||
|
// Remove the addition from the SW blending
|
||||||
|
ps_sel.blend_d = 2;
|
||||||
|
} else {
|
||||||
|
// Disable HW blending
|
||||||
|
om_bsel.abe = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Require the fixed alpha value
|
// Require the fixed alpha value
|
||||||
if (ALPHA.C == 2) {
|
if (ALPHA.C == 2) {
|
||||||
ps_cb.AlphaCoeff.a = afix;
|
ps_cb.AlphaCoeff.a = afix;
|
||||||
}
|
}
|
||||||
|
|
||||||
// No need to flush for every primitive
|
// No need to flush for every primitive
|
||||||
require_barrier |= !(blend_flag & NO_BAR) && !ps_sel.blend_accu;
|
require_barrier |= !(blend_flag & NO_BAR) && !accumulation_blend;
|
||||||
} else {
|
} else {
|
||||||
ps_sel.clr1 = om_bsel.IsCLR1();
|
ps_sel.clr1 = om_bsel.IsCLR1();
|
||||||
if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
|
if (ps_sel.dfmt == 1 && ALPHA.C == 1) {
|
||||||
|
@ -924,7 +930,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
GL_POP();
|
GL_POP();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_env.COLCLAMP.CLAMP == 0 && om_bsel.accu) {
|
if (ps_sel.hdr) {
|
||||||
hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F);
|
hdr_rt = dev->CreateTexture(rtsize.x, rtsize.y, GL_RGBA16F);
|
||||||
|
|
||||||
dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false);
|
dev->CopyRectConv(rt, hdr_rt, ComputeBoundingBox(rtscale, rtsize), false);
|
||||||
|
|
|
@ -105,10 +105,7 @@ GSBlendStateOGL* GSDeviceOGL::CreateBlend(OMBlendSelector bsel, float afix)
|
||||||
{
|
{
|
||||||
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
|
int i = ((bsel.a * 3 + bsel.b) * 3 + bsel.c) * 3 + bsel.d;
|
||||||
|
|
||||||
if (bsel.accu)
|
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
|
||||||
bs->SetRGB(GL_FUNC_ADD, GL_ONE, GL_ONE);
|
|
||||||
else
|
|
||||||
bs->SetRGB(m_blendMapD3D9[i].op, m_blendMapD3D9[i].src, m_blendMapD3D9[i].dst);
|
|
||||||
|
|
||||||
if (m_blendMapD3D9[i].bogus & A_MAX) {
|
if (m_blendMapD3D9[i].bogus & A_MAX) {
|
||||||
if (!theApp.GetConfig("accurate_blending_unit", 1)) {
|
if (!theApp.GetConfig("accurate_blending_unit", 1)) {
|
||||||
|
|
|
@ -453,9 +453,6 @@ void ps_blend(inout vec4 Color, float As)
|
||||||
|
|
||||||
#if PS_BLEND_A == PS_BLEND_B
|
#if PS_BLEND_A == PS_BLEND_B
|
||||||
Color.rgb = D;
|
Color.rgb = D;
|
||||||
#elif PS_BLEND_ACCU == 1
|
|
||||||
// The D addition will be done in the blending unit
|
|
||||||
Color.rgb = trunc(A * C);
|
|
||||||
#else
|
#else
|
||||||
Color.rgb = trunc((A - B) * C + D);
|
Color.rgb = trunc((A - B) * C + D);
|
||||||
#endif
|
#endif
|
||||||
|
@ -612,8 +609,8 @@ void ps_main()
|
||||||
|
|
||||||
ps_fbmask(C);
|
ps_fbmask(C);
|
||||||
|
|
||||||
#if PS_BLEND_ACCU && PS_COLCLIP
|
#if PS_HDR == 1
|
||||||
// Use negative value to avoid overflow of the texture
|
// Use negative value to avoid overflow of the texture (in accumulation mode)
|
||||||
if (any(greaterThan(C.rgb, vec3(128.0f)))) {
|
if (any(greaterThan(C.rgb, vec3(128.0f)))) {
|
||||||
C.rgb = (C.rgb - 256.0f);
|
C.rgb = (C.rgb - 256.0f);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1312,9 +1312,6 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
"#if PS_BLEND_A == PS_BLEND_B\n"
|
"#if PS_BLEND_A == PS_BLEND_B\n"
|
||||||
" Color.rgb = D;\n"
|
" Color.rgb = D;\n"
|
||||||
"#elif PS_BLEND_ACCU == 1\n"
|
|
||||||
" // The D addition will be done in the blending unit\n"
|
|
||||||
" Color.rgb = trunc(A * C);\n"
|
|
||||||
"#else\n"
|
"#else\n"
|
||||||
" Color.rgb = trunc((A - B) * C + D);\n"
|
" Color.rgb = trunc((A - B) * C + D);\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
|
@ -1471,8 +1468,8 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
" ps_fbmask(C);\n"
|
" ps_fbmask(C);\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#if PS_BLEND_ACCU && PS_COLCLIP\n"
|
"#if PS_HDR == 1\n"
|
||||||
" // Use negative value to avoid overflow of the texture\n"
|
" // Use negative value to avoid overflow of the texture (in accumulation mode)\n"
|
||||||
" if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n"
|
" if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n"
|
||||||
" C.rgb = (C.rgb - 256.0f);\n"
|
" C.rgb = (C.rgb - 256.0f);\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
|
|
Loading…
Reference in New Issue