mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: add support of partial frame buffer masking
It might help to fix the colors a bit in a couple of games. Enable it with accurate_fbmask = 1. The code uses GL 4.5 extensions. So far it seems the effect is only used a couple of times, and often on non-overlapping primitives, so the speed impact will likely remain small.
This commit is contained in:
parent
87f54ae0ff
commit
839003467e
|
@ -649,6 +649,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
|||
+ format("#define PS_IIP %d\n", sel.iip)
|
||||
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
|
||||
+ format("#define PS_READ_BA %d\n", sel.read_ba)
|
||||
+ format("#define PS_FBMASK %d\n", sel.fbmask)
|
||||
;
|
||||
|
||||
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
||||
|
|
|
@ -254,6 +254,7 @@ class GSDeviceOGL : public GSDevice
|
|||
GSVector4 WH;
|
||||
GSVector4 MinF_TA;
|
||||
GSVector4i MskFix;
|
||||
GSVector4i FbMask;
|
||||
GSVector4 AlphaCoeff;
|
||||
|
||||
GSVector4 HalfTexel;
|
||||
|
@ -263,13 +264,14 @@ class GSDeviceOGL : public GSDevice
|
|||
PSConstantBuffer()
|
||||
{
|
||||
FogColor_AREF = GSVector4::zero();
|
||||
HalfTexel = GSVector4::zero();
|
||||
WH = GSVector4::zero();
|
||||
MinMax = GSVector4::zero();
|
||||
MinF_TA = GSVector4::zero();
|
||||
MskFix = GSVector4i::zero();
|
||||
AlphaCoeff = GSVector4::zero();
|
||||
HalfTexel = GSVector4::zero();
|
||||
WH = GSVector4::zero();
|
||||
MinMax = GSVector4::zero();
|
||||
MinF_TA = GSVector4::zero();
|
||||
MskFix = GSVector4i::zero();
|
||||
AlphaCoeff = GSVector4::zero();
|
||||
TC_OffsetHack = GSVector4::zero();
|
||||
FbMask = GSVector4i::zero();
|
||||
}
|
||||
|
||||
__forceinline bool Update(const PSConstantBuffer* cb)
|
||||
|
@ -279,7 +281,7 @@ class GSDeviceOGL : public GSDevice
|
|||
|
||||
// if WH matches both HalfTexel and TC_OffsetHack do too
|
||||
// MinMax depends on WH and MskFix so no need to check it too
|
||||
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])).alltrue())
|
||||
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue())
|
||||
{
|
||||
// Note previous check uses SSE already, a plain copy will be faster than any memcpy
|
||||
a[0] = b[0];
|
||||
|
@ -287,6 +289,7 @@ class GSDeviceOGL : public GSDevice
|
|||
a[2] = b[2];
|
||||
a[3] = b[3];
|
||||
a[4] = b[4];
|
||||
a[5] = b[5];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -330,8 +333,9 @@ class GSDeviceOGL : public GSDevice
|
|||
// Word 2
|
||||
uint32 blend:8;
|
||||
uint32 dfmt:2;
|
||||
uint32 fbmask:1;
|
||||
|
||||
uint32 _free2:22;
|
||||
uint32 _free2:21;
|
||||
};
|
||||
|
||||
uint64 key;
|
||||
|
|
|
@ -295,6 +295,7 @@ void populate_hw_table(GtkWidget* hw_table)
|
|||
GtkWidget* acc_blend_check = CreateCheckBox("Accurate Blend", "accurate_blend", true);
|
||||
GtkWidget* acc_date_check = CreateCheckBox("Accurate Date", "accurate_date", false);
|
||||
GtkWidget* acc_cclip_check = CreateCheckBox("Accurate Color Clipping", "accurate_colclip", false);
|
||||
GtkWidget* acc_fbmsk_check = CreateCheckBox("Accurate FrameBuffer Mask", "accurate_fbmask", false);
|
||||
|
||||
GtkWidget* MT_nvidia_check = CreateCheckBox("Nvidia Multi-Thread support", "enable_nvidia_multi_thread", true);
|
||||
|
||||
|
@ -303,6 +304,7 @@ void populate_hw_table(GtkWidget* hw_table)
|
|||
gtk_widget_set_tooltip_text(acc_blend_check, dialog_message(IDC_ACCURATE_BLEND));
|
||||
gtk_widget_set_tooltip_text(acc_date_check, dialog_message(IDC_ACCURATE_DATE));
|
||||
gtk_widget_set_tooltip_text(acc_cclip_check, dialog_message(IDC_ACCURATE_COLCLIP));
|
||||
gtk_widget_set_tooltip_text(acc_fbmsk_check, dialog_message(IDC_ACCURATE_FBMASK));
|
||||
gtk_widget_set_tooltip_text(MT_nvidia_check, "Huge speedup on Nvidia binary driver! No effect otherwise.");
|
||||
gtk_widget_set_tooltip_text(crc_label, dialog_message(IDC_CRC_LEVEL));
|
||||
gtk_widget_set_tooltip_text(crc_combo_box, dialog_message(IDC_CRC_LEVEL));
|
||||
|
@ -310,7 +312,7 @@ void populate_hw_table(GtkWidget* hw_table)
|
|||
s_table_line = 0;
|
||||
InsertWidgetInTable(hw_table, paltex_check, MT_nvidia_check);
|
||||
InsertWidgetInTable(hw_table, acc_blend_check, acc_date_check);
|
||||
InsertWidgetInTable(hw_table, acc_cclip_check);
|
||||
InsertWidgetInTable(hw_table, acc_cclip_check, acc_fbmsk_check);
|
||||
InsertWidgetInTable(hw_table, filter_label, filter_combo_box);
|
||||
InsertWidgetInTable(hw_table, af_label, af_combo_box);
|
||||
InsertWidgetInTable(hw_table, crc_label, crc_combo_box);
|
||||
|
|
|
@ -32,6 +32,7 @@ GSRendererOGL::GSRendererOGL()
|
|||
m_accurate_blend = theApp.GetConfig("accurate_blend", 1);
|
||||
m_accurate_date = theApp.GetConfig("accurate_date", 0);
|
||||
m_accurate_colclip = theApp.GetConfig("accurate_colclip", 0);
|
||||
m_accurate_fbmask = theApp.GetConfig("accurate_fbmask", 0);
|
||||
|
||||
UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0);
|
||||
UserHacks_AlphaStencil = theApp.GetConfig("UserHacks_AlphaStencil", 0);
|
||||
|
@ -293,10 +294,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
// 1/ Reduce the frame mask to a 16 bit format
|
||||
const uint32& m = context->FRAME.FBMSK;
|
||||
uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000);
|
||||
// FIXME GSVector will be nice here
|
||||
uint8 rg_mask = fbmask & 0xFF;
|
||||
uint8 ba_mask = (fbmask >> 8) & 0xFF;
|
||||
om_csel.wrgba = 0;
|
||||
|
||||
// 2 Select the new mask (Please someone put SSE here)
|
||||
if ((fbmask & 0xFF) == 0) {
|
||||
if (rg_mask != 0xFF) {
|
||||
if (write_ba) {
|
||||
GL_INS("Color shuffle %s => B", ps_sel.read_ba ? "B" : "R");
|
||||
om_csel.wb = 1;
|
||||
|
@ -304,13 +308,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
GL_INS("Color shuffle %s => R", ps_sel.read_ba ? "B" : "R");
|
||||
om_csel.wr = 1;
|
||||
}
|
||||
} else if ((fbmask & 0xFF) != 0xFF) {
|
||||
GL_INS("ERROR: not supported RG mask:%x", fbmask & 0xFF);
|
||||
ASSERT(0);
|
||||
if (rg_mask)
|
||||
ps_sel.fbmask = 1;
|
||||
}
|
||||
|
||||
fbmask >>= 8;
|
||||
if ((fbmask & 0xFF) == 0) {
|
||||
if (ba_mask != 0xFF) {
|
||||
if (write_ba) {
|
||||
GL_INS("Color shuffle %s => A", ps_sel.read_ba ? "A" : "G");
|
||||
om_csel.wa = 1;
|
||||
|
@ -318,9 +320,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
GL_INS("Color shuffle %s => G", ps_sel.read_ba ? "A" : "G");
|
||||
om_csel.wg = 1;
|
||||
}
|
||||
} else if ((fbmask & 0xFF) != 0xFF) {
|
||||
GL_INS("ERROR: not supported BA mask:%x", fbmask & 0xFF);
|
||||
ASSERT(0);
|
||||
if (ba_mask)
|
||||
ps_sel.fbmask = 1;
|
||||
}
|
||||
|
||||
ps_sel.fbmask &= m_accurate_fbmask;
|
||||
if (ps_sel.fbmask) {
|
||||
GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask);
|
||||
ps_cb.FbMask.r = rg_mask;
|
||||
ps_cb.FbMask.g = rg_mask;
|
||||
ps_cb.FbMask.b = ba_mask;
|
||||
ps_cb.FbMask.a = ba_mask;
|
||||
require_barrier = true;
|
||||
dev->PSSetShaderResource(3, rt);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -329,29 +341,35 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||
|
||||
{
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
// FIXME GSVector will be nice here
|
||||
uint8 r_mask = (context->FRAME.FBMSK >> 0) & 0xFF;
|
||||
uint8 g_mask = (context->FRAME.FBMSK >> 8) & 0xFF;
|
||||
uint8 b_mask = (context->FRAME.FBMSK >> 16) & 0xFF;
|
||||
uint8 a_mask = (context->FRAME.FBMSK >> 24) & 0xFF;
|
||||
uint8 bits = (GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 16 : 32;
|
||||
if (r_mask != 0 && r_mask != 0xFF) {
|
||||
GL_INS("ERROR: not supported r_mask:%x on %d bits format", r_mask, bits);
|
||||
ASSERT(0);
|
||||
ps_sel.fbmask = 1;
|
||||
}
|
||||
if (g_mask != 0 && g_mask != 0xFF) {
|
||||
GL_INS("ERROR: not supported g_mask:%x on %d bits format", g_mask, bits);
|
||||
ASSERT(0);
|
||||
ps_sel.fbmask = 1;
|
||||
}
|
||||
if (b_mask != 0 && b_mask != 0xFF) {
|
||||
GL_INS("ERROR: not supported b_mask:%x on %d bits format", b_mask, bits);
|
||||
ASSERT(0);
|
||||
ps_sel.fbmask = 1;
|
||||
}
|
||||
if (a_mask != 0 && a_mask != 0xFF) {
|
||||
GL_INS("ERROR: not supported a_mask:%x on %d bits format", a_mask, bits);
|
||||
ASSERT(0);
|
||||
ps_sel.fbmask = 1;
|
||||
}
|
||||
|
||||
ps_sel.fbmask &= m_accurate_fbmask;
|
||||
if (ps_sel.fbmask) {
|
||||
GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", context->FRAME.FBMSK,
|
||||
(GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 16 : 32);
|
||||
ps_cb.FbMask.r = r_mask;
|
||||
ps_cb.FbMask.g = g_mask;
|
||||
ps_cb.FbMask.b = b_mask;
|
||||
ps_cb.FbMask.a = a_mask;
|
||||
require_barrier = true;
|
||||
dev->PSSetShaderResource(3, rt);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -730,7 +748,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d;
|
||||
int bogus_blend = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus;
|
||||
bool all_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ) && (m_accurate_blend > 1);
|
||||
bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw;
|
||||
bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw || ps_sel.fbmask;
|
||||
|
||||
if (sw_blending && om_bsel.abe && rt) {
|
||||
GL_INS("!!! SW blending effect used (0x%x from sel %d) !!!", bogus_blend, blend_sel);
|
||||
|
|
|
@ -34,6 +34,7 @@ class GSRendererOGL : public GSRendererHW
|
|||
int m_accurate_blend;
|
||||
bool m_accurate_date;
|
||||
bool m_accurate_colclip;
|
||||
bool m_accurate_fbmask;
|
||||
|
||||
bool UserHacks_AlphaHack;
|
||||
bool UserHacks_AlphaStencil;
|
||||
|
|
|
@ -117,8 +117,11 @@ const char* dialog_message(int ID, bool* updateText) {
|
|||
return "Allow to solve the impossible blending error message.\n\n"
|
||||
"It could be slower when the effect are used.\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier";
|
||||
case IDC_ACCURATE_COLCLIP:
|
||||
return "Debug option to implement the wrapping of color after an overflow\n\n"
|
||||
"It will be slow when the effect are used!\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier";
|
||||
return "Implement the wrapping of color after an overflow\n\n"
|
||||
"It will be slow (half speed) when the effect are used!\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier";
|
||||
case IDC_ACCURATE_FBMASK:
|
||||
return "Implement partial color masking\n\n"
|
||||
"No status yet on the speed impact\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier";
|
||||
#endif
|
||||
default:
|
||||
if (updateText)
|
||||
|
|
|
@ -66,6 +66,7 @@ enum {
|
|||
IDC_ACCURATE_BLEND,
|
||||
IDC_ACCURATE_DATE,
|
||||
IDC_ACCURATE_COLCLIP,
|
||||
IDC_ACCURATE_FBMASK,
|
||||
IDC_CRC_LEVEL
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -65,11 +65,12 @@ layout(std140, binding = 21) uniform cb21
|
|||
vec2 MinF;
|
||||
vec2 TA;
|
||||
uvec4 MskFix;
|
||||
vec3 FbMask;
|
||||
uvec4 FbMask;
|
||||
vec3 _not_yet_used;
|
||||
float Af;
|
||||
vec4 HalfTexel;
|
||||
vec4 MinMax;
|
||||
vec4 TC_OffsetHack;
|
||||
vec2 TC_OffsetHack;
|
||||
};
|
||||
|
||||
#ifdef SUBROUTINE_GL40
|
||||
|
@ -393,6 +394,18 @@ vec4 ps_color()
|
|||
return c;
|
||||
}
|
||||
|
||||
// Applies a per-bit write mask to the output color `c` in the shader.
// The whole body is guarded by PS_FBMASK, so the function is a no-op when
// that macro is 0.
// For every channel, bits set in FbMask are taken from the texel fetched
// from RtSampler at this fragment's coordinates; cleared bits are taken
// from the incoming color `c`.
void ps_fbmask(inout vec4 c)
|
||||
{
|
||||
// FIXME do I need special case for 16 bits
|
||||
#if PS_FBMASK
|
||||
// Fetch the texel at this fragment's position (mip level 0).
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
|
||||
// Scale both colors to the 0..255 integer range (the +0.5 rounds to nearest)
// so the mask can be applied with bitwise operators.
uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);
|
||||
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
|
||||
// Unmasked bits come from the new color, masked bits from the fetched texel;
// the result is scaled back to a float color.
c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if PS_BLEND > 0
|
||||
void ps_blend(inout vec4 c, in float As)
|
||||
{
|
||||
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
|
||||
|
@ -658,6 +671,7 @@ void ps_blend(inout vec4 c, in float As)
|
|||
// Don't compile => unable to find compatible overloaded function "mod(vec3)"
|
||||
//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
|
||||
}
|
||||
#endif
|
||||
|
||||
void ps_main()
|
||||
{
|
||||
|
@ -770,6 +784,8 @@ void ps_main()
|
|||
ps_blend(c, alpha);
|
||||
#endif
|
||||
|
||||
ps_fbmask(c);
|
||||
|
||||
SV_Target0 = c;
|
||||
SV_Target1 = vec4(alpha, alpha, alpha, alpha);
|
||||
}
|
||||
|
|
|
@ -857,11 +857,12 @@ static const char* tfx_fs_all_glsl =
|
|||
" vec2 MinF;\n"
|
||||
" vec2 TA;\n"
|
||||
" uvec4 MskFix;\n"
|
||||
" vec3 FbMask;\n"
|
||||
" uvec4 FbMask;\n"
|
||||
" vec3 _not_yet_used;\n"
|
||||
" float Af;\n"
|
||||
" vec4 HalfTexel;\n"
|
||||
" vec4 MinMax;\n"
|
||||
" vec4 TC_OffsetHack;\n"
|
||||
" vec2 TC_OffsetHack;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"#ifdef SUBROUTINE_GL40\n"
|
||||
|
@ -1185,6 +1186,18 @@ static const char* tfx_fs_all_glsl =
|
|||
" return c;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void ps_fbmask(inout vec4 c)\n"
|
||||
"{\n"
|
||||
" // FIXME do I need special case for 16 bits\n"
|
||||
"#if PS_FBMASK\n"
|
||||
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
|
||||
" uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);\n"
|
||||
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
|
||||
" c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;\n"
|
||||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"#if PS_BLEND > 0\n"
|
||||
"void ps_blend(inout vec4 c, in float As)\n"
|
||||
"{\n"
|
||||
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
|
||||
|
@ -1450,6 +1463,7 @@ static const char* tfx_fs_all_glsl =
|
|||
" // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
|
||||
" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
|
||||
"}\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"void ps_main()\n"
|
||||
"{\n"
|
||||
|
@ -1562,6 +1576,8 @@ static const char* tfx_fs_all_glsl =
|
|||
" ps_blend(c, alpha);\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" ps_fbmask(c);\n"
|
||||
"\n"
|
||||
" SV_Target0 = c;\n"
|
||||
" SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n"
|
||||
"}\n"
|
||||
|
|
Loading…
Reference in New Issue