mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl: accelerate special case of accurate date.
Games often use DATE to allow only a single pixel to pass. When this use case is detected, the stencil buffer is cleared after the first pixel that passes both the depth and stencil tests, which seems to reduce the load on the GPU.

Note: with the help of a texture barrier, the algorithm could perhaps be implemented in a single pass.
parent 025be70c42
commit 3ab12cef2f
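Before the diff, a minimal sketch of what the new date_one fast path boils down to at the OpenGL level. It assumes the usual stencil-based accurate-DATE setup, where a pre-pass marks (writes 1 to) the stencil for pixels that satisfy the destination alpha test; the reference and mask values below are illustrative assumptions, not code copied from GSdx:

    // Sketch only: approximate GL state selected by dssel.date_one (ref/mask values assumed).
    glEnable(GL_STENCIL_TEST);
    glStencilFunc(GL_EQUAL, 1, 1);   // shade a pixel only if the DATE pre-pass marked it
    // Regular accurate DATE keeps the mark:  glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP)
    // date_one clears it on the first hit, so every later fragment on the same pixel
    // fails the stencil test and is culled before shading:
    glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO);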
@@ -699,7 +699,10 @@ GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel)
 	if (dssel.date)
 	{
 		dss->EnableStencil();
-		dss->SetStencil(GL_EQUAL, GL_KEEP);
+		if (dssel.date_one)
+			dss->SetStencil(GL_EQUAL, GL_ZERO);
+		else
+			dss->SetStencil(GL_EQUAL, GL_KEEP);
 	}
 
 	if(dssel.ztst != ZTST_ALWAYS || dssel.zwe)
@@ -336,8 +336,9 @@ class GSDeviceOGL final : public GSDevice
 			uint32 ztst:2;
 			uint32 zwe:1;
 			uint32 date:1;
+			uint32 date_one:1;
 
-			uint32 _free:28;
+			uint32 _free:27;
 		};
 
 		uint32 key;
@@ -441,7 +442,7 @@ class GSDeviceOGL final : public GSDevice
 	GLuint m_vs[1<<3];
 	GLuint m_gs[1<<2];
 	GLuint m_ps_ss[1<<4];
-	GSDepthStencilOGL* m_om_dss[1<<4];
+	GSDepthStencilOGL* m_om_dss[1<<5];
 	hash_map<uint64, GLuint > m_ps;
 	GLuint m_apitrace;
 
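The two header hunks above belong together: the depth-stencil objects are cached in an array indexed by the packed selector bits, so adding the date_one bit (five used bits instead of four) forces the table to grow from 1<<4 to 1<<5 entries, and _free shrinks by one bit to keep the union at 32 bits. A minimal sketch of that caching pattern, with illustrative names rather than the exact GSdx declarations:

    #include <cstdint>

    // Sketch only: a packed selector whose low bits index a small state cache.
    union OMSelector {
        struct {
            uint32_t ztst     : 2;
            uint32_t zwe      : 1;
            uint32_t date     : 1;
            uint32_t date_one : 1;   // the new bit added by this commit
            uint32_t _free    : 27;
        };
        uint32_t key;
    };

    struct DepthStencilState { /* GL depth/stencil state object */ };

    DepthStencilState* cache[1 << 5] = {};   // 5 selector bits -> 32 possible entries

    DepthStencilState* Lookup(OMSelector sel)
    {
        // One cached object per distinct selector value.
        if (!cache[sel.key])
            cache[sel.key] = new DepthStencilState(/* built from sel */);
        return cache[sel.key];
    }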
@@ -821,13 +821,28 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
 		DATE = false;
 	} else if (m_accurate_date && om_csel.wa /* FIXME Check the msb bit of the mask instead + the dfmt*/
 			&& (!m_context->TEST.ATE || m_context->TEST.ATST == ATST_ALWAYS)) {
-		// texture barrier will split the draw call into n draw call. It is very efficient for
-		// few primitive draws. Otherwise it sucks.
-		if ((m_vt.m_primclass == GS_SPRITE_CLASS && m_drawlist.size() < 50) || (m_index.tail < 100)) {
+		// Performance note: check alpha range with GetAlphaMinMax()
+		// Note: all my dump are already above 120fps, but it seems to reduce GPU load
+		// with big upscaling
+		GetAlphaMinMax();
+		if (m_context->TEST.DATM && m_vt.m_alpha.max < 128) {
+			// Only first pixel (write 0) will pass (alpha is 1)
+			GL_PERF("Fast DATE with alpha %d-%d", m_vt.m_alpha.min, m_vt.m_alpha.max);
+			om_dssel.date_one = 1;
+		} else if (!m_context->TEST.DATM && m_vt.m_alpha.min >= 128) {
+			// Only first pixel (write 1) will pass (alpha is 0)
+			GL_PERF("Fast DATE with alpha %d-%d", m_vt.m_alpha.min, m_vt.m_alpha.max);
+			om_dssel.date_one = 1;
+		} else if ((m_vt.m_primclass == GS_SPRITE_CLASS && m_drawlist.size() < 50) || (m_index.tail < 100)) {
+			// texture barrier will split the draw call into n draw call. It is very efficient for
+			// few primitive draws. Otherwise it sucks.
+			GL_PERF("Slower DATE with alpha %d-%d", m_vt.m_alpha.min, m_vt.m_alpha.max);
 			require_barrier = true;
 			DATE_GL45 = true;
 			DATE = false;
 		} else {
+			GL_PERF("Slow DATE with alpha %d-%d", m_vt.m_alpha.min, m_vt.m_alpha.max);
+
 			if (GLLoader::found_GL_ARB_shader_image_load_store) {
 				DATE_GL42 = true;
 			} else {
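To summarize the selection logic added to DrawPrims above: the renderer now inspects the draw's alpha range first and only falls back to the heavier accurate-DATE paths when the one-pixel-pass shortcut does not apply. The sketch below is a paraphrase, not the literal code; few_primitives stands in for the sprite-drawlist/index-count heuristic visible in the hunk:

    // Paraphrased decision tree (illustrative; see the hunk above for the real code).
    GetAlphaMinMax();  // fills m_vt.m_alpha.min / m_vt.m_alpha.max for this draw

    const bool one_pixel_pass =
        ( m_context->TEST.DATM && m_vt.m_alpha.max <  128) ||  // every fragment writes alpha MSB = 0
        (!m_context->TEST.DATM && m_vt.m_alpha.min >= 128);    // every fragment writes alpha MSB = 1

    if (one_pixel_pass) {
        om_dssel.date_one = 1;       // fast path: stencil is zeroed after the first passing fragment
    } else if (few_primitives) {     // sprite draw list < 50 or fewer than 100 indices in the diff
        require_barrier = true;      // texture-barrier path, efficient for small draws
        DATE_GL45 = true;
        DATE = false;
    } else if (GLLoader::found_GL_ARB_shader_image_load_store) {
        DATE_GL42 = true;            // image load/store based accurate DATE
    } else {
        // regular (slow) stencil-based DATE
    }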