mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: improve date performance for GL45
If there is no overlap, it is allowed to directly read from the render target. On the SotC test case with 6x scaling: 30fps -> 40fps. Note: it requires the GL_ARB_texture_barrier extension, so be sure to have a recent driver. Note 2: it requires a lot of testing too. Open question: in the case of complex DATE (written alpha), will it be faster to split the draw call into multiple calls with no primitive overlap?
This commit is contained in:
parent
795ae50ecd
commit
c207632e49
|
@ -54,8 +54,8 @@ namespace GLState {
|
||||||
|
|
||||||
GLuint rt = 0;
|
GLuint rt = 0;
|
||||||
GLuint ds = 0;
|
GLuint ds = 0;
|
||||||
GLuint tex_unit[2] = {0, 0};
|
GLuint tex_unit[4] = {0, 0, 0, 0};
|
||||||
GLuint64 tex_handle[2] = { 0, 0};
|
GLuint64 tex_handle[4] = { 0, 0, 0, 0};
|
||||||
bool dirty_ressources = false;
|
bool dirty_ressources = false;
|
||||||
|
|
||||||
GLuint ps = 0;
|
GLuint ps = 0;
|
||||||
|
@ -106,10 +106,10 @@ namespace GLState {
|
||||||
|
|
||||||
rt = 0;
|
rt = 0;
|
||||||
ds = 0;
|
ds = 0;
|
||||||
tex_unit[0] = 0;
|
for (int i = 0; i < 4; i++) {
|
||||||
tex_unit[1] = 0;
|
tex_unit[i] = 0;
|
||||||
tex_handle[0] = 0;
|
tex_handle[i] = 0;
|
||||||
tex_handle[1] = 0;
|
}
|
||||||
|
|
||||||
ps = 0;
|
ps = 0;
|
||||||
gs = 0;
|
gs = 0;
|
||||||
|
|
|
@ -56,8 +56,8 @@ namespace GLState {
|
||||||
|
|
||||||
extern GLuint rt; // render target
|
extern GLuint rt; // render target
|
||||||
extern GLuint ds; // Depth-Stencil
|
extern GLuint ds; // Depth-Stencil
|
||||||
extern GLuint tex_unit[2]; // shader input texture
|
extern GLuint tex_unit[4]; // shader input texture
|
||||||
extern GLuint64 tex_handle[2]; // shader input texture
|
extern GLuint64 tex_handle[4]; // shader input texture
|
||||||
|
|
||||||
extern GLuint ps;
|
extern GLuint ps;
|
||||||
extern GLuint gs;
|
extern GLuint gs;
|
||||||
|
|
|
@ -579,7 +579,6 @@ void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt)
|
||||||
#ifndef ENABLE_GLES
|
#ifndef ENABLE_GLES
|
||||||
gl_BindImageTexture(2, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
|
gl_BindImageTexture(2, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
|
||||||
#endif
|
#endif
|
||||||
gl_BindTextureUnit(3, static_cast<GSTextureOGL*>(rt)->GetID());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::RecycleDateTexture()
|
void GSDeviceOGL::RecycleDateTexture()
|
||||||
|
|
|
@ -372,7 +372,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 clr1:1;
|
uint32 clr1:1;
|
||||||
uint32 fba:1;
|
uint32 fba:1;
|
||||||
uint32 aout:1;
|
uint32 aout:1;
|
||||||
uint32 date:2;
|
uint32 date:3;
|
||||||
uint32 spritehack:1;
|
uint32 spritehack:1;
|
||||||
uint32 tcoffsethack:1;
|
uint32 tcoffsethack:1;
|
||||||
uint32 point_sampler:1;
|
uint32 point_sampler:1;
|
||||||
|
|
|
@ -151,6 +151,39 @@ void GSRendererOGL::SetupIA()
|
||||||
dev->IASetPrimitiveTopology(t);
|
dev->IASetPrimitiveTopology(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool GSRendererOGL::PrimitiveOverlap()
|
||||||
|
{
|
||||||
|
if (m_vertex.next < 4)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (m_vt.m_primclass != GS_SPRITE_CLASS)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Check intersection of sprite primitive only
|
||||||
|
size_t count = m_vertex.next;
|
||||||
|
GSVertex* v = &m_vertex.buff[0];
|
||||||
|
|
||||||
|
for(size_t i = 0; i < count; i += 2) {
|
||||||
|
// Very bad code
|
||||||
|
GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y);
|
||||||
|
for (size_t j = i+2; j < count; j += 2) {
|
||||||
|
GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y);
|
||||||
|
GSVector4i inter = vi.rintersect(vj);
|
||||||
|
if (!inter.rempty()) {
|
||||||
|
//fprintf(stderr, "Overlap found between %d and %d (draw of %d vertices)\n", i, j, count);
|
||||||
|
//vi.print();
|
||||||
|
//vj.print();
|
||||||
|
//inter.print();
|
||||||
|
//exit(0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//fprintf(stderr, "Yes, code can be optimized (draw of %d vertices)\n", count);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||||
{
|
{
|
||||||
GSDrawingEnvironment& env = m_env;
|
GSDrawingEnvironment& env = m_env;
|
||||||
|
@ -160,7 +193,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
const GSVector2& rtscale = rt->GetScale();
|
const GSVector2& rtscale = rt->GetScale();
|
||||||
|
|
||||||
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||||
bool advance_DATE = false;
|
bool DATE_GL42 = false;
|
||||||
|
bool DATE_GL45 = false;
|
||||||
|
|
||||||
ASSERT(m_dev != NULL);
|
ASSERT(m_dev != NULL);
|
||||||
|
|
||||||
|
@ -208,14 +242,21 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
|
|
||||||
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||||
|
|
||||||
if (DATE && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
|
if (DATE) {
|
||||||
advance_DATE = GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil;
|
if (gl_TextureBarrier && !PrimitiveOverlap()) {
|
||||||
|
DATE_GL45 = true;
|
||||||
|
DATE = false;
|
||||||
|
} else if (om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
|
||||||
|
DATE_GL42 = GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DATE
|
// DATE
|
||||||
|
|
||||||
if(DATE)
|
if (DATE_GL45) {
|
||||||
{
|
gl_TextureBarrier();
|
||||||
|
dev->PSSetShaderResource(3, rt);
|
||||||
|
} else if (DATE) {
|
||||||
// TODO: do I need to clamp the value (if yes how? rintersect with rt?)
|
// TODO: do I need to clamp the value (if yes how? rintersect with rt?)
|
||||||
GSVector4 si = GSVector4(rtscale.x, rtscale.y);
|
GSVector4 si = GSVector4(rtscale.x, rtscale.y);
|
||||||
GSVector4 o = GSVector4(-1.0f, 1.0f); // Round value
|
GSVector4 o = GSVector4(-1.0f, 1.0f); // Round value
|
||||||
|
@ -227,8 +268,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
|
|
||||||
// Must be done here to avoid any GL state pertubation (clear function...)
|
// Must be done here to avoid any GL state pertubation (clear function...)
|
||||||
// Create an r32ui image that will containt primitive ID
|
// Create an r32ui image that will containt primitive ID
|
||||||
if (advance_DATE) {
|
if (DATE_GL42) {
|
||||||
dev->InitPrimDateTexture(rt);
|
dev->InitPrimDateTexture(rt);
|
||||||
|
dev->PSSetShaderResource(3, rt);
|
||||||
} else {
|
} else {
|
||||||
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
||||||
|
|
||||||
|
@ -349,9 +391,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
// GS_SPRITE_CLASS are already flat (either by CPU or the GS)
|
// GS_SPRITE_CLASS are already flat (either by CPU or the GS)
|
||||||
ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP;
|
ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP;
|
||||||
|
|
||||||
if(DATE)
|
if (DATE_GL45) {
|
||||||
{
|
ps_sel.date = 5 + context->TEST.DATM;
|
||||||
if (advance_DATE)
|
} else if(DATE) {
|
||||||
|
if (DATE_GL42)
|
||||||
ps_sel.date = 1 + context->TEST.DATM;
|
ps_sel.date = 1 + context->TEST.DATM;
|
||||||
else
|
else
|
||||||
om_dssel.date = 1;
|
om_dssel.date = 1;
|
||||||
|
@ -497,7 +540,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||||
dev->SetupCB(&vs_cb, &ps_cb, ps_sel.sprite ? &gs_cb : NULL);
|
dev->SetupCB(&vs_cb, &ps_cb, ps_sel.sprite ? &gs_cb : NULL);
|
||||||
|
|
||||||
if (advance_DATE) {
|
if (DATE_GL42) {
|
||||||
// Create an r32i image that will contain primitive ID
|
// Create an r32i image that will contain primitive ID
|
||||||
// Note: do it at the beginning because the clean will dirty the FBO state
|
// Note: do it at the beginning because the clean will dirty the FBO state
|
||||||
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
|
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
|
||||||
|
@ -598,7 +641,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (advance_DATE)
|
if (DATE_GL42)
|
||||||
dev->RecycleDateTexture();
|
dev->RecycleDateTexture();
|
||||||
|
|
||||||
dev->EndScene();
|
dev->EndScene();
|
||||||
|
|
|
@ -56,4 +56,6 @@ class GSRendererOGL : public GSRendererHW
|
||||||
void UpdateFBA(GSTexture* rt);
|
void UpdateFBA(GSTexture* rt);
|
||||||
|
|
||||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||||
|
|
||||||
|
bool PrimitiveOverlap();
|
||||||
};
|
};
|
||||||
|
|
|
@ -438,13 +438,13 @@ void ps_main()
|
||||||
#if !pGL_ES
|
#if !pGL_ES
|
||||||
void ps_main()
|
void ps_main()
|
||||||
{
|
{
|
||||||
#if PS_DATE == 1 && !defined(DISABLE_GL42_image)
|
#if (PS_DATE & 3) == 1 && !defined(DISABLE_GL42_image)
|
||||||
// DATM == 0
|
// DATM == 0
|
||||||
// Pixel with alpha equal to 1 will failed
|
// Pixel with alpha equal to 1 will failed
|
||||||
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
|
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
|
||||||
if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)
|
if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)
|
||||||
discard;
|
discard;
|
||||||
#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)
|
#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)
|
||||||
// DATM == 1
|
// DATM == 1
|
||||||
// Pixel with alpha equal to 0 will failed
|
// Pixel with alpha equal to 0 will failed
|
||||||
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
|
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
|
||||||
|
|
|
@ -1189,13 +1189,13 @@ static const char* tfx_fs_all_glsl =
|
||||||
"#if !pGL_ES\n"
|
"#if !pGL_ES\n"
|
||||||
"void ps_main()\n"
|
"void ps_main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
"#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n"
|
"#if (PS_DATE & 3) == 1 && !defined(DISABLE_GL42_image)\n"
|
||||||
" // DATM == 0\n"
|
" // DATM == 0\n"
|
||||||
" // Pixel with alpha equal to 1 will failed\n"
|
" // Pixel with alpha equal to 1 will failed\n"
|
||||||
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
|
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
|
||||||
" if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)\n"
|
" if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)\n"
|
||||||
" discard;\n"
|
" discard;\n"
|
||||||
"#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n"
|
"#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)\n"
|
||||||
" // DATM == 1\n"
|
" // DATM == 1\n"
|
||||||
" // Pixel with alpha equal to 0 will failed\n"
|
" // Pixel with alpha equal to 0 will failed\n"
|
||||||
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
|
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
|
||||||
|
|
Loading…
Reference in New Issue