mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: improve date performance for GL45
If there is no primitive overlap, the draw is allowed to read directly from the render target. On the SotC test case with 6x scaling: 30fps -> 40fps. Note: this requires the GL_ARB_texture_barrier extension, so be sure to have a recent driver. Note 2: it also requires a lot of testing. Open question: in the case of complex DATE (written alpha), would it be faster to split the draw call into multiple calls with no primitive overlap?
This commit is contained in:
parent
795ae50ecd
commit
c207632e49
|
@ -54,8 +54,8 @@ namespace GLState {
|
|||
|
||||
GLuint rt = 0;
|
||||
GLuint ds = 0;
|
||||
GLuint tex_unit[2] = {0, 0};
|
||||
GLuint64 tex_handle[2] = { 0, 0};
|
||||
GLuint tex_unit[4] = {0, 0, 0, 0};
|
||||
GLuint64 tex_handle[4] = { 0, 0, 0, 0};
|
||||
bool dirty_ressources = false;
|
||||
|
||||
GLuint ps = 0;
|
||||
|
@ -106,10 +106,10 @@ namespace GLState {
|
|||
|
||||
rt = 0;
|
||||
ds = 0;
|
||||
tex_unit[0] = 0;
|
||||
tex_unit[1] = 0;
|
||||
tex_handle[0] = 0;
|
||||
tex_handle[1] = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
tex_unit[i] = 0;
|
||||
tex_handle[i] = 0;
|
||||
}
|
||||
|
||||
ps = 0;
|
||||
gs = 0;
|
||||
|
|
|
@ -56,8 +56,8 @@ namespace GLState {
|
|||
|
||||
extern GLuint rt; // render target
|
||||
extern GLuint ds; // Depth-Stencil
|
||||
extern GLuint tex_unit[2]; // shader input texture
|
||||
extern GLuint64 tex_handle[2]; // shader input texture
|
||||
extern GLuint tex_unit[4]; // shader input texture
|
||||
extern GLuint64 tex_handle[4]; // shader input texture
|
||||
|
||||
extern GLuint ps;
|
||||
extern GLuint gs;
|
||||
|
|
|
@ -579,7 +579,6 @@ void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt)
|
|||
#ifndef ENABLE_GLES
|
||||
gl_BindImageTexture(2, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
|
||||
#endif
|
||||
gl_BindTextureUnit(3, static_cast<GSTextureOGL*>(rt)->GetID());
|
||||
}
|
||||
|
||||
void GSDeviceOGL::RecycleDateTexture()
|
||||
|
|
|
@ -372,7 +372,7 @@ class GSDeviceOGL : public GSDevice
|
|||
uint32 clr1:1;
|
||||
uint32 fba:1;
|
||||
uint32 aout:1;
|
||||
uint32 date:2;
|
||||
uint32 date:3;
|
||||
uint32 spritehack:1;
|
||||
uint32 tcoffsethack:1;
|
||||
uint32 point_sampler:1;
|
||||
|
|
|
@ -151,6 +151,39 @@ void GSRendererOGL::SetupIA()
|
|||
dev->IASetPrimitiveTopology(t);
|
||||
}
|
||||
|
||||
bool GSRendererOGL::PrimitiveOverlap()
|
||||
{
|
||||
if (m_vertex.next < 4)
|
||||
return false;
|
||||
|
||||
if (m_vt.m_primclass != GS_SPRITE_CLASS)
|
||||
return true;
|
||||
|
||||
// Check intersection of sprite primitive only
|
||||
size_t count = m_vertex.next;
|
||||
GSVertex* v = &m_vertex.buff[0];
|
||||
|
||||
for(size_t i = 0; i < count; i += 2) {
|
||||
// Very bad code
|
||||
GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i+1].XYZ.X, v[i+1].XYZ.Y);
|
||||
for (size_t j = i+2; j < count; j += 2) {
|
||||
GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j+1].XYZ.X, v[j+1].XYZ.Y);
|
||||
GSVector4i inter = vi.rintersect(vj);
|
||||
if (!inter.rempty()) {
|
||||
//fprintf(stderr, "Overlap found between %d and %d (draw of %d vertices)\n", i, j, count);
|
||||
//vi.print();
|
||||
//vj.print();
|
||||
//inter.print();
|
||||
//exit(0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//fprintf(stderr, "Yes, code can be optimized (draw of %d vertices)\n", count);
|
||||
return false;
|
||||
}
|
||||
|
||||
void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
|
@ -160,7 +193,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
const GSVector2& rtscale = rt->GetScale();
|
||||
|
||||
bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
bool advance_DATE = false;
|
||||
bool DATE_GL42 = false;
|
||||
bool DATE_GL45 = false;
|
||||
|
||||
ASSERT(m_dev != NULL);
|
||||
|
||||
|
@ -208,14 +242,21 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||
|
||||
if (DATE && om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
|
||||
advance_DATE = GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil;
|
||||
if (DATE) {
|
||||
if (gl_TextureBarrier && !PrimitiveOverlap()) {
|
||||
DATE_GL45 = true;
|
||||
DATE = false;
|
||||
} else if (om_bsel.wa && (!context->TEST.ATE || context->TEST.ATST == ATST_ALWAYS)) {
|
||||
DATE_GL42 = GLLoader::found_GL_ARB_shader_image_load_store && !UserHacks_AlphaStencil;
|
||||
}
|
||||
}
|
||||
|
||||
// DATE
|
||||
|
||||
if(DATE)
|
||||
{
|
||||
if (DATE_GL45) {
|
||||
gl_TextureBarrier();
|
||||
dev->PSSetShaderResource(3, rt);
|
||||
} else if (DATE) {
|
||||
// TODO: do I need to clamp the value (if yes how? rintersect with rt?)
|
||||
GSVector4 si = GSVector4(rtscale.x, rtscale.y);
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f); // Round value
|
||||
|
@ -227,8 +268,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
// Must be done here to avoid any GL state pertubation (clear function...)
|
||||
// Create an r32ui image that will containt primitive ID
|
||||
if (advance_DATE) {
|
||||
if (DATE_GL42) {
|
||||
dev->InitPrimDateTexture(rt);
|
||||
dev->PSSetShaderResource(3, rt);
|
||||
} else {
|
||||
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
||||
|
||||
|
@ -349,9 +391,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
// GS_SPRITE_CLASS are already flat (either by CPU or the GS)
|
||||
ps_sel.iip = (m_vt.m_primclass == GS_SPRITE_CLASS) ? 1 : PRIM->IIP;
|
||||
|
||||
if(DATE)
|
||||
{
|
||||
if (advance_DATE)
|
||||
if (DATE_GL45) {
|
||||
ps_sel.date = 5 + context->TEST.DATM;
|
||||
} else if(DATE) {
|
||||
if (DATE_GL42)
|
||||
ps_sel.date = 1 + context->TEST.DATM;
|
||||
else
|
||||
om_dssel.date = 1;
|
||||
|
@ -497,7 +540,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
dev->SetupCB(&vs_cb, &ps_cb, ps_sel.sprite ? &gs_cb : NULL);
|
||||
|
||||
if (advance_DATE) {
|
||||
if (DATE_GL42) {
|
||||
// Create an r32i image that will contain primitive ID
|
||||
// Note: do it at the beginning because the clean will dirty the FBO state
|
||||
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
|
||||
|
@ -598,7 +641,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
}
|
||||
}
|
||||
}
|
||||
if (advance_DATE)
|
||||
if (DATE_GL42)
|
||||
dev->RecycleDateTexture();
|
||||
|
||||
dev->EndScene();
|
||||
|
|
|
@ -56,4 +56,6 @@ class GSRendererOGL : public GSRendererHW
|
|||
void UpdateFBA(GSTexture* rt);
|
||||
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
|
||||
bool PrimitiveOverlap();
|
||||
};
|
||||
|
|
|
@ -438,13 +438,13 @@ void ps_main()
|
|||
#if !pGL_ES
|
||||
void ps_main()
|
||||
{
|
||||
#if PS_DATE == 1 && !defined(DISABLE_GL42_image)
|
||||
#if (PS_DATE & 3) == 1 && !defined(DISABLE_GL42_image)
|
||||
// DATM == 0
|
||||
// Pixel with alpha equal to 1 will failed
|
||||
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
|
||||
if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)
|
||||
discard;
|
||||
#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)
|
||||
#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)
|
||||
// DATM == 1
|
||||
// Pixel with alpha equal to 0 will failed
|
||||
float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;
|
||||
|
|
|
@ -1189,13 +1189,13 @@ static const char* tfx_fs_all_glsl =
|
|||
"#if !pGL_ES\n"
|
||||
"void ps_main()\n"
|
||||
"{\n"
|
||||
"#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n"
|
||||
"#if (PS_DATE & 3) == 1 && !defined(DISABLE_GL42_image)\n"
|
||||
" // DATM == 0\n"
|
||||
" // Pixel with alpha equal to 1 will failed\n"
|
||||
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
|
||||
" if ((127.5f / 255.0f) < rt_a) // < 0x80 pass (== 0x80 should not pass)\n"
|
||||
" discard;\n"
|
||||
"#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n"
|
||||
"#elif (PS_DATE & 3) == 2 && !defined(DISABLE_GL42_image)\n"
|
||||
" // DATM == 1\n"
|
||||
" // Pixel with alpha equal to 0 will failed\n"
|
||||
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
|
||||
|
|
Loading…
Reference in New Issue