gsdx ogl: replace the 4 VS shader variations with an AND mask

Performance will be roughly the same, but there is now a single VS for all
the HW emulation.
This commit is contained in:
Gregory Hainaut 2016-06-01 09:14:06 +02:00
parent 959abe64f8
commit 5d49a6b685
6 changed files with 24 additions and 48 deletions

View File

@ -417,28 +417,16 @@ void GSDeviceOGL::CreateTextureFX()
m_palette_ss = CreateSampler(false, false, false);
glBindSampler(1, m_palette_ss);
// Pre compile all Geometry & Vertex Shader
// It might cost a seconds at startup but it would reduce benchmark pollution
{
GL_PUSH("Compile GS");
for (uint32 key = 0; key < countof(m_gs); key++) {
GSSelector sel(key);
if (sel.point == sel.sprite)
m_gs[key] = 0;
else
m_gs[key] = CompileGS(GSSelector(key));
}
// Pre compile the (remaining) Geometry & Vertex Shader
for (uint32 key = 0; key < countof(m_gs); key++) {
GSSelector sel(key);
if (sel.point == sel.sprite)
m_gs[key] = 0;
else
m_gs[key] = CompileGS(GSSelector(key));
}
{
GL_PUSH("Compile VS");
for (uint32 key = 0; key < countof(m_vs); key++) {
VSSelector sel(key);
m_vs[key] = CompileVS(sel);
}
}
m_vs[0] = CompileVS(VSSelector(0));
// Enable all bits for stencil operations. Technically 1 bit is
// enough but buffer is polluted with noise. Clear will be limited
@ -768,10 +756,7 @@ void GSDeviceOGL::Barrier(GLbitfield b)
/* Note: must be here because tfx_glsl is static */
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
{
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
;
return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, macro);
return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, "");
}
/* Note: must be here because tfx_glsl is static */

View File

@ -120,12 +120,14 @@ public:
struct alignas(32) VSConstantBuffer
{
GSVector4 Vertex_Scale_Offset;
GSVector4 TextureScale;
GSVector2i DepthMask;
GSVector2 TextureScale;
VSConstantBuffer()
{
Vertex_Scale_Offset = GSVector4::zero();
TextureScale = GSVector4::zero();
DepthMask = GSVector2i(0, 0);
TextureScale = GSVector2(0, 0);
}
__forceinline bool Update(const VSConstantBuffer* cb)
@ -151,9 +153,7 @@ public:
{
struct
{
uint32 bppz:2;
uint32 _free:30;
uint32 _free:32;
};
uint32 key;
@ -446,7 +446,7 @@ public:
GLuint ps;
} m_shadeboost;
GLuint m_vs[1<<2];
GLuint m_vs[1];
GLuint m_gs[1<<2];
GLuint m_ps_ss[1<<4];
GSDepthStencilOGL* m_om_dss[1<<5];

View File

@ -982,6 +982,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
// We are probably receiving bad coordinates from VU1 in these cases.
vs_cb.DepthMask = GSVector2i(0xFFFFFFFF, 0xFFFFFFFF);
if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
{
@ -994,7 +995,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if (m_vt.m_min.p.z > 0xffffff)
{
GL_INS("Bad Z size on 24 bits buffers")
vs_sel.bppz = 1;
vs_cb.DepthMask = GSVector2i(0x00FFFFFF, 0x00FFFFFF);
om_dssel.ztst = ZTST_ALWAYS;
}
}
@ -1008,7 +1009,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if (m_vt.m_min.p.z > 0xffff)
{
GL_INS("Bad Z size on 16 bits buffers")
vs_sel.bppz = 2;
vs_cb.DepthMask = GSVector2i(0x0000FFFF, 0x0000FFFF);
om_dssel.ztst = ZTST_ALWAYS;
}
}

View File

@ -68,7 +68,8 @@ layout(std140, binding = 20) uniform cb20
{
vec2 VertexScale;
vec2 VertexOffset;
vec2 _removed_TextureScale;
uint DepthMask;
uint cb20_pad;
vec2 PointSize;
};

View File

@ -35,13 +35,7 @@ void texture_coord()
void vs_main()
{
highp uint z;
if(VS_BPPZ == 1) // 24
z = i_z & uint(0xffffff);
else if(VS_BPPZ == 2) // 16
z = i_z & uint(0xffff);
else
z = i_z;
highp uint z = i_z & DepthMask;
// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty

View File

@ -93,7 +93,8 @@ static const char* const common_header_glsl =
"{\n"
" vec2 VertexScale;\n"
" vec2 VertexOffset;\n"
" vec2 _removed_TextureScale;\n"
" uint DepthMask;\n"
" uint cb20_pad;\n"
" vec2 PointSize;\n"
"};\n"
"\n"
@ -697,13 +698,7 @@ static const char* const tfx_vgs_glsl =
"\n"
"void vs_main()\n"
"{\n"
" highp uint z;\n"
" if(VS_BPPZ == 1) // 24\n"
" z = i_z & uint(0xffffff);\n"
" else if(VS_BPPZ == 2) // 16\n"
" z = i_z & uint(0xffff);\n"
" else\n"
" z = i_z;\n"
" highp uint z = i_z & DepthMask;\n"
"\n"
" // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n"
" // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n"