mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl: replace the 4 VS shader variations with an AND mask
Performance will be roughly the same. However, there is now a single VS for all of the HW emulation.
This commit is contained in:
parent
959abe64f8
commit
5d49a6b685
|
@ -417,28 +417,16 @@ void GSDeviceOGL::CreateTextureFX()
|
|||
m_palette_ss = CreateSampler(false, false, false);
|
||||
glBindSampler(1, m_palette_ss);
|
||||
|
||||
// Pre compile all Geometry & Vertex Shader
|
||||
// It might cost a seconds at startup but it would reduce benchmark pollution
|
||||
{
|
||||
GL_PUSH("Compile GS");
|
||||
|
||||
for (uint32 key = 0; key < countof(m_gs); key++) {
|
||||
GSSelector sel(key);
|
||||
if (sel.point == sel.sprite)
|
||||
m_gs[key] = 0;
|
||||
else
|
||||
m_gs[key] = CompileGS(GSSelector(key));
|
||||
}
|
||||
// Pre compile the (remaining) Geometry & Vertex Shader
|
||||
for (uint32 key = 0; key < countof(m_gs); key++) {
|
||||
GSSelector sel(key);
|
||||
if (sel.point == sel.sprite)
|
||||
m_gs[key] = 0;
|
||||
else
|
||||
m_gs[key] = CompileGS(GSSelector(key));
|
||||
}
|
||||
|
||||
{
|
||||
GL_PUSH("Compile VS");
|
||||
|
||||
for (uint32 key = 0; key < countof(m_vs); key++) {
|
||||
VSSelector sel(key);
|
||||
m_vs[key] = CompileVS(sel);
|
||||
}
|
||||
}
|
||||
m_vs[0] = CompileVS(VSSelector(0));
|
||||
|
||||
// Enable all bits for stencil operations. Technically 1 bit is
|
||||
// enough but buffer is polluted with noise. Clear will be limited
|
||||
|
@ -768,10 +756,7 @@ void GSDeviceOGL::Barrier(GLbitfield b)
|
|||
/* Note: must be here because tfx_glsl is static */
|
||||
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
|
||||
{
|
||||
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
|
||||
;
|
||||
|
||||
return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, macro);
|
||||
return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, "");
|
||||
}
|
||||
|
||||
/* Note: must be here because tfx_glsl is static */
|
||||
|
|
|
@ -120,12 +120,14 @@ public:
|
|||
struct alignas(32) VSConstantBuffer
|
||||
{
|
||||
GSVector4 Vertex_Scale_Offset;
|
||||
GSVector4 TextureScale;
|
||||
GSVector2i DepthMask;
|
||||
GSVector2 TextureScale;
|
||||
|
||||
VSConstantBuffer()
|
||||
{
|
||||
Vertex_Scale_Offset = GSVector4::zero();
|
||||
TextureScale = GSVector4::zero();
|
||||
DepthMask = GSVector2i(0, 0);
|
||||
TextureScale = GSVector2(0, 0);
|
||||
}
|
||||
|
||||
__forceinline bool Update(const VSConstantBuffer* cb)
|
||||
|
@ -151,9 +153,7 @@ public:
|
|||
{
|
||||
struct
|
||||
{
|
||||
uint32 bppz:2;
|
||||
|
||||
uint32 _free:30;
|
||||
uint32 _free:32;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
|
@ -446,7 +446,7 @@ public:
|
|||
GLuint ps;
|
||||
} m_shadeboost;
|
||||
|
||||
GLuint m_vs[1<<2];
|
||||
GLuint m_vs[1];
|
||||
GLuint m_gs[1<<2];
|
||||
GLuint m_ps_ss[1<<4];
|
||||
GSDepthStencilOGL* m_om_dss[1<<5];
|
||||
|
|
|
@ -982,6 +982,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
|
||||
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
|
||||
// We are probably receiving bad coordinates from VU1 in these cases.
|
||||
vs_cb.DepthMask = GSVector2i(0xFFFFFFFF, 0xFFFFFFFF);
|
||||
|
||||
if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
|
||||
{
|
||||
|
@ -994,7 +995,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
if (m_vt.m_min.p.z > 0xffffff)
|
||||
{
|
||||
GL_INS("Bad Z size on 24 bits buffers")
|
||||
vs_sel.bppz = 1;
|
||||
vs_cb.DepthMask = GSVector2i(0x00FFFFFF, 0x00FFFFFF);
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
|
@ -1008,7 +1009,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
if (m_vt.m_min.p.z > 0xffff)
|
||||
{
|
||||
GL_INS("Bad Z size on 16 bits buffers")
|
||||
vs_sel.bppz = 2;
|
||||
vs_cb.DepthMask = GSVector2i(0x0000FFFF, 0x0000FFFF);
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,7 +68,8 @@ layout(std140, binding = 20) uniform cb20
|
|||
{
|
||||
vec2 VertexScale;
|
||||
vec2 VertexOffset;
|
||||
vec2 _removed_TextureScale;
|
||||
uint DepthMask;
|
||||
uint cb20_pad;
|
||||
vec2 PointSize;
|
||||
};
|
||||
|
||||
|
|
|
@ -35,13 +35,7 @@ void texture_coord()
|
|||
|
||||
void vs_main()
|
||||
{
|
||||
highp uint z;
|
||||
if(VS_BPPZ == 1) // 24
|
||||
z = i_z & uint(0xffffff);
|
||||
else if(VS_BPPZ == 2) // 16
|
||||
z = i_z & uint(0xffff);
|
||||
else
|
||||
z = i_z;
|
||||
highp uint z = i_z & DepthMask;
|
||||
|
||||
// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
|
||||
// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
|
||||
|
|
|
@ -93,7 +93,8 @@ static const char* const common_header_glsl =
|
|||
"{\n"
|
||||
" vec2 VertexScale;\n"
|
||||
" vec2 VertexOffset;\n"
|
||||
" vec2 _removed_TextureScale;\n"
|
||||
" uint DepthMask;\n"
|
||||
" uint cb20_pad;\n"
|
||||
" vec2 PointSize;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
|
@ -697,13 +698,7 @@ static const char* const tfx_vgs_glsl =
|
|||
"\n"
|
||||
"void vs_main()\n"
|
||||
"{\n"
|
||||
" highp uint z;\n"
|
||||
" if(VS_BPPZ == 1) // 24\n"
|
||||
" z = i_z & uint(0xffffff);\n"
|
||||
" else if(VS_BPPZ == 2) // 16\n"
|
||||
" z = i_z & uint(0xffff);\n"
|
||||
" else\n"
|
||||
" z = i_z;\n"
|
||||
" highp uint z = i_z & DepthMask;\n"
|
||||
"\n"
|
||||
" // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n"
|
||||
" // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n"
|
||||
|
|
Loading…
Reference in New Issue