mirror of https://github.com/PCSX2/pcsx2.git
gsdx ogl: replace the 4 VS shader variations with an AND mask
Performance will be roughly the same, but there is now a single vertex shader (VS) for all of the HW emulation.
This commit is contained in:
parent
959abe64f8
commit
5d49a6b685
|
@ -417,11 +417,7 @@ void GSDeviceOGL::CreateTextureFX()
|
||||||
m_palette_ss = CreateSampler(false, false, false);
|
m_palette_ss = CreateSampler(false, false, false);
|
||||||
glBindSampler(1, m_palette_ss);
|
glBindSampler(1, m_palette_ss);
|
||||||
|
|
||||||
// Pre compile all Geometry & Vertex Shader
|
// Pre compile the (remaining) Geometry & Vertex Shader
|
||||||
// It might cost a seconds at startup but it would reduce benchmark pollution
|
|
||||||
{
|
|
||||||
GL_PUSH("Compile GS");
|
|
||||||
|
|
||||||
for (uint32 key = 0; key < countof(m_gs); key++) {
|
for (uint32 key = 0; key < countof(m_gs); key++) {
|
||||||
GSSelector sel(key);
|
GSSelector sel(key);
|
||||||
if (sel.point == sel.sprite)
|
if (sel.point == sel.sprite)
|
||||||
|
@ -429,16 +425,8 @@ void GSDeviceOGL::CreateTextureFX()
|
||||||
else
|
else
|
||||||
m_gs[key] = CompileGS(GSSelector(key));
|
m_gs[key] = CompileGS(GSSelector(key));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
{
|
m_vs[0] = CompileVS(VSSelector(0));
|
||||||
GL_PUSH("Compile VS");
|
|
||||||
|
|
||||||
for (uint32 key = 0; key < countof(m_vs); key++) {
|
|
||||||
VSSelector sel(key);
|
|
||||||
m_vs[key] = CompileVS(sel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enable all bits for stencil operations. Technically 1 bit is
|
// Enable all bits for stencil operations. Technically 1 bit is
|
||||||
// enough but buffer is polluted with noise. Clear will be limited
|
// enough but buffer is polluted with noise. Clear will be limited
|
||||||
|
@ -768,10 +756,7 @@ void GSDeviceOGL::Barrier(GLbitfield b)
|
||||||
/* Note: must be here because tfx_glsl is static */
|
/* Note: must be here because tfx_glsl is static */
|
||||||
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
|
GLuint GSDeviceOGL::CompileVS(VSSelector sel)
|
||||||
{
|
{
|
||||||
std::string macro = format("#define VS_BPPZ %d\n", sel.bppz)
|
return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, "");
|
||||||
;
|
|
||||||
|
|
||||||
return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, tfx_vgs_glsl, macro);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Note: must be here because tfx_glsl is static */
|
/* Note: must be here because tfx_glsl is static */
|
||||||
|
|
|
@ -120,12 +120,14 @@ public:
|
||||||
struct alignas(32) VSConstantBuffer
|
struct alignas(32) VSConstantBuffer
|
||||||
{
|
{
|
||||||
GSVector4 Vertex_Scale_Offset;
|
GSVector4 Vertex_Scale_Offset;
|
||||||
GSVector4 TextureScale;
|
GSVector2i DepthMask;
|
||||||
|
GSVector2 TextureScale;
|
||||||
|
|
||||||
VSConstantBuffer()
|
VSConstantBuffer()
|
||||||
{
|
{
|
||||||
Vertex_Scale_Offset = GSVector4::zero();
|
Vertex_Scale_Offset = GSVector4::zero();
|
||||||
TextureScale = GSVector4::zero();
|
DepthMask = GSVector2i(0, 0);
|
||||||
|
TextureScale = GSVector2(0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline bool Update(const VSConstantBuffer* cb)
|
__forceinline bool Update(const VSConstantBuffer* cb)
|
||||||
|
@ -151,9 +153,7 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 bppz:2;
|
uint32 _free:32;
|
||||||
|
|
||||||
uint32 _free:30;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
uint32 key;
|
||||||
|
@ -446,7 +446,7 @@ public:
|
||||||
GLuint ps;
|
GLuint ps;
|
||||||
} m_shadeboost;
|
} m_shadeboost;
|
||||||
|
|
||||||
GLuint m_vs[1<<2];
|
GLuint m_vs[1];
|
||||||
GLuint m_gs[1<<2];
|
GLuint m_gs[1<<2];
|
||||||
GLuint m_ps_ss[1<<4];
|
GLuint m_ps_ss[1<<4];
|
||||||
GSDepthStencilOGL* m_om_dss[1<<5];
|
GSDepthStencilOGL* m_om_dss[1<<5];
|
||||||
|
|
|
@ -982,6 +982,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
|
// The real GS appears to do no masking based on the Z buffer format and writing larger Z values
|
||||||
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
|
// than the buffer supports seems to be an error condition on the real GS, causing it to crash.
|
||||||
// We are probably receiving bad coordinates from VU1 in these cases.
|
// We are probably receiving bad coordinates from VU1 in these cases.
|
||||||
|
vs_cb.DepthMask = GSVector2i(0xFFFFFFFF, 0xFFFFFFFF);
|
||||||
|
|
||||||
if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
|
if (om_dssel.ztst >= ZTST_ALWAYS && om_dssel.zwe)
|
||||||
{
|
{
|
||||||
|
@ -994,7 +995,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
if (m_vt.m_min.p.z > 0xffffff)
|
if (m_vt.m_min.p.z > 0xffffff)
|
||||||
{
|
{
|
||||||
GL_INS("Bad Z size on 24 bits buffers")
|
GL_INS("Bad Z size on 24 bits buffers")
|
||||||
vs_sel.bppz = 1;
|
vs_cb.DepthMask = GSVector2i(0x00FFFFFF, 0x00FFFFFF);
|
||||||
om_dssel.ztst = ZTST_ALWAYS;
|
om_dssel.ztst = ZTST_ALWAYS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1008,7 +1009,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
if (m_vt.m_min.p.z > 0xffff)
|
if (m_vt.m_min.p.z > 0xffff)
|
||||||
{
|
{
|
||||||
GL_INS("Bad Z size on 16 bits buffers")
|
GL_INS("Bad Z size on 16 bits buffers")
|
||||||
vs_sel.bppz = 2;
|
vs_cb.DepthMask = GSVector2i(0x0000FFFF, 0x0000FFFF);
|
||||||
om_dssel.ztst = ZTST_ALWAYS;
|
om_dssel.ztst = ZTST_ALWAYS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,7 +68,8 @@ layout(std140, binding = 20) uniform cb20
|
||||||
{
|
{
|
||||||
vec2 VertexScale;
|
vec2 VertexScale;
|
||||||
vec2 VertexOffset;
|
vec2 VertexOffset;
|
||||||
vec2 _removed_TextureScale;
|
uint DepthMask;
|
||||||
|
uint cb20_pad;
|
||||||
vec2 PointSize;
|
vec2 PointSize;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -35,13 +35,7 @@ void texture_coord()
|
||||||
|
|
||||||
void vs_main()
|
void vs_main()
|
||||||
{
|
{
|
||||||
highp uint z;
|
highp uint z = i_z & DepthMask;
|
||||||
if(VS_BPPZ == 1) // 24
|
|
||||||
z = i_z & uint(0xffffff);
|
|
||||||
else if(VS_BPPZ == 2) // 16
|
|
||||||
z = i_z & uint(0xffff);
|
|
||||||
else
|
|
||||||
z = i_z;
|
|
||||||
|
|
||||||
// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
|
// pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)
|
||||||
// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
|
// example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty
|
||||||
|
|
|
@ -93,7 +93,8 @@ static const char* const common_header_glsl =
|
||||||
"{\n"
|
"{\n"
|
||||||
" vec2 VertexScale;\n"
|
" vec2 VertexScale;\n"
|
||||||
" vec2 VertexOffset;\n"
|
" vec2 VertexOffset;\n"
|
||||||
" vec2 _removed_TextureScale;\n"
|
" uint DepthMask;\n"
|
||||||
|
" uint cb20_pad;\n"
|
||||||
" vec2 PointSize;\n"
|
" vec2 PointSize;\n"
|
||||||
"};\n"
|
"};\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -697,13 +698,7 @@ static const char* const tfx_vgs_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
"void vs_main()\n"
|
"void vs_main()\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" highp uint z;\n"
|
" highp uint z = i_z & DepthMask;\n"
|
||||||
" if(VS_BPPZ == 1) // 24\n"
|
|
||||||
" z = i_z & uint(0xffffff);\n"
|
|
||||||
" else if(VS_BPPZ == 2) // 16\n"
|
|
||||||
" z = i_z & uint(0xffff);\n"
|
|
||||||
" else\n"
|
|
||||||
" z = i_z;\n"
|
|
||||||
"\n"
|
"\n"
|
||||||
" // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n"
|
" // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n"
|
||||||
" // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n"
|
" // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n"
|
||||||
|
|
Loading…
Reference in New Issue