GS-ogl: Optimize fragment shader uniform buffer.

Couple MaxDepthPS with TA and Af vector.

The vector holding TA and Af has a free (padding) component, so let's use it.

Optimization.
This commit is contained in:
lightningterror 2021-12-09 15:05:16 +01:00
parent 64455620e8
commit 6c3e357d9e
3 changed files with 20 additions and 28 deletions

View File

@ -215,14 +215,13 @@ public:
{
GSVector4 FogColor_AREF;
GSVector4 WH;
GSVector4 TA_Af;
GSVector4 TA_MaxDepth_Af;
GSVector4i MskFix;
GSVector4i FbMask;
GSVector4 HalfTexel;
GSVector4 MinMax;
GSVector4 TC_OH;
GSVector4 MaxDepth;
GSVector4 DitherMatrix[4];
@ -231,12 +230,11 @@ public:
FogColor_AREF = GSVector4::zero();
HalfTexel = GSVector4::zero();
WH = GSVector4::zero();
TA_Af = GSVector4::zero();
TA_MaxDepth_Af = GSVector4::zero();
MinMax = GSVector4::zero();
MskFix = GSVector4i::zero();
TC_OH = GSVector4::zero();
FbMask = GSVector4i::zero();
MaxDepth = GSVector4::zero();
DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero();
@ -251,7 +249,7 @@ public:
// If WH matches, both HalfTexel and TC_OH_TS match too
if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[6] == b[6])
& (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12])).alltrue())
& (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11])).alltrue())
{
// Note previous check uses SSE already, a plain copy will be faster than any memcpy
a[0] = b[0];
@ -263,11 +261,9 @@ public:
a[6] = b[6];
a[8] = b[8];
a[9] = b[9];
a[10] = b[10];
a[11] = b[11];
a[12] = b[12];
return true;
}

View File

@ -135,7 +135,7 @@ void GSRendererOGL::EmulateZbuffer()
const bool clamp_z = (u32)(GSVector4i(m_vt.m_max.p).z) > max_z;
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
//ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f);
//ps_cb.TA_MaxDepth_Af.z = 1.0f;
m_ps_sel.zclamp = 0;
if (clamp_z)
@ -146,7 +146,7 @@ void GSRendererOGL::EmulateZbuffer()
}
else if (!m_context->ZBUF.ZMSK)
{
ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, max_z * ldexpf(1, -32));
ps_cb.TA_MaxDepth_Af.z = max_z * ldexpf(1, -32);
m_ps_sel.zclamp = 1;
}
}
@ -665,7 +665,7 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
// Requires the fixed alpha value
if (ALPHA.C == 2)
{
ps_cb.TA_Af.a = (float)ALPHA.FIX / 128.0f;
ps_cb.TA_MaxDepth_Af.a = (float)ALPHA.FIX / 128.0f;
}
}
else
@ -753,8 +753,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ta /= 255.0f;
// FIXME rely on compiler for the optimization
ps_cb.TA_Af.x = ta.x;
ps_cb.TA_Af.y = ta.y;
ps_cb.TA_MaxDepth_Af.x = ta.x;
ps_cb.TA_MaxDepth_Af.y = ta.y;
// The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea.
bilinear &= m_vt.IsLinear();
@ -778,8 +778,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ta /= 255.0f;
// FIXME rely on compiler for the optimization
ps_cb.TA_Af.x = ta.x;
ps_cb.TA_Af.y = ta.y;
ps_cb.TA_MaxDepth_Af.x = ta.x;
ps_cb.TA_MaxDepth_Af.y = ta.y;
}
// Select the index format

View File

@ -52,8 +52,7 @@ layout(std140, binding = 15) uniform cb15
int EMODA;
int EMODC;
int _pad0;
int _pad1;
ivec2 pad_cb15;
};
#endif
@ -81,7 +80,7 @@ layout(std140, binding = 21) uniform cb21
vec4 WH;
vec2 TA;
float pad0_cb21;
float MaxDepthPS;
float Af;
uvec4 MskFix;
@ -92,12 +91,9 @@ layout(std140, binding = 21) uniform cb21
vec4 MinMax;
vec2 pad1_cb21;
vec2 pad_cb21;
vec2 TC_OffsetHack;
vec3 pad2_cb21;
float MaxDepthPS;
mat4 DitherMatrix;
};
#endif