gsdx-hw: Add zclamping to ps/fs.

Add zclamping to ps/fs, and enable the vs or ps/fs clamp only when needed, using a macro.
This commit is contained in:
KrossX 2020-06-06 17:21:03 +02:00 committed by lightningterror
parent 5d0eefeebd
commit b5625ad8b0
9 changed files with 60 additions and 11 deletions

View File

@ -105,7 +105,7 @@ public:
GSVector4i FbMask;
GSVector4 TC_OffsetHack;
GSVector4 Af;
GSVector4 Af_MaxDepth;
GSVector4 DitherMatrix[4];
PSConstantBuffer()
@ -118,7 +118,7 @@ public:
MskFix = GSVector4i::zero();
ChannelShuffle = GSVector4i::zero();
FbMask = GSVector4i::zero();
Af = GSVector4::zero();
Af_MaxDepth = GSVector4::zero();
DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero();
@ -241,6 +241,9 @@ public:
// Dithering
uint32 dither:2;
// Depth clamp
uint32 zclamp:1;
// Hack
uint32 tcoffsethack:1;
uint32 urban_chaos_hle:1;
@ -248,7 +251,7 @@ public:
uint32 point_sampler:1;
uint32 invalid_tex0:1; // Lupin the 3rd
uint32 _free:16;
uint32 _free:15;
};
uint64 key;

View File

@ -163,13 +163,26 @@ void GSRendererDX11::EmulateZbuffer()
// Clamping is done after rasterization.
const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z;
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
ps_cb.Af_MaxDepth.y = 1.0f;
m_ps_sel.zclamp = 0;
if (clamp_z)
{
// FIXME: Do z clamping for sprites on vs, triangles on ps.
vs_cb.MaxDepth = GSVector2i(max_z);
if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS)
{
vs_cb.MaxDepth = GSVector2i(max_z);
}
else
{
ps_cb.Af_MaxDepth.y = max_z * ldexpf(1, -32);
m_ps_sel.zclamp = 1;
}
}
GSVertex* v = &m_vertex.buff[0];
// Minor optimization of a corner case (it allows better emulation of some alpha test effects)
if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z)
@ -581,7 +594,7 @@ void GSRendererDX11::EmulateBlending()
// Requires the fixed alpha value
if (ALPHA.C == 2)
ps_cb.Af.x = (float)ALPHA.FIX / 128.0f;
ps_cb.Af_MaxDepth.x = (float)ALPHA.FIX / 128.0f;
}
else
{

View File

@ -219,6 +219,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
sm.AddMacro("PS_BLEND_C", sel.blend_c);
sm.AddMacro("PS_BLEND_D", sel.blend_d);
sm.AddMacro("PS_DITHER", sel.dither);
sm.AddMacro("PS_ZCLAMP", sel.zclamp);
CComPtr<ID3D11PixelShader> ps;

View File

@ -983,6 +983,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
+ format("#define PS_FBMASK %d\n", sel.fbmask)
+ format("#define PS_HDR %d\n", sel.hdr)
+ format("#define PS_DITHER %d\n", sel.dither)
+ format("#define PS_ZCLAMP %d\n", sel.zclamp)
// + format("#define PS_PABE %d\n", sel.pabe)
;

View File

@ -198,6 +198,8 @@ public:
GSVector4 HalfTexel;
GSVector4 MinMax;
GSVector4 TC_OH_TS;
GSVector4 MaxDepth;
GSVector4 DitherMatrix[4];
PSConstantBuffer()
@ -210,6 +212,7 @@ public:
MskFix = GSVector4i::zero();
TC_OH_TS = GSVector4::zero();
FbMask = GSVector4i::zero();
MaxDepth = GSVector4::zero();
DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero();
@ -225,7 +228,7 @@ public:
// if WH matches both HalfTexel and TC_OH_TS do too
// MinMax depends on WH and MskFix so no need to check it too
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])
& (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11])).alltrue())
& (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12])).alltrue())
{
// Note previous check uses SSE already, a plain copy will be faster than any memcpy
a[0] = b[0];
@ -236,9 +239,11 @@ public:
a[5] = b[5];
a[8] = b[8];
a[9] = b[9];
a[10] = b[10];
a[11] = b[11];
a[12] = b[12];
return true;
}
@ -303,6 +308,9 @@ public:
// Dithering
uint32 dither:2;
// Depth clamp
uint32 zclamp:1;
// Hack
uint32 tcoffsethack:1;
uint32 urban_chaos_hle:1;
@ -313,7 +321,7 @@ public:
uint32 point_sampler:1;
uint32 invalid_tex0:1; // Lupin the 3rd
uint32 _free2:8;
uint32 _free2:7;
};
uint64 key;

View File

@ -175,11 +175,18 @@ void GSRendererOGL::EmulateZbuffer()
// Clamping is done after rasterization.
const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z;
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
ps_cb.MaxDepth = GSVector4(1.0f);
m_ps_sel.zclamp = 0;
if (clamp_z) {
// FIXME: Do z clamping for sprites on vs, triangles on ps.
if (m_vt.m_primclass == GS_SPRITE_CLASS)
if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) {
vs_cb.MaxDepth = GSVector2i(max_z);
} else {
ps_cb.MaxDepth = GSVector4(max_z * ldexpf(1, -32));
m_ps_sel.zclamp = 1;
}
}
GSVertex* v = &m_vertex.buff[0];

View File

@ -95,6 +95,9 @@ layout(std140, binding = 21) uniform cb21
vec2 TextureScale;
vec2 TC_OffsetHack;
float MaxDepthPS;
vec3 pad_cb21;
mat4 DitherMatrix;
};
#endif

View File

@ -877,6 +877,10 @@ void ps_main()
// #endif
SV_Target0 = C / 255.0f;
SV_Target1 = vec4(alpha_blend);
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
#endif
}
#endif

View File

@ -49,6 +49,7 @@
#define PS_BLEND_C 0
#define PS_BLEND_D 0
#define PS_DITHER 0
#define PS_ZCLAMP 0
#endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
@ -85,6 +86,9 @@ struct PS_OUTPUT
{
float4 c0 : SV_Target0;
float4 c1 : SV_Target1;
#if PS_ZCLAMP
float depth : SV_Depth;
#endif
};
Texture2D<float4> Texture : register(t0);
@ -117,7 +121,8 @@ cbuffer cb1
uint4 FbMask;
float4 TC_OffsetHack;
float Af;
float3 _pad;
float MaxDepthPS;
float2 pad_cb1;
float4x4 DitherMatrix;
};
@ -778,6 +783,10 @@ PS_OUTPUT ps_main(PS_INPUT input)
output.c0 = C / 255.0f;
output.c1 = (float4)(alpha_blend);
#if PS_ZCLAMP
output.depth = min(input.p.z, MaxDepthPS);
#endif
return output;
}