gsdx-hw: Add zclamping to ps/fs.

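Add z clamping to the pixel/fragment shader (ps/fs). When clamping is needed, sprites and points are clamped in the vertex shader (vs); all other primitives are clamped in the ps/fs, which is enabled through the PS_ZCLAMP macro.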
This commit is contained in:
KrossX 2020-06-06 17:21:03 +02:00 committed by lightningterror
parent 5d0eefeebd
commit b5625ad8b0
9 changed files with 60 additions and 11 deletions

View File

@ -105,7 +105,7 @@ public:
GSVector4i FbMask; GSVector4i FbMask;
GSVector4 TC_OffsetHack; GSVector4 TC_OffsetHack;
GSVector4 Af; GSVector4 Af_MaxDepth;
GSVector4 DitherMatrix[4]; GSVector4 DitherMatrix[4];
PSConstantBuffer() PSConstantBuffer()
@ -118,7 +118,7 @@ public:
MskFix = GSVector4i::zero(); MskFix = GSVector4i::zero();
ChannelShuffle = GSVector4i::zero(); ChannelShuffle = GSVector4i::zero();
FbMask = GSVector4i::zero(); FbMask = GSVector4i::zero();
Af = GSVector4::zero(); Af_MaxDepth = GSVector4::zero();
DitherMatrix[0] = GSVector4::zero(); DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero(); DitherMatrix[1] = GSVector4::zero();
@ -241,6 +241,9 @@ public:
// Dithering // Dithering
uint32 dither:2; uint32 dither:2;
// Depth clamp
uint32 zclamp:1;
// Hack // Hack
uint32 tcoffsethack:1; uint32 tcoffsethack:1;
uint32 urban_chaos_hle:1; uint32 urban_chaos_hle:1;
@ -248,7 +251,7 @@ public:
uint32 point_sampler:1; uint32 point_sampler:1;
uint32 invalid_tex0:1; // Lupin the 3rd uint32 invalid_tex0:1; // Lupin the 3rd
uint32 _free:16; uint32 _free:15;
}; };
uint64 key; uint64 key;

View File

@ -163,13 +163,26 @@ void GSRendererDX11::EmulateZbuffer()
// Clamping is done after rasterization. // Clamping is done after rasterization.
const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8); const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z; const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z;
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF); vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
ps_cb.Af_MaxDepth.y = 1.0f;
m_ps_sel.zclamp = 0;
if (clamp_z) if (clamp_z)
{ {
// FIXME: Do z clamping for sprites on vs, triangles on ps. if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS)
vs_cb.MaxDepth = GSVector2i(max_z); {
vs_cb.MaxDepth = GSVector2i(max_z);
}
else
{
ps_cb.Af_MaxDepth.y = max_z * ldexpf(1, -32);
m_ps_sel.zclamp = 1;
}
} }
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
// Minor optimization of a corner case (it allows better emulation of some alpha test effects) // Minor optimization of a corner case (it allows better emulation of some alpha test effects)
if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z) if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z)
@ -581,7 +594,7 @@ void GSRendererDX11::EmulateBlending()
// Require the fixed alpha value // Require the fixed alpha value
if (ALPHA.C == 2) if (ALPHA.C == 2)
ps_cb.Af.x = (float)ALPHA.FIX / 128.0f; ps_cb.Af_MaxDepth.x = (float)ALPHA.FIX / 128.0f;
} }
else else
{ {

View File

@ -219,6 +219,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
sm.AddMacro("PS_BLEND_C", sel.blend_c); sm.AddMacro("PS_BLEND_C", sel.blend_c);
sm.AddMacro("PS_BLEND_D", sel.blend_d); sm.AddMacro("PS_BLEND_D", sel.blend_d);
sm.AddMacro("PS_DITHER", sel.dither); sm.AddMacro("PS_DITHER", sel.dither);
sm.AddMacro("PS_ZCLAMP", sel.zclamp);
CComPtr<ID3D11PixelShader> ps; CComPtr<ID3D11PixelShader> ps;

View File

@ -983,6 +983,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
+ format("#define PS_FBMASK %d\n", sel.fbmask) + format("#define PS_FBMASK %d\n", sel.fbmask)
+ format("#define PS_HDR %d\n", sel.hdr) + format("#define PS_HDR %d\n", sel.hdr)
+ format("#define PS_DITHER %d\n", sel.dither) + format("#define PS_DITHER %d\n", sel.dither)
+ format("#define PS_ZCLAMP %d\n", sel.zclamp)
// + format("#define PS_PABE %d\n", sel.pabe) // + format("#define PS_PABE %d\n", sel.pabe)
; ;

View File

@ -198,6 +198,8 @@ public:
GSVector4 HalfTexel; GSVector4 HalfTexel;
GSVector4 MinMax; GSVector4 MinMax;
GSVector4 TC_OH_TS; GSVector4 TC_OH_TS;
GSVector4 MaxDepth;
GSVector4 DitherMatrix[4]; GSVector4 DitherMatrix[4];
PSConstantBuffer() PSConstantBuffer()
@ -210,6 +212,7 @@ public:
MskFix = GSVector4i::zero(); MskFix = GSVector4i::zero();
TC_OH_TS = GSVector4::zero(); TC_OH_TS = GSVector4::zero();
FbMask = GSVector4i::zero(); FbMask = GSVector4i::zero();
MaxDepth = GSVector4::zero();
DitherMatrix[0] = GSVector4::zero(); DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero(); DitherMatrix[1] = GSVector4::zero();
@ -225,7 +228,7 @@ public:
// if WH matches both HalfTexel and TC_OH_TS do too // if WH matches both HalfTexel and TC_OH_TS do too
// MinMax depends on WH and MskFix so no need to check it too // MinMax depends on WH and MskFix so no need to check it too
if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])
& (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11])).alltrue()) & (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12])).alltrue())
{ {
// Note previous check uses SSE already, a plain copy will be faster than any memcpy // Note previous check uses SSE already, a plain copy will be faster than any memcpy
a[0] = b[0]; a[0] = b[0];
@ -236,9 +239,11 @@ public:
a[5] = b[5]; a[5] = b[5];
a[8] = b[8]; a[8] = b[8];
a[9] = b[9]; a[9] = b[9];
a[10] = b[10]; a[10] = b[10];
a[11] = b[11]; a[11] = b[11];
a[12] = b[12];
return true; return true;
} }
@ -303,6 +308,9 @@ public:
// Dithering // Dithering
uint32 dither:2; uint32 dither:2;
// Depth clamp
uint32 zclamp:1;
// Hack // Hack
uint32 tcoffsethack:1; uint32 tcoffsethack:1;
uint32 urban_chaos_hle:1; uint32 urban_chaos_hle:1;
@ -313,7 +321,7 @@ public:
uint32 point_sampler:1; uint32 point_sampler:1;
uint32 invalid_tex0:1; // Lupin the 3rd uint32 invalid_tex0:1; // Lupin the 3rd
uint32 _free2:8; uint32 _free2:7;
}; };
uint64 key; uint64 key;

View File

@ -175,11 +175,18 @@ void GSRendererOGL::EmulateZbuffer()
// Clamping is done after rasterization. // Clamping is done after rasterization.
const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8); const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z; const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z;
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF); vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
ps_cb.MaxDepth = GSVector4(1.0f);
m_ps_sel.zclamp = 0;
if (clamp_z) { if (clamp_z) {
// FIXME: Do z clamping for sprites on vs, triangles on ps. if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) {
if (m_vt.m_primclass == GS_SPRITE_CLASS)
vs_cb.MaxDepth = GSVector2i(max_z); vs_cb.MaxDepth = GSVector2i(max_z);
} else {
ps_cb.MaxDepth = GSVector4(max_z * ldexpf(1, -32));
m_ps_sel.zclamp = 1;
}
} }
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];

View File

@ -95,6 +95,9 @@ layout(std140, binding = 21) uniform cb21
vec2 TextureScale; vec2 TextureScale;
vec2 TC_OffsetHack; vec2 TC_OffsetHack;
float MaxDepthPS;
vec3 pad_cb21;
mat4 DitherMatrix; mat4 DitherMatrix;
}; };
#endif #endif

View File

@ -877,6 +877,10 @@ void ps_main()
// #endif // #endif
SV_Target0 = C / 255.0f; SV_Target0 = C / 255.0f;
SV_Target1 = vec4(alpha_blend); SV_Target1 = vec4(alpha_blend);
#if PS_ZCLAMP
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
#endif
} }
#endif #endif

View File

@ -49,6 +49,7 @@
#define PS_BLEND_C 0 #define PS_BLEND_C 0
#define PS_BLEND_D 0 #define PS_BLEND_D 0
#define PS_DITHER 0 #define PS_DITHER 0
#define PS_ZCLAMP 0
#endif #endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
@ -85,6 +86,9 @@ struct PS_OUTPUT
{ {
float4 c0 : SV_Target0; float4 c0 : SV_Target0;
float4 c1 : SV_Target1; float4 c1 : SV_Target1;
#if PS_ZCLAMP
float depth : SV_Depth;
#endif
}; };
Texture2D<float4> Texture : register(t0); Texture2D<float4> Texture : register(t0);
@ -117,7 +121,8 @@ cbuffer cb1
uint4 FbMask; uint4 FbMask;
float4 TC_OffsetHack; float4 TC_OffsetHack;
float Af; float Af;
float3 _pad; float MaxDepthPS;
float2 pad_cb1;
float4x4 DitherMatrix; float4x4 DitherMatrix;
}; };
@ -778,6 +783,10 @@ PS_OUTPUT ps_main(PS_INPUT input)
output.c0 = C / 255.0f; output.c0 = C / 255.0f;
output.c1 = (float4)(alpha_blend); output.c1 = (float4)(alpha_blend);
#if PS_ZCLAMP
output.depth = min(input.p.z, MaxDepthPS);
#endif
return output; return output;
} }