From b5625ad8b08ddfc3158d1d19aa5e616994fce993 Mon Sep 17 00:00:00 2001 From: KrossX Date: Sat, 6 Jun 2020 17:21:03 +0200 Subject: [PATCH] gsdx-hw: Add zclamping to ps/fs. Add zclamping to ps/fs, enable vs, ps/fs clamp when needed with a macro. --- plugins/GSdx/Renderers/DX11/GSDevice11.h | 9 ++++++--- .../GSdx/Renderers/DX11/GSRendererDX11.cpp | 19 ++++++++++++++++--- plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp | 1 + plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp | 1 + plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h | 12 ++++++++++-- .../GSdx/Renderers/OpenGL/GSRendererOGL.cpp | 11 +++++++++-- plugins/GSdx/res/glsl/common_header.glsl | 3 +++ plugins/GSdx/res/glsl/tfx_fs.glsl | 4 ++++ plugins/GSdx/res/tfx.fx | 11 ++++++++++- 9 files changed, 60 insertions(+), 11 deletions(-) diff --git a/plugins/GSdx/Renderers/DX11/GSDevice11.h b/plugins/GSdx/Renderers/DX11/GSDevice11.h index 497d20a3e0..bb38eb8f7f 100644 --- a/plugins/GSdx/Renderers/DX11/GSDevice11.h +++ b/plugins/GSdx/Renderers/DX11/GSDevice11.h @@ -105,7 +105,7 @@ public: GSVector4i FbMask; GSVector4 TC_OffsetHack; - GSVector4 Af; + GSVector4 Af_MaxDepth; GSVector4 DitherMatrix[4]; PSConstantBuffer() @@ -118,7 +118,7 @@ public: MskFix = GSVector4i::zero(); ChannelShuffle = GSVector4i::zero(); FbMask = GSVector4i::zero(); - Af = GSVector4::zero(); + Af_MaxDepth = GSVector4::zero(); DitherMatrix[0] = GSVector4::zero(); DitherMatrix[1] = GSVector4::zero(); @@ -241,6 +241,9 @@ public: // Dithering uint32 dither:2; + // Depth clamp + uint32 zclamp:1; + // Hack uint32 tcoffsethack:1; uint32 urban_chaos_hle:1; @@ -248,7 +251,7 @@ public: uint32 point_sampler:1; uint32 invalid_tex0:1; // Lupin the 3rd - uint32 _free:16; + uint32 _free:15; }; uint64 key; diff --git a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp index 57a4e66247..6a9a27d594 100644 --- a/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSRendererDX11.cpp @@ -163,13 +163,26 @@ 
void GSRendererDX11::EmulateZbuffer() // Clamping is done after rasterization. const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8); const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z; + vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF); + ps_cb.Af_MaxDepth.y = 1.0f; + m_ps_sel.zclamp = 0; + if (clamp_z) { - // FIXME: Do z clamping for sprites on vs, triangles on ps. - vs_cb.MaxDepth = GSVector2i(max_z); + if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) + { + vs_cb.MaxDepth = GSVector2i(max_z); + } + else + { + ps_cb.Af_MaxDepth.y = max_z * ldexpf(1, -32); + m_ps_sel.zclamp = 1; + } } + + GSVertex* v = &m_vertex.buff[0]; // Minor optimization of a corner case (it allow to better emulate some alpha test effects) if (m_om_dssel.ztst == ZTST_GEQUAL && m_vt.m_eq.z && v[0].XYZ.Z == max_z) @@ -581,7 +594,7 @@ void GSRendererDX11::EmulateBlending() // Require the fix alpha vlaue if (ALPHA.C == 2) - ps_cb.Af.x = (float)ALPHA.FIX / 128.0f; + ps_cb.Af_MaxDepth.x = (float)ALPHA.FIX / 128.0f; } else { diff --git a/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp b/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp index 37a503008f..f973cf6426 100644 --- a/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp +++ b/plugins/GSdx/Renderers/DX11/GSTextureFX11.cpp @@ -219,6 +219,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe sm.AddMacro("PS_BLEND_C", sel.blend_c); sm.AddMacro("PS_BLEND_D", sel.blend_d); sm.AddMacro("PS_DITHER", sel.dither); + sm.AddMacro("PS_ZCLAMP", sel.zclamp); CComPtr ps; diff --git a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp index ad9b29dfc3..e172808418 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.cpp @@ -983,6 +983,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_FBMASK %d\n", sel.fbmask) + format("#define PS_HDR %d\n", sel.hdr) + 
format("#define PS_DITHER %d\n", sel.dither) + + format("#define PS_ZCLAMP %d\n", sel.zclamp) // + format("#define PS_PABE %d\n", sel.pabe) ; diff --git a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h index 92a1b3e8bd..7610410ef4 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h +++ b/plugins/GSdx/Renderers/OpenGL/GSDeviceOGL.h @@ -198,6 +198,8 @@ public: GSVector4 HalfTexel; GSVector4 MinMax; GSVector4 TC_OH_TS; + GSVector4 MaxDepth; + GSVector4 DitherMatrix[4]; PSConstantBuffer() @@ -210,6 +212,7 @@ public: MskFix = GSVector4i::zero(); TC_OH_TS = GSVector4::zero(); FbMask = GSVector4i::zero(); + MaxDepth = GSVector4::zero(); DitherMatrix[0] = GSVector4::zero(); DitherMatrix[1] = GSVector4::zero(); @@ -225,7 +228,7 @@ public: // if WH matches both HalfTexel and TC_OH_TS do too // MinMax depends on WH and MskFix so no need to check it too if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) - & (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11])).alltrue()) + & (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12])).alltrue()) { // Note previous check uses SSE already, a plain copy will be faster than any memcpy a[0] = b[0]; @@ -236,9 +239,11 @@ public: a[5] = b[5]; a[8] = b[8]; + a[9] = b[9]; a[10] = b[10]; a[11] = b[11]; + a[12] = b[12]; return true; } @@ -303,6 +308,9 @@ public: // Dithering uint32 dither:2; + // Depth clamp + uint32 zclamp:1; + // Hack uint32 tcoffsethack:1; uint32 urban_chaos_hle:1; @@ -313,7 +321,7 @@ public: uint32 point_sampler:1; uint32 invalid_tex0:1; // Lupin the 3rd - uint32 _free2:8; + uint32 _free2:7; }; uint64 key; diff --git a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp index f7c21357fa..54ed5ef006 100644 --- a/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp +++ b/plugins/GSdx/Renderers/OpenGL/GSRendererOGL.cpp @@ -175,11 +175,18 @@ void 
GSRendererOGL::EmulateZbuffer() // Clamping is done after rasterization. const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8); const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z; + vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF); + ps_cb.MaxDepth = GSVector4(1.0f); + m_ps_sel.zclamp = 0; + if (clamp_z) { - // FIXME: Do z clamping for sprites on vs, triangles on ps. - if (m_vt.m_primclass == GS_SPRITE_CLASS) + if (m_vt.m_primclass == GS_SPRITE_CLASS || m_vt.m_primclass == GS_POINT_CLASS) { vs_cb.MaxDepth = GSVector2i(max_z); + } else { + ps_cb.MaxDepth = GSVector4(max_z * ldexpf(1, -32)); + m_ps_sel.zclamp = 1; + } } GSVertex* v = &m_vertex.buff[0]; diff --git a/plugins/GSdx/res/glsl/common_header.glsl b/plugins/GSdx/res/glsl/common_header.glsl index bbbc63de42..b6cf915901 100644 --- a/plugins/GSdx/res/glsl/common_header.glsl +++ b/plugins/GSdx/res/glsl/common_header.glsl @@ -95,6 +95,9 @@ layout(std140, binding = 21) uniform cb21 vec2 TextureScale; vec2 TC_OffsetHack; + vec3 pad_cb21; + float MaxDepthPS; + mat4 DitherMatrix; }; #endif diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 80ff1483a2..eac789dcd6 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -877,6 +877,10 @@ void ps_main() // #endif SV_Target0 = C / 255.0f; SV_Target1 = vec4(alpha_blend); + +#if PS_ZCLAMP + gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS); +#endif } #endif diff --git a/plugins/GSdx/res/tfx.fx b/plugins/GSdx/res/tfx.fx index aa3e0fb1b7..c92d769ab2 100644 --- a/plugins/GSdx/res/tfx.fx +++ b/plugins/GSdx/res/tfx.fx @@ -49,6 +49,7 @@ #define PS_BLEND_C 0 #define PS_BLEND_D 0 #define PS_DITHER 0 +#define PS_ZCLAMP 0 #endif #define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D) @@ -85,6 +86,9 @@ struct PS_OUTPUT { float4 c0 : SV_Target0; float4 c1 : SV_Target1; +#if PS_ZCLAMP + float depth : SV_Depth; +#endif }; Texture2D Texture : register(t0); @@ -117,7 +121,8 @@ 
cbuffer cb1 uint4 FbMask; float4 TC_OffsetHack; float Af; - float3 _pad; + float MaxDepthPS; + float2 pad_cb1; float4x4 DitherMatrix; }; @@ -778,6 +783,10 @@ PS_OUTPUT ps_main(PS_INPUT input) output.c0 = C / 255.0f; output.c1 = (float4)(alpha_blend); +#if PS_ZCLAMP + output.depth = min(input.p.z, MaxDepthPS); +#endif + return output; }