Merge pull request #4471 from Armada651/hybrid-depth

VideoCommon: Don't process the depth range in the vertex shader if it's not oversized.
This commit is contained in:
Jules Blok 2016-12-29 18:49:27 +01:00 committed by GitHub
commit a2e191649d
6 changed files with 69 additions and 59 deletions

View File

@ -559,6 +559,10 @@ void Renderer::SetViewport()
float Y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissorYOff); float Y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissorYOff);
float Wd = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd); float Wd = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd);
float Ht = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht); float Ht = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht);
float range = MathUtil::Clamp<float>(xfmem.viewport.zRange, 0.0f, 16777215.0f);
float min_depth =
MathUtil::Clamp<float>(xfmem.viewport.farZ - range, 0.0f, 16777215.0f) / 16777216.0f;
float max_depth = MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f;
if (Wd < 0.0f) if (Wd < 0.0f)
{ {
X += Wd; X += Wd;
@ -570,18 +574,24 @@ void Renderer::SetViewport()
Ht = -Ht; Ht = -Ht;
} }
// If an inverted depth range is used, which D3D doesn't support,
// we need to calculate the depth range in the vertex shader.
if (xfmem.viewport.zRange < 0.0f)
{
min_depth = 0.0f;
max_depth = GX_MAX_DEPTH;
}
// In D3D, the viewport rectangle must fit within the render target. // In D3D, the viewport rectangle must fit within the render target.
X = (X >= 0.f) ? X : 0.f; X = (X >= 0.f) ? X : 0.f;
Y = (Y >= 0.f) ? Y : 0.f; Y = (Y >= 0.f) ? Y : 0.f;
Wd = (X + Wd <= GetTargetWidth()) ? Wd : (GetTargetWidth() - X); Wd = (X + Wd <= GetTargetWidth()) ? Wd : (GetTargetWidth() - X);
Ht = (Y + Ht <= GetTargetHeight()) ? Ht : (GetTargetHeight() - Y); Ht = (Y + Ht <= GetTargetHeight()) ? Ht : (GetTargetHeight() - Y);
// We do depth clipping and depth range in the vertex shader instead of relying // We use an inverted depth range here to apply the Reverse Z trick.
// on the graphics API. However we still need to ensure depth values don't exceed // This trick makes sure we match the precision provided by the 1:0
// the maximum value supported by the console GPU. We also need to account for the // clipping depth range on the hardware.
// fact that the entire depth buffer is inverted on D3D, so we set GX_MAX_DEPTH as D3D11_VIEWPORT vp = CD3D11_VIEWPORT(X, Y, Wd, Ht, 1.0f - max_depth, 1.0f - min_depth);
// an inverted near value.
D3D11_VIEWPORT vp = CD3D11_VIEWPORT(X, Y, Wd, Ht, 1.0f - GX_MAX_DEPTH, D3D11_MAX_DEPTH);
D3D::context->RSSetViewports(1, &vp); D3D::context->RSSetViewports(1, &vp);
} }

View File

@ -464,6 +464,10 @@ void Renderer::SetViewport()
float y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_offset); float y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_offset);
float width = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd); float width = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd);
float height = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht); float height = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht);
float range = MathUtil::Clamp<float>(xfmem.viewport.zRange, 0.0f, 16777215.0f);
float min_depth =
MathUtil::Clamp<float>(xfmem.viewport.farZ - range, 0.0f, 16777215.0f) / 16777216.0f;
float max_depth = MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f;
if (width < 0.0f) if (width < 0.0f)
{ {
x += width; x += width;
@ -475,18 +479,24 @@ void Renderer::SetViewport()
height = -height; height = -height;
} }
// If an inverted depth range is used, which D3D doesn't support,
// we need to calculate the depth range in the vertex shader.
if (xfmem.viewport.zRange < 0.0f)
{
min_depth = 0.0f;
max_depth = GX_MAX_DEPTH;
}
// In D3D, the viewport rectangle must fit within the render target. // In D3D, the viewport rectangle must fit within the render target.
x = (x >= 0.f) ? x : 0.f; x = (x >= 0.f) ? x : 0.f;
y = (y >= 0.f) ? y : 0.f; y = (y >= 0.f) ? y : 0.f;
width = (x + width <= GetTargetWidth()) ? width : (GetTargetWidth() - x); width = (x + width <= GetTargetWidth()) ? width : (GetTargetWidth() - x);
height = (y + height <= GetTargetHeight()) ? height : (GetTargetHeight() - y); height = (y + height <= GetTargetHeight()) ? height : (GetTargetHeight() - y);
// We do depth clipping and depth range in the vertex shader instead of relying // We use an inverted depth range here to apply the Reverse Z trick.
// on the graphics API. However we still need to ensure depth values don't exceed // This trick makes sure we match the precision provided by the 1:0
// the maximum value supported by the console GPU. We also need to account for the // clipping depth range on the hardware.
// fact that the entire depth buffer is inverted on D3D, so we set GX_MAX_DEPTH as D3D12_VIEWPORT vp = {x, y, width, height, 1.0f - max_depth, 1.0f - min_depth};
// an inverted near value.
D3D12_VIEWPORT vp = {x, y, width, height, 1.0f - GX_MAX_DEPTH, D3D12_MAX_DEPTH};
D3D::current_command_list->RSSetViewports(1, &vp); D3D::current_command_list->RSSetViewports(1, &vp);
} }

View File

@ -1122,12 +1122,10 @@ void Renderer::SetViewport()
(float)scissorYOff); (float)scissorYOff);
float Width = EFBToScaledXf(2.0f * xfmem.viewport.wd); float Width = EFBToScaledXf(2.0f * xfmem.viewport.wd);
float Height = EFBToScaledYf(-2.0f * xfmem.viewport.ht); float Height = EFBToScaledYf(-2.0f * xfmem.viewport.ht);
float GLNear = MathUtil::Clamp<float>( float range = MathUtil::Clamp<float>(xfmem.viewport.zRange, -16777215.0f, 16777215.0f);
xfmem.viewport.farZ - float min_depth =
MathUtil::Clamp<float>(xfmem.viewport.zRange, -16777216.0f, 16777216.0f), MathUtil::Clamp<float>(xfmem.viewport.farZ - range, 0.0f, 16777215.0f) / 16777216.0f;
0.0f, 16777215.0f) / float max_depth = MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f;
16777216.0f;
float GLFar = MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f;
if (Width < 0) if (Width < 0)
{ {
X += Width; X += Width;
@ -1154,17 +1152,7 @@ void Renderer::SetViewport()
// vertex shader we only need to ensure depth values don't exceed the maximum // vertex shader we only need to ensure depth values don't exceed the maximum
// value supported by the console GPU. If not, we simply clamp the near/far values // value supported by the console GPU. If not, we simply clamp the near/far values
// themselves to the maximum value as done above. // themselves to the maximum value as done above.
if (g_ActiveConfig.backend_info.bSupportsDepthClamp) glDepthRangef(max_depth, min_depth);
{
if (xfmem.viewport.zRange < 0.0f)
glDepthRangef(0.0f, GX_MAX_DEPTH);
else
glDepthRangef(GX_MAX_DEPTH, 0.0f);
}
else
{
glDepthRangef(GLFar, GLNear);
}
} }
void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable,

View File

@ -1635,6 +1635,10 @@ void Renderer::SetViewport()
float y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_offset); float y = Renderer::EFBToScaledYf(xfmem.viewport.yOrig + xfmem.viewport.ht - scissor_y_offset);
float width = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd); float width = Renderer::EFBToScaledXf(2.0f * xfmem.viewport.wd);
float height = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht); float height = Renderer::EFBToScaledYf(-2.0f * xfmem.viewport.ht);
float range = MathUtil::Clamp<float>(xfmem.viewport.zRange, -16777215.0f, 16777215.0f);
float min_depth =
MathUtil::Clamp<float>(xfmem.viewport.farZ - range, 0.0f, 16777215.0f) / 16777216.0f;
float max_depth = MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f;
if (width < 0.0f) if (width < 0.0f)
{ {
x += width; x += width;
@ -1646,28 +1650,19 @@ void Renderer::SetViewport()
height = -height; height = -height;
} }
// If we do depth clipping and depth range in the vertex shader we only need to ensure // If an inverted depth range is used, which the Vulkan drivers don't
// depth values don't exceed the maximum value supported by the console GPU. If not, // support, we need to calculate the depth range in the vertex shader.
// we simply clamp the near/far values themselves to the maximum value as done above. // TODO: Make this into a DriverDetails bug and write a test for CTS.
float min_depth, max_depth; if (xfmem.viewport.zRange < 0.0f)
if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
{ {
min_depth = 1.0f - GX_MAX_DEPTH; min_depth = 0.0f;
max_depth = 1.0f; max_depth = GX_MAX_DEPTH;
}
else
{
float near_val = MathUtil::Clamp<float>(xfmem.viewport.farZ -
MathUtil::Clamp<float>(xfmem.viewport.zRange,
-16777216.0f, 16777216.0f),
0.0f, 16777215.0f) /
16777216.0f;
float far_val = MathUtil::Clamp<float>(xfmem.viewport.farZ, 0.0f, 16777215.0f) / 16777216.0f;
min_depth = near_val;
max_depth = far_val;
} }
VkViewport viewport = {x, y, width, height, min_depth, max_depth}; // We use an inverted depth range here to apply the Reverse Z trick.
// This trick makes sure we match the precision provided by the 1:0
// clipping depth range on the hardware.
VkViewport viewport = {x, y, width, height, 1.0f - max_depth, 1.0f - min_depth};
StateTracker::GetInstance()->SetViewport(viewport); StateTracker::GetInstance()->SetViewport(viewport);
} }

View File

@ -30,6 +30,10 @@ VertexShaderUid GetVertexShaderUid()
uid_data->msaa = g_ActiveConfig.iMultisamples > 1; uid_data->msaa = g_ActiveConfig.iMultisamples > 1;
uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA; uid_data->ssaa = g_ActiveConfig.iMultisamples > 1 && g_ActiveConfig.bSSAA;
uid_data->numColorChans = xfmem.numChan.numColorChans; uid_data->numColorChans = xfmem.numChan.numColorChans;
uid_data->vertex_depth =
g_ActiveConfig.backend_info.bSupportsDepthClamp &&
((fabs(xfmem.viewport.zRange) > 16777215.0f || fabs(xfmem.viewport.farZ) > 16777215.0f) ||
(xfmem.viewport.zRange < 0.0f && !g_ActiveConfig.backend_info.bSupportsReversedDepthRange));
GetLightingShaderUid(uid_data->lighting); GetLightingShaderUid(uid_data->lighting);
@ -416,13 +420,11 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
out.Write("o.colors_1 = color1;\n"); out.Write("o.colors_1 = color1;\n");
} }
// Write the true depth value. If the game uses depth textures, then the pixel shader will // If we can disable the incorrect depth clipping planes using depth clamping, then we can do
// override it with the correct values if not then early z culling will improve speed. // our own depth clipping and calculate the depth range before the perspective divide if
// necessary.
if (g_ActiveConfig.backend_info.bSupportsDepthClamp) if (g_ActiveConfig.backend_info.bSupportsDepthClamp)
{ {
// If we can disable the incorrect depth clipping planes using depth clamping, then we can do
// our own depth clipping and calculate the depth range before the perspective divide.
// Since we're adjusting z for the depth range before the perspective divide, we have to do our // Since we're adjusting z for the depth range before the perspective divide, we have to do our
// own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range. // own clipping. We want to clip so that -w <= z <= 0, which matches the console -1..0 range.
// We adjust our depth value for clipping purposes to match the perspective projection in the // We adjust our depth value for clipping purposes to match the perspective projection in the
@ -430,7 +432,14 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n"); out.Write("float clipDepth = o.pos.z * (1.0 - 1e-7);\n");
out.Write("o.clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w out.Write("o.clipDist0 = clipDepth + o.pos.w;\n"); // Near: z < -w
out.Write("o.clipDist1 = -clipDepth;\n"); // Far: z > 0 out.Write("o.clipDist1 = -clipDepth;\n"); // Far: z > 0
}
// Write the true depth value. If the game uses depth textures, then the pixel shader will
// override it with the correct values; if not, then early z culling will improve speed.
// There are two different ways to do this: when the depth range is oversized, we process
// the depth range in the vertex shader; if not, we let the host driver handle it.
if (uid_data->vertex_depth)
{
// Adjust z for the depth range. We're using an equation which incorporates a depth inversion, // Adjust z for the depth range. We're using an equation which incorporates a depth inversion,
// so we can map the console -1..0 range to the 0..1 range used in the depth buffer. // so we can map the console -1..0 range to the 0..1 range used in the depth buffer.
// We have to handle the depth range in the vertex shader instead of after the perspective // We have to handle the depth range in the vertex shader instead of after the perspective
@ -442,11 +451,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const vertex_shader_uid_da
} }
else else
{ {
// If we can't disable the incorrect depth clipping planes, then we need to rely on the // Here we rely on the host driver to process the depth range, however we still need to invert
// graphics API to handle the depth range after the perspective divide. This can result in // the console -1..0 range to the 0..1 range used in the depth buffer.
// inaccurate depth values due to the missing depth bias, but that can be least corrected by
// overriding depth values in the pixel shader. We still need to take care of the reversed depth
// though, so we do that here.
out.Write("o.pos.z = -o.pos.z;\n"); out.Write("o.pos.z = -o.pos.z;\n");
} }

View File

@ -43,7 +43,8 @@ struct vertex_shader_uid_data
u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is
// 8 bits wide // 8 bits wide
u32 ssaa : 1; u32 ssaa : 1;
u32 pad : 15; u32 vertex_depth : 1;
u32 pad : 14;
struct struct
{ {