From 59f299d5d6cabddacf8e7d80f6f53987e4767c22 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 29 Nov 2021 17:51:02 -0800 Subject: [PATCH] Software: Fix zfreeze with CullMode::All --- .../Core/VideoBackends/Software/Clipper.cpp | 70 ++++++++++++------- Source/Core/VideoBackends/Software/Clipper.h | 6 +- .../VideoBackends/Software/Rasterizer.cpp | 27 ++++--- .../Core/VideoBackends/Software/Rasterizer.h | 2 + .../VideoBackends/Software/SWVertexLoader.cpp | 7 ++ .../VideoBackends/Software/SWVertexLoader.h | 3 + Source/Core/VideoCommon/VertexManagerBase.h | 4 +- 7 files changed, 78 insertions(+), 41 deletions(-) diff --git a/Source/Core/VideoBackends/Software/Clipper.cpp b/Source/Core/VideoBackends/Software/Clipper.cpp index dc22a6a201..f13ebb90b7 100644 --- a/Source/Core/VideoBackends/Software/Clipper.cpp +++ b/Source/Core/VideoBackends/Software/Clipper.cpp @@ -289,10 +289,42 @@ void ProcessTriangle(OutputVertexData* v0, OutputVertexData* v1, OutputVertexDat { INCSTAT(g_stats.this_frame.num_triangles_in) - bool backface; - - if (!CullTest(v0, v1, v2, backface)) + if (IsTriviallyRejected(v0, v1, v2)) + { + INCSTAT(g_stats.this_frame.num_triangles_rejected) + // NOTE: The slope used by zfreeze shouldn't be updated if the triangle is + // trivially rejected during clipping return; + } + + bool backface = IsBackface(v0, v1, v2); + + if (!backface) + { + if (bpmem.genMode.cullmode == CullMode::Back || bpmem.genMode.cullmode == CullMode::All) + { + // cull frontfacing - we still need to update the slope for zfreeze + PerspectiveDivide(v0); + PerspectiveDivide(v1); + PerspectiveDivide(v2); + Rasterizer::UpdateZSlope(v0, v1, v2); + INCSTAT(g_stats.this_frame.num_triangles_culled) + return; + } + } + else + { + if (bpmem.genMode.cullmode == CullMode::Front || bpmem.genMode.cullmode == CullMode::All) + { + // cull backfacing - we still need to update the slope for zfreeze + PerspectiveDivide(v0); + PerspectiveDivide(v2); + PerspectiveDivide(v1); + Rasterizer::UpdateZSlope(v0, v2, v1); + INCSTAT(g_stats.this_frame.num_triangles_culled) + return; + } + } int indices[NUM_INDICES] = {0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, @@ -461,19 +493,18 @@ void ProcessPoint(OutputVertexData* center) Rasterizer::DrawTriangleFrontFace(&ur, &lr, &ul); } -bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, - bool& backface) +bool IsTriviallyRejected(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2) { int mask = CalcClipMask(v0); mask &= CalcClipMask(v1); mask &= CalcClipMask(v2); - if (mask) - { - INCSTAT(g_stats.this_frame.num_triangles_rejected) - return false; - } + return mask != 0; +} +bool IsBackface(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2) +{ float x0 = v0->projectedPosition.x; float x1 = v1->projectedPosition.x; float x2 = v2->projectedPosition.x; @@ -486,29 +517,14 @@ bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const Outp float normalZDir = (x0 * w2 - x2 * w0) * y1 + (x2 * y0 - x0 * y2) * w1 + (y2 * w0 - y0 * w2) * x1; - backface = normalZDir <= 0.0f; + bool backface = normalZDir <= 0.0f; // Jimmie Johnson's Anything with an Engine has a positive viewport, while other games have a // negative viewport. The positive viewport does not require vertices to be vertically mirrored, // but the backface test does need to be inverted for things to be drawn. if (xfmem.viewport.ht > 0) backface = !backface; - // TODO: Are these tests / the definition of backface above backwards? - if ((bpmem.genMode.cullmode == CullMode::Back || bpmem.genMode.cullmode == CullMode::All) && - !backface) // cull frontfacing - { - INCSTAT(g_stats.this_frame.num_triangles_culled) - return false; - } - - if ((bpmem.genMode.cullmode == CullMode::Front || bpmem.genMode.cullmode == CullMode::All) && - backface) // cull backfacing - { - INCSTAT(g_stats.this_frame.num_triangles_culled) - return false; - } - - return true; + return backface; } void PerspectiveDivide(OutputVertexData* vertex) diff --git a/Source/Core/VideoBackends/Software/Clipper.h b/Source/Core/VideoBackends/Software/Clipper.h index 4b18023696..21be39c4fe 100644 --- a/Source/Core/VideoBackends/Software/Clipper.h +++ b/Source/Core/VideoBackends/Software/Clipper.h @@ -15,8 +15,10 @@ void ProcessLine(OutputVertexData* v0, OutputVertexData* v1); void ProcessPoint(OutputVertexData* v); -bool CullTest(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2, - bool& backface); +bool IsTriviallyRejected(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2); + +bool IsBackface(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2); void PerspectiveDivide(OutputVertexData* vertex); } // namespace Clipper diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index b3d0dd3fdb..726692138c 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -301,11 +301,27 @@ static void BuildBlock(s32 blockX, s32 blockY) } } +void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2) +{ + if (!bpmem.genMode.zfreeze) + { + const s32 X1 = iround(16.0f * v0->screenPosition[0]) - 9; + const s32 Y1 = iround(16.0f * v0->screenPosition[1]) - 9; + const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); + ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx); + } +} + void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2) { INCSTAT(g_stats.this_frame.num_triangles_drawn); + // The zslope should be updated now, even if the triangle is rejected by the scissor test, as + // zfreeze depends on it + UpdateZSlope(v0, v1, v2); + // adapted from http://devmaster.net/posts/6145/advanced-rasterization // 28.4 fixed-pou32 coordinates. rounded to nearest and adjusted to match hardware output @@ -370,22 +386,13 @@ void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v if (minx >= maxx || miny >= maxy) return; - // Set up slopes + // Set up the remaining slopes const SlopeContext ctx(v0, v1, v2, (X1 + 0xF) >> 4, (Y1 + 0xF) >> 4); float w[3] = {1.0f / v0->projectedPosition.w, 1.0f / v1->projectedPosition.w, 1.0f / v2->projectedPosition.w}; WSlope = Slope(w[0], w[1], w[2], ctx); - // TODO: The zfreeze emulation is not quite correct, yet! - // Many things might prevent us from reaching this line (culling, clipping, scissoring). - // However, the zslope is always guaranteed to be calculated unless all vertices are trivially - // rejected during clipping! - // We're currently sloppy at this since we abort early if any of the culling/clipping/scissoring - // tests fail. - if (!bpmem.genMode.zfreeze) - ZSlope = Slope(v0->screenPosition.z, v1->screenPosition.z, v2->screenPosition.z, ctx); - for (unsigned int i = 0; i < bpmem.genMode.numcolchans; i++) { for (int comp = 0; comp < 4; comp++) diff --git a/Source/Core/VideoBackends/Software/Rasterizer.h b/Source/Core/VideoBackends/Software/Rasterizer.h index 97cbce4d63..c278809966 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.h +++ b/Source/Core/VideoBackends/Software/Rasterizer.h @@ -11,6 +11,8 @@ namespace Rasterizer { void Init(); +void UpdateZSlope(const OutputVertexData* v0, const OutputVertexData* v1, + const OutputVertexData* v2); void DrawTriangleFrontFace(const OutputVertexData* v0, const OutputVertexData* v1, const OutputVertexData* v2); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index af99e5ba85..287fcf4a4b 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -32,6 +32,13 @@ SWVertexLoader::SWVertexLoader() = default; SWVertexLoader::~SWVertexLoader() = default; +DataReader SWVertexLoader::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, + u32 stride, bool cullall) +{ + // The software renderer needs cullall to be false for zfreeze to work + return VertexManagerBase::PrepareForAdditionalData(primitive, count, stride, false); +} + void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { DebugUtil::OnObjectBegin(); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index bbda8da037..59b6ca65fd 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -19,6 +19,9 @@ public: SWVertexLoader(); ~SWVertexLoader(); + DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, + bool cullall) override; + protected: void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index b3dd49aa61..c413889713 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -99,8 +99,8 @@ public: PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices); - DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, u32 stride, - bool cullall); + virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count, + u32 stride, bool cullall); void FlushData(u32 count, u32 stride); void Flush();