From 36ee2477b1b297433e9ce0ee1afe87f1d9929ccf Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 1 Aug 2018 13:46:08 -0700 Subject: [PATCH] SoftRasterizer: Fix the animating characters in Customize status screen in Sands of Destruction by emulating a special LEQUAL depth test. (Fixes #41. Special thanks to StapleButter for his insight on this issue.) --- desmume/src/rasterize.cpp | 76 ++++++++++++++++++++++++++++----------- desmume/src/rasterize.h | 12 ++++--- desmume/src/render3D.cpp | 5 +++ desmume/src/render3D.h | 13 +++++-- 4 files changed, 79 insertions(+), 27 deletions(-) diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 2d449543f..330c15586 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -497,7 +497,7 @@ FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, } } -template template +template template FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z) { FragmentColor srcColor; @@ -510,6 +510,7 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c u8 &dstAttributeStencil = this->_softRender->_framebufferAttributes->stencil[fragmentIndex]; u8 &dstAttributeIsFogged = this->_softRender->_framebufferAttributes->isFogged[fragmentIndex]; u8 &dstAttributeIsTranslucentPoly = this->_softRender->_framebufferAttributes->isTranslucentPoly[fragmentIndex]; + u8 &dstAttributePolyFacing = this->_softRender->_framebufferAttributes->polyFacing[fragmentIndex]; // not sure about the w-buffer depth value: this value was chosen to make the skybox, castle window decals, and water level render correctly in SM64 // @@ -520,9 +521,13 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c // run the depth test bool depthFail = false; + if (polyAttr.DepthEqualTest_Enable) { - const u32 minDepth = max(0x00000000, dstAttributeDepth - DEPTH_EQUALS_TEST_TOLERANCE); + // The EQUAL depth test is used if the polygon requests it. Note that the NDS doesn't perform + // a true EQUAL test -- there is a set tolerance to it that makes it easier for pixels to + // pass the depth test. + const u32 minDepth = (u32)max(0x00000000, (s32)dstAttributeDepth - DEPTH_EQUALS_TEST_TOLERANCE); const u32 maxDepth = min(0x00FFFFFF, dstAttributeDepth + DEPTH_EQUALS_TEST_TOLERANCE); if (newDepth < minDepth || newDepth > maxDepth) @@ -530,8 +535,21 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c depthFail = true; } } + else if ( (ISFRONTFACING && (dstAttributePolyFacing == PolyFacing_Back)) && (dstColor.a == 0x1F)) + { + // The LEQUAL test is used in the special case where an incoming front-facing polygon's pixel + // is to be drawn on top of a back-facing polygon's opaque pixel. + // + // Test case: The Customize status screen in Sands of Destruction requires this type of depth + // test in order to correctly show the animating characters. + if (newDepth > dstAttributeDepth) + { + depthFail = true; + } + } else { + // The LESS depth test is the default type of depth test for all other conditions. if (newDepth >= dstAttributeDepth) { depthFail = true; @@ -624,13 +642,15 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c dstAttributeIsFogged = (dstAttributeIsFogged && polyAttr.Fog_Enable); } + dstAttributePolyFacing = (ISFRONTFACING) ? PolyFacing_Front : PolyFacing_Back; + //depth writing if (isOpaquePixel || polyAttr.TranslucentDepthWrite_Enable) dstAttributeDepth = newDepth; } //draws a single scanline -template template +template template FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) { int XStart = pLeft->X; @@ -717,7 +737,7 @@ FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR poly while (width-- > 0) { - this->_pixel(polyAttr, isTranslucent, adr, dstColor[adr], color[0], color[1], color[2], u, v, 1.0f/invw, z); + this->_pixel(polyAttr, isTranslucent, adr, dstColor[adr], color[0], color[1], color[2], u, v, 1.0f/invw, z); adr++; x++; @@ -732,7 +752,7 @@ FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR poly } //runs several scanlines, until an edge is finished -template template +template template void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right) { //oh lord, hack city for edge drawing @@ -745,13 +765,13 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const if ( USELINEHACK && (left->Height == 0) && (right->Height == 0) && (left->Y < framebufferHeight) && (left->Y >= 0) ) { const bool draw = ( !SLI || ((left->Y >= this->_SLI_startLine) && (left->Y < this->_SLI_endLine)) ); - if (draw) this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); + if (draw) this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); } while (Height--) { const bool draw = ( !SLI || ((left->Y >= this->_SLI_startLine) && (left->Y < this->_SLI_endLine)) ); - if (draw) this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); + if (draw) this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); const int xl = left->X; const int xr = right->X; const int y = left->Y; @@ -834,11 +854,16 @@ FORCEINLINE void RasterizerUnit::_rot_verts() //rotate verts until vert0.y is minimum, and then vert0.x is minimum in case of ties //this is a necessary precondition for our shape engine -template template +template template void RasterizerUnit::_sort_verts() { //if the verts are backwards, reorder them first - if (ISBACKWARDS) + // + // At least... that's what the last comment says. But historically, we've + // always been using front-facing polygons the entire time, and so the + // comment should actually read, "if the verts are front-facing, reorder + // them first". So what is the real behavior for this? - rogerman, 2018/08/01 + if (ISFRONTFACING) for (size_t i = 0; i < TYPE/2; i++) swap(this->_verts[i],this->_verts[TYPE-i-1]); @@ -871,21 +896,21 @@ void RasterizerUnit::_sort_verts() //verts must be clockwise. //I didnt reference anything for this algorithm but it seems like I've seen it somewhere before. //Maybe it is like crow's algorithm -template template +template template void RasterizerUnit::_shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type) { bool failure = false; switch (type) { - case 3: this->_sort_verts(); break; - case 4: this->_sort_verts(); break; - case 5: this->_sort_verts(); break; - case 6: this->_sort_verts(); break; - case 7: this->_sort_verts(); break; - case 8: this->_sort_verts(); break; - case 9: this->_sort_verts(); break; - case 10: this->_sort_verts(); break; + case 3: this->_sort_verts(); break; + case 4: this->_sort_verts(); break; + case 5: this->_sort_verts(); break; + case 6: this->_sort_verts(); break; + case 7: this->_sort_verts(); break; + case 8: this->_sort_verts(); break; + case 9: this->_sort_verts(); break; + case 10: this->_sort_verts(); break; default: printf("skipping type %d\n", type); return; } @@ -914,11 +939,11 @@ void RasterizerUnit::_shape_engine(const POLYGON_ATTR polyAttr, const const bool horizontal = (left.Y == right.Y); if (horizontal) { - this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, &left, &right); + this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, &left, &right); } else { - this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, &left, &right); + this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, &left, &right); } //if we ran out of an edge, step to the next one @@ -2153,6 +2178,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo this->_framebufferAttributes->opaquePolyID[iw] = polyIDBuffer[ir]; this->_framebufferAttributes->translucentPolyID[iw] = kUnsetTranslucentPolyID; this->_framebufferAttributes->isTranslucentPoly[iw] = 0; + this->_framebufferAttributes->polyFacing[iw] = PolyFacing_Unwritten; this->_framebufferAttributes->stencil[iw] = 0; } } @@ -2480,6 +2506,7 @@ void SoftRasterizerRenderer_AVX2::LoadClearValues(const FragmentColor &clearColo this->_clearAttrStencil_v256u8 = _mm256_set1_epi8(clearAttributes.stencil); this->_clearAttrIsFogged_v256u8 = _mm256_set1_epi8(clearAttributes.isFogged); this->_clearAttrIsTranslucentPoly_v256u8 = _mm256_set1_epi8(clearAttributes.isTranslucentPoly); + this->_clearAttrPolyFacing_v256u8 = _mm256_set1_epi8(clearAttributes.polyFacing); } void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel) @@ -2501,6 +2528,7 @@ void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPix _mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v256u8); _mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v256u8); _mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v256u8); + _mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->polyFacing + i), this->_clearAttrPolyFacing_v256u8); } } @@ -2515,6 +2543,7 @@ void SoftRasterizerRenderer_SSE2::LoadClearValues(const FragmentColor &clearColo this->_clearAttrStencil_v128u8 = _mm_set1_epi8(clearAttributes.stencil); this->_clearAttrIsFogged_v128u8 = _mm_set1_epi8(clearAttributes.isFogged); this->_clearAttrIsTranslucentPoly_v128u8 = _mm_set1_epi8(clearAttributes.isTranslucentPoly); + this->_clearAttrPolyFacing_v128u8 = _mm_set1_epi8(clearAttributes.polyFacing); } void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel) @@ -2536,6 +2565,7 @@ void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPix _mm_stream_si128((v128u8 *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v128u8); _mm_stream_si128((v128u8 *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v128u8); _mm_stream_si128((v128u8 *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v128u8); + _mm_stream_si128((v128u8 *)(this->_framebufferAttributes->polyFacing + i), this->_clearAttrPolyFacing_v128u8); } } @@ -2570,6 +2600,11 @@ void SoftRasterizerRenderer_Altivec::LoadClearValues(const FragmentColor &clearC clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly, clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly, clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly}; + + this->_clearAttrPolyFacing_v128u8 = (v128u8){clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing, + clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing, + clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing, + clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing}; } void SoftRasterizerRenderer_Altivec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel) @@ -2591,6 +2626,7 @@ void SoftRasterizerRenderer_Altivec::ClearUsingValues_Execute(const size_t start vec_st(this->_clearAttrStencil_v128u8, i, this->_framebufferAttributes->stencil); vec_st(this->_clearAttrIsFogged_v128u8, i, this->_framebufferAttributes->isFogged); vec_st(this->_clearAttrIsTranslucentPoly_v128u8, i, this->_framebufferAttributes->isTranslucentPoly); + vec_st(this->_clearAttrPolyFacing_v128u8, i, this->_framebufferAttributes->polyFacing); } } diff --git a/desmume/src/rasterize.h b/desmume/src/rasterize.h index 4234cc2bd..aee1214a3 100644 --- a/desmume/src/rasterize.h +++ b/desmume/src/rasterize.h @@ -111,13 +111,13 @@ protected: FORCEINLINE float _round_s(double val); template FORCEINLINE void _shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV); - template FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z); - template FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); - template void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right); + template FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z); + template FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); + template void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right); template FORCEINLINE void _rot_verts(); - template void _sort_verts(); - template void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type); + template void _sort_verts(); + template void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type); public: void SetSLI(u32 startLine, u32 endLine, bool debug); @@ -225,6 +225,7 @@ protected: v256u8 _clearAttrStencil_v256u8; v256u8 _clearAttrIsFogged_v256u8; v256u8 _clearAttrIsTranslucentPoly_v256u8; + v256u8 _clearAttrPolyFacing_v256u8; #elif defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC) v128u32 _clearColor_v128u32; v128u32 _clearDepth_v128u32; @@ -233,6 +234,7 @@ protected: v128u8 _clearAttrStencil_v128u8; v128u8 _clearAttrIsFogged_v128u8; v128u8 _clearAttrIsTranslucentPoly_v128u8; + v128u8 _clearAttrPolyFacing_v128u8; #endif virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) = 0; diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index f00ef1588..55254aaa6 100755 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -91,6 +91,7 @@ FragmentAttributesBuffer::FragmentAttributesBuffer(size_t newCount) stencil = (u8 *)malloc_alignedCacheLine(count * sizeof(u8)); isFogged = (u8 *)malloc_alignedCacheLine(count * sizeof(u8)); isTranslucentPoly = (u8 *)malloc_alignedCacheLine(count * sizeof(u8)); + polyFacing = (u8 *)malloc_alignedCacheLine(count * sizeof(u8)); } FragmentAttributesBuffer::~FragmentAttributesBuffer() @@ -101,6 +102,7 @@ FragmentAttributesBuffer::~FragmentAttributesBuffer() free_aligned(stencil); free_aligned(isFogged); free_aligned(isTranslucentPoly); + free_aligned(polyFacing); } void FragmentAttributesBuffer::SetAtIndex(const size_t index, const FragmentAttributes &attr) @@ -111,6 +113,7 @@ void FragmentAttributesBuffer::SetAtIndex(const size_t index, const FragmentAttr this->stencil[index] = attr.stencil; this->isFogged[index] = attr.isFogged; this->isTranslucentPoly[index] = attr.isTranslucentPoly; + this->polyFacing[index] = attr.polyFacing; } Render3DTexture::Render3DTexture(TEXIMAGE_PARAM texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes) @@ -502,6 +505,7 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState) clearFragment.depth = renderState.clearDepth; clearFragment.stencil = 0; clearFragment.isTranslucentPoly = 0; + clearFragment.polyFacing = PolyFacing_Unwritten; clearFragment.isFogged = BIT15(clearColorSwapped); if (renderState.enableClearImage) @@ -693,6 +697,7 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState) clearFragment.depth = renderState.clearDepth; clearFragment.stencil = 0; clearFragment.isTranslucentPoly = 0; + clearFragment.polyFacing = PolyFacing_Unwritten; clearFragment.isFogged = BIT15(renderState.clearColor); if (renderState.enableClearImage) diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index bd9e6031c..af19743a0 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -68,6 +68,13 @@ enum RendererID enum Render3DErrorCode { RENDER3DERROR_NOERR = 0 +}; + +enum PolyFacing +{ + PolyFacing_Unwritten = 0, + PolyFacing_Front = 1, + PolyFacing_Back = 2 }; typedef int Render3DError; @@ -79,7 +86,8 @@ struct FragmentAttributes u8 translucentPolyID; u8 stencil; u8 isFogged; - u8 isTranslucentPoly; + u8 isTranslucentPoly; + u8 polyFacing; }; struct FragmentAttributesBuffer @@ -90,7 +98,8 @@ struct FragmentAttributesBuffer u8 *translucentPolyID; u8 *stencil; u8 *isFogged; - u8 *isTranslucentPoly; + u8 *isTranslucentPoly; + u8 *polyFacing; FragmentAttributesBuffer(size_t newCount); ~FragmentAttributesBuffer();