From 21f04c9ef22ac345cf3b2ed5e9054cd1e96a32fc Mon Sep 17 00:00:00 2001 From: rogerman Date: Fri, 23 Nov 2018 14:59:13 -0800 Subject: [PATCH] SoftRasterizer: Do some minor improvements to both performance and code size. --- desmume/src/rasterize.cpp | 378 +++++++++++++++++++++++++++++++++----- desmume/src/rasterize.h | 329 +++++++++++++++++---------------- 2 files changed, 496 insertions(+), 211 deletions(-) diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 18abe1ce7..7236cbbe8 100755 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -498,9 +498,9 @@ FORCEINLINE void RasterizerUnit::_shade(const PolygonMode polygonMode, } template template -FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z) +FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float z, float w) { - FragmentColor srcColor; + FragmentColor newDstColor32; FragmentColor shaderOutput; bool isOpaquePixel; @@ -604,13 +604,13 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c //this is a HACK: //we are being very sloppy with our interpolation precision right now //and rather than fix it, i just want to clamp it - srcColor = MakeFragmentColor(max(0x00, min(0x3F,u32floor(r))), - max(0x00, min(0x3F,u32floor(g))), - max(0x00, min(0x3F,u32floor(b))), - polyAttr.Alpha); + newDstColor32 = MakeFragmentColor(max(0x00, min(0x3F, u32floor(r))), + max(0x00, min(0x3F, u32floor(g))), + max(0x00, min(0x3F, u32floor(b))), + polyAttr.Alpha); //pixel shader - this->_shade((PolygonMode)polyAttr.Mode, srcColor, shaderOutput, invu * w, invv * w); + this->_shade((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w); // handle alpha test if ( shaderOutput.a == 0 || @@ -656,7 +656,7 @@ FORCEINLINE void RasterizerUnit::_pixel(const POLYGON_ATTR polyAttr, c template template FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) { - int XStart = pLeft->X; + const int XStart = pLeft->X; int width = pRight->X - XStart; // HACK: workaround for vertical/slant line poly @@ -672,26 +672,36 @@ FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR poly } //these are the starting values, taken from the left edge - float invw = pLeft->invw.curr; - float u = pLeft->u.curr; - float v = pLeft->v.curr; - float z = pLeft->z.curr; + CACHE_ALIGN float coord[4] = { + pLeft->u.curr, + pLeft->v.curr, + pLeft->z.curr, + pLeft->invw.curr + }; - float color[3] = { + CACHE_ALIGN float color[4] = { pLeft->color[0].curr, pLeft->color[1].curr, - pLeft->color[2].curr }; + pLeft->color[2].curr, + (float)polyAttr.Alpha / 31.0f + }; //our dx values are taken from the steps up until the right edge - float invWidth = 1.0f / width; - float dinvw_dx = (pRight->invw.curr - invw) * invWidth; - float du_dx = (pRight->u.curr - u) * invWidth; - float dv_dx = (pRight->v.curr - v) * invWidth; - float dz_dx = (pRight->z.curr - z) * invWidth; - float dc_dx[3] = { + const float invWidth = 1.0f / (float)width; + + const CACHE_ALIGN float coord_dx[4] = { + (pRight->u.curr - coord[0]) * invWidth, + (pRight->v.curr - coord[1]) * invWidth, + (pRight->z.curr - coord[2]) * invWidth, + (pRight->invw.curr - coord[3]) * invWidth + }; + + const CACHE_ALIGN float color_dx[4] = { (pRight->color[0].curr - color[0]) * invWidth, (pRight->color[1].curr - color[1]) * invWidth, - (pRight->color[2].curr - color[2]) * invWidth }; + (pRight->color[2].curr - color[2]) * invWidth, + 0.0f * invWidth + }; size_t adr = (pLeft->Y*framebufferWidth)+XStart; @@ -717,13 +727,19 @@ FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR poly printf("rasterizer rendering at x=%d! oops!\n",x); return; } - invw += dinvw_dx * -x; - u += du_dx * -x; - v += dv_dx * -x; - z += dz_dx * -x; - color[0] += dc_dx[0] * -x; - color[1] += dc_dx[1] * -x; - color[2] += dc_dx[2] * -x; + + const float negativeX = (float)-x; + + coord[0] += coord_dx[0] * negativeX; + coord[1] += coord_dx[1] * negativeX; + coord[2] += coord_dx[2] * negativeX; + coord[3] += coord_dx[3] * negativeX; + + color[0] += color_dx[0] * negativeX; + color[1] += color_dx[1] * negativeX; + color[2] += color_dx[2] * negativeX; + color[3] += color_dx[3] * negativeX; + adr += -x; width -= -x; x = 0; @@ -740,23 +756,275 @@ FORCEINLINE void RasterizerUnit::_drawscanline(const POLYGON_ATTR poly while (width-- > 0) { - this->_pixel(polyAttr, isTranslucent, adr, dstColor[adr], color[0], color[1], color[2], u, v, 1.0f/invw, z); + this->_pixel(polyAttr, isTranslucent, adr, dstColor[adr], color[0], color[1], color[2], coord[0], coord[1], coord[2], 1.0f/coord[3]); adr++; x++; - invw += dinvw_dx; - u += du_dx; - v += dv_dx; - z += dz_dx; - color[0] += dc_dx[0]; - color[1] += dc_dx[1]; - color[2] += dc_dx[2]; + coord[0] += coord_dx[0]; + coord[1] += coord_dx[1]; + coord[2] += coord_dx[2]; + coord[3] += coord_dx[3]; + + color[0] += color_dx[0]; + color[1] += color_dx[1]; + color[2] += color_dx[2]; + color[3] += color_dx[3]; } } +#ifdef ENABLE_SSE2 + +template template +FORCEINLINE void RasterizerUnit::_pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w) +{ + FragmentColor newDstColor32; + FragmentColor shaderOutput; + bool isOpaquePixel; + + u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex]; + u8 &dstAttributeOpaquePolyID = this->_softRender->_framebufferAttributes->opaquePolyID[fragmentIndex]; + u8 &dstAttributeTranslucentPolyID = this->_softRender->_framebufferAttributes->translucentPolyID[fragmentIndex]; + u8 &dstAttributeStencil = this->_softRender->_framebufferAttributes->stencil[fragmentIndex]; + u8 &dstAttributeIsFogged = this->_softRender->_framebufferAttributes->isFogged[fragmentIndex]; + u8 &dstAttributeIsTranslucentPoly = this->_softRender->_framebufferAttributes->isTranslucentPoly[fragmentIndex]; + u8 &dstAttributePolyFacing = this->_softRender->_framebufferAttributes->polyFacing[fragmentIndex]; + + // not sure about the w-buffer depth value: this value was chosen to make the skybox, castle window decals, and water level render correctly in SM64 + // + // When using z-depth, be sure to test against the following test cases: + // - The drawing of the overworld map in Dragon Quest IV + // - The drawing of all units on the map in Advance Wars: Days of Ruin + + // Note that an IEEE-754 single-precision float uses a 23-bit significand. Therefore, we will multiply the + // Z-depth by a 22-bit significand for safety. + const u32 newDepth = (gfx3d.renderState.wbuffer) ? u32floor(w * 4096.0f) : u32floor(z * 4194303.0f) << 2; + + // run the depth test + bool depthFail = false; + + if (polyAttr.DepthEqualTest_Enable) + { + // The EQUAL depth test is used if the polygon requests it. Note that the NDS doesn't perform + // a true EQUAL test -- there is a set tolerance to it that makes it easier for pixels to + // pass the depth test. + const u32 minDepth = (u32)max(0x00000000, (s32)dstAttributeDepth - DEPTH_EQUALS_TEST_TOLERANCE); + const u32 maxDepth = min(0x00FFFFFF, dstAttributeDepth + DEPTH_EQUALS_TEST_TOLERANCE); + + if (newDepth < minDepth || newDepth > maxDepth) + { + depthFail = true; + } + } + else if ( (ISFRONTFACING && (dstAttributePolyFacing == PolyFacing_Back)) && (dstColor.a == 0x1F)) + { + // The LEQUAL test is used in the special case where an incoming front-facing polygon's pixel + // is to be drawn on top of a back-facing polygon's opaque pixel. + // + // Test case: The Customize status screen in Sands of Destruction requires this type of depth + // test in order to correctly show the animating characters. + if (newDepth > dstAttributeDepth) + { + depthFail = true; + } + } + else + { + // The LESS depth test is the default type of depth test for all other conditions. + if (newDepth >= dstAttributeDepth) + { + depthFail = true; + } + } + + if (depthFail) + { + //shadow mask polygons set stencil bit here + if (ISSHADOWPOLYGON && polyAttr.PolygonID == 0) + dstAttributeStencil=1; + return; + } + + //handle shadow polys + if (ISSHADOWPOLYGON) + { + if (polyAttr.PolygonID == 0) + { + //shadow mask polygons only affect the stencil buffer, and then only when they fail depth test + //if we made it here, the shadow mask polygon fragment needs to be trashed + return; + } + else + { + //shadow color polygon conditions + if (dstAttributeStencil == 0) + { + //draw only where stencil bit is set + return; + } + if (dstAttributeOpaquePolyID == polyAttr.PolygonID) + { + //draw only when polygon ID differs + //TODO: are we using the right dst polyID? + return; + } + + //once drawn, stencil bit is always cleared + dstAttributeStencil = 0; + } + } + + //perspective-correct the colors + const __m128 perspective = _mm_set_ps(31.0f, w, w, w); + __m128 newColorf = _mm_add_ps( _mm_mul_ps(srcColorf, perspective), _mm_set1_ps(0.5f) ); + newColorf = _mm_max_ps(newColorf, _mm_setzero_ps()); + + __m128i cvtColor32 = _mm_cvtps_epi32(newColorf); + cvtColor32 = _mm_min_epu8(cvtColor32, _mm_set_epi32(0x1F, 0x3F, 0x3F, 0x3F)); + cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128()); + cvtColor32 = _mm_packus_epi16(cvtColor32, _mm_setzero_si128()); + + newDstColor32.color = _mm_cvtsi128_si32(cvtColor32); + + //pixel shader + this->_shade((PolygonMode)polyAttr.Mode, newDstColor32, shaderOutput, invu * w, invv * w); + + // handle alpha test + if ( shaderOutput.a == 0 || (this->_softRender->currentRenderState->enableAlphaTest && shaderOutput.a < this->_softRender->currentRenderState->alphaTestRef) ) + { + return; + } + + // write pixel values to the framebuffer + isOpaquePixel = (shaderOutput.a == 0x1F); + if (isOpaquePixel) + { + dstAttributeOpaquePolyID = polyAttr.PolygonID; + dstAttributeIsTranslucentPoly = isTranslucent; + dstAttributeIsFogged = polyAttr.Fog_Enable; + dstColor = shaderOutput; + } + else + { + //dont overwrite pixels on translucent polys with the same polyids + if (dstAttributeTranslucentPolyID == polyAttr.PolygonID) + return; + + //originally we were using a test case of shadows-behind-trees in sm64ds + //but, it looks bad in that game. this is actually correct + //if this isnt correct, then complex shape cart shadows in mario kart don't work right + dstAttributeTranslucentPolyID = polyAttr.PolygonID; + + //alpha blending and write color + alphaBlend(dstColor, shaderOutput); + + dstAttributeIsFogged = (dstAttributeIsFogged && polyAttr.Fog_Enable); + } + + dstAttributePolyFacing = (ISFRONTFACING) ? PolyFacing_Front : PolyFacing_Back; + + //depth writing + if (isOpaquePixel || polyAttr.TranslucentDepthWrite_Enable) + dstAttributeDepth = newDepth; +} + +//draws a single scanline +template template +FORCEINLINE void RasterizerUnit::_drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight) +{ + const int XStart = pLeft->X; + int width = pRight->X - XStart; + + // HACK: workaround for vertical/slant line poly + if (USELINEHACK && width == 0) + { + int leftWidth = pLeft->XStep; + if (pLeft->ErrorTerm + pLeft->Numerator >= pLeft->Denominator) + leftWidth++; + int rightWidth = pRight->XStep; + if (pRight->ErrorTerm + pRight->Numerator >= pRight->Denominator) + rightWidth++; + width = max(1, max(abs(leftWidth), abs(rightWidth))); + } + + //these are the starting values, taken from the left edge + __m128 coord = _mm_setr_ps(pLeft->u.curr, + pLeft->v.curr, + pLeft->z.curr, + pLeft->invw.curr); + + __m128 color = _mm_setr_ps(pLeft->color[0].curr, + pLeft->color[1].curr, + pLeft->color[2].curr, + (float)polyAttr.Alpha / 31.0f); + + //our dx values are taken from the steps up until the right edge + const __m128 invWidth = _mm_set1_ps(1.0f / (float)width); + const __m128 coord_dx = _mm_mul_ps(_mm_setr_ps(pRight->u.curr - coord[0], pRight->v.curr - coord[1], pRight->z.curr - coord[2], pRight->invw.curr - coord[3]), invWidth); + const __m128 color_dx = _mm_mul_ps(_mm_setr_ps(pRight->color[0].curr - color[0], pRight->color[1].curr - color[1], pRight->color[2].curr - color[2], 0.0f), invWidth); + + size_t adr = (pLeft->Y*framebufferWidth)+XStart; + + //CONSIDER: in case some other math is wrong (shouldve been clipped OK), we might go out of bounds here. + //better check the Y value. + if (RENDERER && (pLeft->Y < 0 || pLeft->Y > (framebufferHeight - 1))) + { + printf("rasterizer rendering at y=%d! oops!\n",pLeft->Y); + return; + } + if (!RENDERER && (pLeft->Y < 0 || pLeft->Y >= framebufferHeight)) + { + printf("rasterizer rendering at y=%d! oops!\n",pLeft->Y); + return; + } + + int x = XStart; + + if (x < 0) + { + if (RENDERER && !USELINEHACK) + { + printf("rasterizer rendering at x=%d! oops!\n",x); + return; + } + + const __m128 negativeX = _mm_cvtepi32_ps(_mm_set1_epi32(-x)); + coord = _mm_add_ps(coord, _mm_mul_ps(coord_dx, negativeX)); + color = _mm_add_ps(color, _mm_mul_ps(color_dx, negativeX)); + + adr += -x; + width -= -x; + x = 0; + } + if (x+width > framebufferWidth) + { + if (RENDERER && !USELINEHACK && framebufferWidth == GPU_FRAMEBUFFER_NATIVE_WIDTH) + { + printf("rasterizer rendering at x=%d! oops!\n",x+width-1); + return; + } + width = framebufferWidth - x; + } + + CACHE_ALIGN float coord_s[4]; + + while (width-- > 0) + { + _mm_store_ps(coord_s, coord); + + this->_pixel_SSE2(polyAttr, isTranslucent, adr, dstColor[adr], color, coord_s[0], coord_s[1], coord_s[2], 1.0f/coord_s[3]); + adr++; + x++; + + coord = _mm_add_ps(coord, coord_dx); + color = _mm_add_ps(color, color_dx); + } +} + +#endif // ENABLE_SSE2 + //runs several scanlines, until an edge is finished -template template -void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right) +template template +void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right) { //oh lord, hack city for edge drawing @@ -768,13 +1036,28 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const if ( USELINEHACK && (left->Height == 0) && (right->Height == 0) && (left->Y < framebufferHeight) && (left->Y >= 0) ) { const bool draw = ( !SLI || ((left->Y >= this->_SLI_startLine) && (left->Y < this->_SLI_endLine)) ); - if (draw) this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); + if (draw) + { +#ifdef ENABLE_SSE2 + this->_drawscanline_SSE2(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); +#else + this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); +#endif + } } while (Height--) { const bool draw = ( !SLI || ((left->Y >= this->_SLI_startLine) && (left->Y < this->_SLI_endLine)) ); - if (draw) this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); + if (draw) + { +#ifdef ENABLE_SSE2 + this->_drawscanline_SSE2(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); +#else + this->_drawscanline(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right); +#endif + } + const int xl = left->X; const int xr = right->X; const int y = left->Y; @@ -784,8 +1067,8 @@ void RasterizerUnit::_runscanlines(const POLYGON_ATTR polyAttr, const if (!RENDERER && _debug_thisPoly) { //debug drawing - bool top = (ISHORIZONTAL && first); - bool bottom = (!Height && ISHORIZONTAL); + bool top = (isHorizontal && first); + bool bottom = (!Height && isHorizontal); if (Height || top || bottom) { if (draw) @@ -939,15 +1222,8 @@ void RasterizerUnit::_shape_engine(const POLYGON_ATTR polyAttr, const if (failure) return; - const bool horizontal = (left.Y == right.Y); - if (horizontal) - { - this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, &left, &right); - } - else - { - this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, &left, &right); - } + const bool isHorizontal = (left.Y == right.Y); + this->_runscanlines(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, isHorizontal, &left, &right); //if we ran out of an edge, step to the next one if (right.Height == 0) diff --git a/desmume/src/rasterize.h b/desmume/src/rasterize.h index aee1214a3..98dc726bc 100644 --- a/desmume/src/rasterize.h +++ b/desmume/src/rasterize.h @@ -1,31 +1,35 @@ -/* - Copyright (C) 2009-2018 DeSmuME team - - This file is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This file is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the this software. If not, see . -*/ - -#ifndef _RASTERIZE_H_ -#define _RASTERIZE_H_ - -#include "render3D.h" -#include "gfx3d.h" +/* + Copyright (C) 2009-2018 DeSmuME team -#define SOFTRASTERIZER_MAX_THREADS 32 - -extern GPU3DInterface gpu3DRasterize; + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. -class Task; + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the this software. If not, see . +*/ + +#ifndef _RASTERIZE_H_ +#define _RASTERIZE_H_ + +#include "render3D.h" +#include "gfx3d.h" + +#ifdef ENABLE_SSE2 +#include +#endif + +#define SOFTRASTERIZER_MAX_THREADS 32 + +extern GPU3DInterface gpu3DRasterize; + +class Task; class SoftRasterizerRenderer; struct edge_fx_fl; @@ -34,63 +38,63 @@ struct SoftRasterizerClearParam SoftRasterizerRenderer *renderer; size_t startPixel; size_t endPixel; -}; - -struct SoftRasterizerPostProcessParams -{ - SoftRasterizerRenderer *renderer; - size_t startLine; - size_t endLine; - bool enableEdgeMarking; - bool enableFog; - u32 fogColor; - bool fogAlphaOnly; -}; - -class SoftRasterizerTexture : public Render3DTexture -{ -private: - void _clamp(s32 &val, const int size, const s32 sizemask) const; - void _hclamp(s32 &val) const; - void _vclamp(s32 &val) const; - void _repeat(s32 &val, const int size, const s32 sizemask) const; - void _hrepeat(s32 &val) const; - void _vrepeat(s32 &val) const; - void _flip(s32 &val, const int size, const s32 sizemask) const; - void _hflip(s32 &val) const; - void _vflip(s32 &val) const; - -protected: - u32 *_unpackData; - u32 *_customBuffer; - - u32 *_renderData; - s32 _renderWidth; - s32 _renderHeight; - s32 _renderWidthMask; - s32 _renderHeightMask; - u32 _renderWidthShift; - -public: - SoftRasterizerTexture(TEXIMAGE_PARAM texAttributes, u32 palAttributes); - virtual ~SoftRasterizerTexture(); - - virtual void Load(); - - u32* GetUnpackData(); - - u32* GetRenderData(); - s32 GetRenderWidth() const; - s32 GetRenderHeight() const; - s32 GetRenderWidthMask() const; - s32 GetRenderHeightMask() const; - u32 GetRenderWidthShift() const; - - void GetRenderSamplerCoordinates(const u8 wrapMode, s32 &iu, s32 &iv) const; - - void SetUseDeposterize(bool willDeposterize); - void SetScalingFactor(size_t scalingFactor); -}; +}; + +struct SoftRasterizerPostProcessParams +{ + SoftRasterizerRenderer *renderer; + size_t startLine; + size_t endLine; + bool enableEdgeMarking; + bool enableFog; + u32 fogColor; + bool fogAlphaOnly; +}; + +class SoftRasterizerTexture : public Render3DTexture +{ +private: + void _clamp(s32 &val, const int size, const s32 sizemask) const; + void _hclamp(s32 &val) const; + void _vclamp(s32 &val) const; + void _repeat(s32 &val, const int size, const s32 sizemask) const; + void _hrepeat(s32 &val) const; + void _vrepeat(s32 &val) const; + void _flip(s32 &val, const int size, const s32 sizemask) const; + void _hflip(s32 &val) const; + void _vflip(s32 &val) const; + +protected: + u32 *_unpackData; + u32 *_customBuffer; + + u32 *_renderData; + s32 _renderWidth; + s32 _renderHeight; + s32 _renderWidthMask; + s32 _renderHeightMask; + u32 _renderWidthShift; + +public: + SoftRasterizerTexture(TEXIMAGE_PARAM texAttributes, u32 palAttributes); + virtual ~SoftRasterizerTexture(); + + virtual void Load(); + + u32* GetUnpackData(); + + u32* GetRenderData(); + s32 GetRenderWidth() const; + s32 GetRenderHeight() const; + s32 GetRenderWidthMask() const; + s32 GetRenderHeightMask() const; + u32 GetRenderWidthShift() const; + + void GetRenderSamplerCoordinates(const u8 wrapMode, s32 &iu, s32 &iv) const; + + void SetUseDeposterize(bool willDeposterize); + void SetScalingFactor(size_t scalingFactor); +}; template class RasterizerUnit @@ -111,9 +115,14 @@ protected: FORCEINLINE float _round_s(double val); template FORCEINLINE void _shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV); - template FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z); + template FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float z, float w); template FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); - template void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right); + template void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, const bool isHorizontal, edge_fx_fl *left, edge_fx_fl *right); + +#ifdef ENABLE_SSE2 + template FORCEINLINE void _pixel_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, const __m128 &srcColorf, float invu, float invv, float z, float w); + template FORCEINLINE void _drawscanline_SSE2(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight); +#endif template FORCEINLINE void _rot_verts(); template void _sort_verts(); @@ -124,17 +133,17 @@ public: void SetRenderer(SoftRasterizerRenderer *theRenderer); template FORCEINLINE void Render(); }; - + #if defined(ENABLE_AVX2) class SoftRasterizerRenderer : public Render3D_AVX2 -#elif defined(ENABLE_SSE2) +#elif defined(ENABLE_SSE2) class SoftRasterizerRenderer : public Render3D_SSE2 #elif defined(ENABLE_ALTIVEC) -class SoftRasterizerRenderer : public Render3D_Altivec -#else -class SoftRasterizerRenderer : public Render3D -#endif -{ +class SoftRasterizerRenderer : public Render3D_Altivec +#else +class SoftRasterizerRenderer : public Render3D +#endif +{ protected: Task *_task; SoftRasterizerClearParam _threadClearParam[SOFTRASTERIZER_MAX_THREADS]; @@ -151,66 +160,66 @@ protected: FragmentColor _clearColor6665; FragmentAttributes _clearAttributes; - - GFX3D_Clipper clipper; - u8 fogTable[32768]; - FragmentColor edgeMarkTable[8]; - bool edgeMarkDisabled[8]; - - bool _renderGeometryNeedsFinish; - - bool _enableHighPrecisionColorInterpolation; - bool _enableLineHack; - - // SoftRasterizer-specific methods - virtual Render3DError InitTables(); - - template size_t performClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); - - // Base rendering methods - virtual Render3DError BeginRender(const GFX3D &engine); - virtual Render3DError RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList); - virtual Render3DError RenderEdgeMarking(const u16 *colorTable, const bool useAntialias); - virtual Render3DError RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly); - virtual Render3DError EndRender(const u64 frameCount); - - virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer); - virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); - -public: - int _debug_drawClippedUserPoly; - size_t _clippedPolyCount; - FragmentColor toonColor32LUT[32]; - GFX3D_Clipper::TClippedPoly *clippedPolys; - FragmentAttributesBuffer *_framebufferAttributes; - bool polyVisible[POLYLIST_SIZE]; - bool polyBackfacing[POLYLIST_SIZE]; - GFX3D_State *currentRenderState; - - bool _enableFragmentSamplingHack; - - SoftRasterizerRenderer(); - virtual ~SoftRasterizerRenderer(); - - void performViewportTransforms(); - void performBackfaceTests(); - void performCoordAdjustment(); - void GetAndLoadAllTextures(); - Render3DError UpdateEdgeMarkColorTable(const u16 *edgeMarkColorTable); - Render3DError UpdateFogTable(const u8 *fogDensityTable); - Render3DError RenderEdgeMarkingAndFog(const SoftRasterizerPostProcessParams ¶m); - - SoftRasterizerTexture* GetLoadedTextureFromPolygon(const POLY &thePoly, bool enableTexturing); - - // Base rendering methods - virtual Render3DError UpdateToonTable(const u16 *toonTableBuffer); - virtual Render3DError Reset(); - virtual Render3DError ApplyRenderingSettings(const GFX3D_State &renderState); - virtual Render3DError Render(const GFX3D &engine); - virtual Render3DError RenderFinish(); + + GFX3D_Clipper clipper; + u8 fogTable[32768]; + FragmentColor edgeMarkTable[8]; + bool edgeMarkDisabled[8]; + + bool _renderGeometryNeedsFinish; + + bool _enableHighPrecisionColorInterpolation; + bool _enableLineHack; + + // SoftRasterizer-specific methods + virtual Render3DError InitTables(); + + template size_t performClipping(const VERT *vertList, const POLYLIST *polyList, const INDEXLIST *indexList); + + // Base rendering methods + virtual Render3DError BeginRender(const GFX3D &engine); + virtual Render3DError RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList); + virtual Render3DError RenderEdgeMarking(const u16 *colorTable, const bool useAntialias); + virtual Render3DError RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly); + virtual Render3DError EndRender(const u64 frameCount); + + virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer); + virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + +public: + int _debug_drawClippedUserPoly; + size_t _clippedPolyCount; + FragmentColor toonColor32LUT[32]; + GFX3D_Clipper::TClippedPoly *clippedPolys; + FragmentAttributesBuffer *_framebufferAttributes; + bool polyVisible[POLYLIST_SIZE]; + bool polyBackfacing[POLYLIST_SIZE]; + GFX3D_State *currentRenderState; + + bool _enableFragmentSamplingHack; + + SoftRasterizerRenderer(); + virtual ~SoftRasterizerRenderer(); + + void performViewportTransforms(); + void performBackfaceTests(); + void performCoordAdjustment(); + void GetAndLoadAllTextures(); + Render3DError UpdateEdgeMarkColorTable(const u16 *edgeMarkColorTable); + Render3DError UpdateFogTable(const u8 *fogDensityTable); + Render3DError RenderEdgeMarkingAndFog(const SoftRasterizerPostProcessParams ¶m); + + SoftRasterizerTexture* GetLoadedTextureFromPolygon(const POLY &thePoly, bool enableTexturing); + + // Base rendering methods + virtual Render3DError UpdateToonTable(const u16 *toonTableBuffer); + virtual Render3DError Reset(); + virtual Render3DError ApplyRenderingSettings(const GFX3D_State &renderState); + virtual Render3DError Render(const GFX3D &engine); + virtual Render3DError RenderFinish(); virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16); - virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); - virtual Render3DError SetFramebufferSize(size_t w, size_t h); + virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); + virtual Render3DError SetFramebufferSize(size_t w, size_t h); }; template @@ -255,12 +264,12 @@ protected: public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); }; - -#elif defined(ENABLE_SSE2) -class SoftRasterizerRenderer_SSE2 : public SoftRasterizer_SIMD<16> + +#elif defined(ENABLE_SSE2) +class SoftRasterizerRenderer_SSE2 : public SoftRasterizer_SIMD<16> { protected: - virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); + virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); @@ -274,8 +283,8 @@ protected: public: virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel); -}; - -#endif - -#endif // _RASTERIZE_H_ +}; + +#endif + +#endif // _RASTERIZE_H_