SoftRasterizer: Framebuffer clears are now accelerated using AVX2 and Altivec.
This commit is contained in:
parent
ab18de05ef
commit
43d3883986
|
@ -616,11 +616,15 @@ public:
|
|||
void SetDeposterizeBuffer(void *dstBuffer, void *workingBuffer);
|
||||
void SetUpscalingBuffer(void *upscaleBuffer);
|
||||
};
|
||||
|
||||
#if defined(ENABLE_SSE2)
|
||||
class OpenGLRenderer : public Render3D_SSE2
|
||||
#else
|
||||
class OpenGLRenderer : public Render3D
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
class OpenGLRenderer : public Render3D_AVX2
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class OpenGLRenderer : public Render3D_SSE2
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
class OpenGLRenderer : public Render3D_Altivec
|
||||
#else
|
||||
class OpenGLRenderer : public Render3D
|
||||
#endif
|
||||
{
|
||||
private:
|
||||
|
|
|
@ -1118,29 +1118,38 @@ static void* SoftRasterizer_RunRenderEdgeMarkAndFog(void *arg)
|
|||
static void* SoftRasterizer_RunClearUsingValues(void *arg)
|
||||
{
|
||||
SoftRasterizerClearParam *param = (SoftRasterizerClearParam *)arg;
|
||||
param->renderer->ClearUsingValuesLoop(param->startPixel, param->endPixel);
|
||||
param->renderer->ClearUsingValues_Execute(param->startPixel, param->endPixel);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static Render3D* SoftRasterizerRendererCreate()
|
||||
{
|
||||
#if defined(ENABLE_SSE2)
|
||||
return new SoftRasterizerRenderer_SSE2;
|
||||
#else
|
||||
return new SoftRasterizerRenderer;
|
||||
{
|
||||
#if defined(ENABLE_AVX2)
|
||||
return new SoftRasterizerRenderer_AVX2;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
return new SoftRasterizerRenderer_SSE2;
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
return new SoftRasterizerRenderer_Altivec;
|
||||
#else
|
||||
return new SoftRasterizerRenderer;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void SoftRasterizerRendererDestroy()
|
||||
{
|
||||
if (CurrentRenderer != BaseRenderer)
|
||||
{
|
||||
#if defined(ENABLE_SSE2)
|
||||
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
||||
#else
|
||||
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
|
||||
#endif
|
||||
{
|
||||
#if defined(ENABLE_AVX2)
|
||||
SoftRasterizerRenderer_AVX2 *oldRenderer = (SoftRasterizerRenderer_AVX2 *)CurrentRenderer;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
SoftRasterizerRenderer_Altivec *oldRenderer = (SoftRasterizerRenderer_Altivec *)CurrentRenderer;
|
||||
#else
|
||||
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
|
||||
#endif
|
||||
|
||||
CurrentRenderer = BaseRenderer;
|
||||
delete oldRenderer;
|
||||
}
|
||||
|
@ -2136,12 +2145,12 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
void SoftRasterizerRenderer::ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel)
|
||||
void SoftRasterizerRenderer::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i++)
|
||||
{
|
||||
this->_framebufferAttributes->SetAtIndex(i, this->_clearAttributes);
|
||||
this->_framebufferColor[i] = this->_clearColor6665;
|
||||
this->_framebufferAttributes->SetAtIndex(i, this->_clearAttributes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2161,7 +2170,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clea
|
|||
}
|
||||
else
|
||||
{
|
||||
this->ClearUsingValuesLoop(0, this->_framebufferPixCount);
|
||||
this->ClearUsingValues_Execute(0, this->_framebufferPixCount);
|
||||
}
|
||||
|
||||
if (doMultithreadedClear)
|
||||
|
@ -2327,11 +2336,12 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
|
|||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
}
|
||||
|
||||
SoftRasterizerRenderer_SSE2::SoftRasterizerRenderer_SSE2()
|
||||
#if defined(ENABLE_AVX2) || defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
|
||||
|
||||
template <size_t SIMDBYTES>
|
||||
SoftRasterizer_SIMD<SIMDBYTES>::SoftRasterizer_SIMD()
|
||||
{
|
||||
if (_threadCount == 0)
|
||||
{
|
||||
|
@ -2341,7 +2351,7 @@ SoftRasterizerRenderer_SSE2::SoftRasterizerRenderer_SSE2()
|
|||
}
|
||||
else
|
||||
{
|
||||
const size_t pixelsPerThread = ((_framebufferSIMDPixCount / 16) / _threadCount) * 16;
|
||||
const size_t pixelsPerThread = ((_framebufferSIMDPixCount / SIMDBYTES) / _threadCount) * SIMDBYTES;
|
||||
|
||||
for (size_t i = 0; i < _threadCount; i++)
|
||||
{
|
||||
|
@ -2352,37 +2362,10 @@ SoftRasterizerRenderer_SSE2::SoftRasterizerRenderer_SSE2()
|
|||
}
|
||||
}
|
||||
|
||||
void SoftRasterizerRenderer_SSE2::ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel)
|
||||
template <size_t SIMDBYTES>
|
||||
Render3DError SoftRasterizer_SIMD<SIMDBYTES>::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i+=16)
|
||||
{
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 0), this->_clearColor_v128u32);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 4), this->_clearColor_v128u32);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 8), this->_clearColor_v128u32);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 12), this->_clearColor_v128u32);
|
||||
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 0), this->_clearDepth_v128u32);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 4), this->_clearDepth_v128u32);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 8), this->_clearDepth_v128u32);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 12), this->_clearDepth_v128u32);
|
||||
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v128u8);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v128u8);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v128u8);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v128u8);
|
||||
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v128u8);
|
||||
}
|
||||
}
|
||||
|
||||
Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
{
|
||||
this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.color);
|
||||
this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth);
|
||||
this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID);
|
||||
this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID);
|
||||
this->_clearAttrStencil_v128u8 = _mm_set1_epi8(clearAttributes.stencil);
|
||||
this->_clearAttrIsFogged_v128u8 = _mm_set1_epi8(clearAttributes.isFogged);
|
||||
this->_clearAttrIsTranslucentPoly_v128u8 = _mm_set1_epi8(clearAttributes.isTranslucentPoly);
|
||||
this->LoadClearValues(clearColor6665, clearAttributes);
|
||||
|
||||
const bool doMultithreadedClear = (this->_threadCount > 0);
|
||||
|
||||
|
@ -2395,16 +2378,14 @@ Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor
|
|||
}
|
||||
else
|
||||
{
|
||||
this->ClearUsingValuesLoop(0, this->_framebufferSIMDPixCount);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (size_t i = this->_framebufferSIMDPixCount; i < this->_framebufferPixCount; i++)
|
||||
{
|
||||
this->_framebufferColor[i] = clearColor6665;
|
||||
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
|
||||
this->ClearUsingValues_Execute(0, this->_framebufferSIMDPixCount);
|
||||
}
|
||||
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
for (size_t i = this->_framebufferSIMDPixCount; i < this->_framebufferPixCount; i++)
|
||||
{
|
||||
this->_framebufferColor[i] = clearColor6665;
|
||||
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
|
||||
}
|
||||
|
||||
if (doMultithreadedClear)
|
||||
|
@ -2413,14 +2394,15 @@ Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor
|
|||
{
|
||||
this->_task[threadIndex].finish();
|
||||
}
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError SoftRasterizerRenderer_SSE2::SetFramebufferSize(size_t w, size_t h)
|
||||
template <size_t SIMDBYTES>
|
||||
Render3DError SoftRasterizer_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_t h)
|
||||
{
|
||||
Render3DError error = Render3D_SSE2::SetFramebufferSize(w, h);
|
||||
Render3DError error = Render3D_SIMD<SIMDBYTES>::SetFramebufferSize(w, h);
|
||||
if (error != RENDER3DERROR_NOERR)
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
|
@ -2444,7 +2426,7 @@ Render3DError SoftRasterizerRenderer_SSE2::SetFramebufferSize(size_t w, size_t h
|
|||
}
|
||||
else
|
||||
{
|
||||
const size_t pixelsPerThread = ((pixCount / 16) / this->_threadCount) * 16;
|
||||
const size_t pixelsPerThread = ((pixCount / SIMDBYTES) / this->_threadCount) * SIMDBYTES;
|
||||
|
||||
this->_customLinesPerThread = h / this->_threadCount;
|
||||
this->_customPixelsPerThread = pixelsPerThread / this->_threadCount;
|
||||
|
@ -2460,6 +2442,113 @@ Render3DError SoftRasterizerRenderer_SSE2::SetFramebufferSize(size_t w, size_t h
|
|||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
|
||||
void SoftRasterizerRenderer_AVX2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
{
|
||||
this->_clearColor_v256u32 = _mm256_set1_epi32(clearColor6665.color);
|
||||
this->_clearDepth_v256u32 = _mm256_set1_epi32(clearAttributes.depth);
|
||||
this->_clearAttrOpaquePolyID_v256u8 = _mm256_set1_epi8(clearAttributes.opaquePolyID);
|
||||
this->_clearAttrTranslucentPolyID_v256u8 = _mm256_set1_epi8(clearAttributes.translucentPolyID);
|
||||
this->_clearAttrStencil_v256u8 = _mm256_set1_epi8(clearAttributes.stencil);
|
||||
this->_clearAttrIsFogged_v256u8 = _mm256_set1_epi8(clearAttributes.isFogged);
|
||||
this->_clearAttrIsTranslucentPoly_v256u8 = _mm256_set1_epi8(clearAttributes.isTranslucentPoly);
|
||||
}
|
||||
|
||||
void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i+=32)
|
||||
{
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 0), this->_clearColor_v256u32);
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 8), this->_clearColor_v256u32);
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 16), this->_clearColor_v256u32);
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 24), this->_clearColor_v256u32);
|
||||
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 0), this->_clearDepth_v256u32);
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 8), this->_clearDepth_v256u32);
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 16), this->_clearDepth_v256u32);
|
||||
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 24), this->_clearDepth_v256u32);
|
||||
|
||||
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v256u8);
|
||||
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v256u8);
|
||||
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v256u8);
|
||||
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v256u8);
|
||||
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v256u8);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_SSE2)
|
||||
|
||||
void SoftRasterizerRenderer_SSE2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
{
|
||||
this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.color);
|
||||
this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth);
|
||||
this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID);
|
||||
this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID);
|
||||
this->_clearAttrStencil_v128u8 = _mm_set1_epi8(clearAttributes.stencil);
|
||||
this->_clearAttrIsFogged_v128u8 = _mm_set1_epi8(clearAttributes.isFogged);
|
||||
this->_clearAttrIsTranslucentPoly_v128u8 = _mm_set1_epi8(clearAttributes.isTranslucentPoly);
|
||||
}
|
||||
|
||||
void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i+=16)
|
||||
{
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 0), this->_clearColor_v128u32);
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 4), this->_clearColor_v128u32);
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 8), this->_clearColor_v128u32);
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 12), this->_clearColor_v128u32);
|
||||
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 0), this->_clearDepth_v128u32);
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 4), this->_clearDepth_v128u32);
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 8), this->_clearDepth_v128u32);
|
||||
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 12), this->_clearDepth_v128u32);
|
||||
|
||||
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v128u8);
|
||||
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v128u8);
|
||||
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v128u8);
|
||||
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v128u8);
|
||||
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v128u8);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
|
||||
void SoftRasterizerRenderer_Altivec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
{
|
||||
this->_clearColor_v128u32 = vec_splat_u32(clearColor6665.color);
|
||||
this->_clearDepth_v128u32 = vec_splat_u32(clearAttributes.depth);
|
||||
this->_clearAttrOpaquePolyID_v128u8 = vec_splat_u8(clearAttributes.opaquePolyID);
|
||||
this->_clearAttrTranslucentPolyID_v128u8 = vec_splat_u8(clearAttributes.translucentPolyID);
|
||||
this->_clearAttrStencil_v128u8 = vec_splat_u8(clearAttributes.stencil);
|
||||
this->_clearAttrIsFogged_v128u8 = vec_splat_u8(clearAttributes.isFogged);
|
||||
this->_clearAttrIsTranslucentPoly_v128u8 = vec_splat_u8(clearAttributes.isTranslucentPoly);
|
||||
}
|
||||
|
||||
void SoftRasterizerRenderer_Altivec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i+=16)
|
||||
{
|
||||
vec_st(this->_clearColor_v128u32, i + 0, this->_framebufferColor);
|
||||
vec_st(this->_clearColor_v128u32, i + 16, this->_framebufferColor);
|
||||
vec_st(this->_clearColor_v128u32, i + 32, this->_framebufferColor);
|
||||
vec_st(this->_clearColor_v128u32, i + 48, this->_framebufferColor);
|
||||
|
||||
vec_st(this->_clearDepth_v128u32, i + 0, this->_framebufferAttributes->depth);
|
||||
vec_st(this->_clearDepth_v128u32, i + 16, this->_framebufferAttributes->depth);
|
||||
vec_st(this->_clearDepth_v128u32, i + 32, this->_framebufferAttributes->depth);
|
||||
vec_st(this->_clearDepth_v128u32, i + 48, this->_framebufferAttributes->depth);
|
||||
|
||||
vec_st(this->_clearAttrOpaquePolyID_v128u8, i, this->_framebufferAttributes->opaquePolyID);
|
||||
vec_st(this->_clearAttrTranslucentPolyID_v128u8, i, this->_framebufferAttributes->translucentPolyID);
|
||||
vec_st(this->_clearAttrStencil_v128u8, i, this->_framebufferAttributes->stencil);
|
||||
vec_st(this->_clearAttrIsFogged_v128u8, i, this->_framebufferAttributes->isFogged);
|
||||
vec_st(this->_clearAttrIsTranslucentPoly_v128u8, i, this->_framebufferAttributes->isTranslucentPoly);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
#endif
|
||||
|
|
|
@ -125,8 +125,12 @@ public:
|
|||
template<bool SLI, bool USELINEHACK> FORCEINLINE void Render();
|
||||
};
|
||||
|
||||
#if defined(ENABLE_SSE2)
|
||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||
#if defined(ENABLE_AVX2)
|
||||
class SoftRasterizerRenderer : public Render3D_AVX2
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
class SoftRasterizerRenderer : public Render3D_Altivec
|
||||
#else
|
||||
class SoftRasterizerRenderer : public Render3D
|
||||
#endif
|
||||
|
@ -205,15 +209,23 @@ public:
|
|||
virtual Render3DError Render(const GFX3D &engine);
|
||||
virtual Render3DError RenderFinish();
|
||||
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
|
||||
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
|
||||
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
|
||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||
};
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
|
||||
};
|
||||
|
||||
template <size_t SIMDBYTES>
|
||||
class SoftRasterizer_SIMD : public SoftRasterizerRenderer
|
||||
{
|
||||
protected:
|
||||
#if defined(ENABLE_AVX2)
|
||||
v256u32 _clearColor_v256u32;
|
||||
v256u32 _clearDepth_v256u32;
|
||||
v256u8 _clearAttrOpaquePolyID_v256u8;
|
||||
v256u8 _clearAttrTranslucentPolyID_v256u8;
|
||||
v256u8 _clearAttrStencil_v256u8;
|
||||
v256u8 _clearAttrIsFogged_v256u8;
|
||||
v256u8 _clearAttrIsTranslucentPoly_v256u8;
|
||||
#elif defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
|
||||
v128u32 _clearColor_v128u32;
|
||||
v128u32 _clearDepth_v128u32;
|
||||
v128u8 _clearAttrOpaquePolyID_v128u8;
|
||||
|
@ -221,14 +233,45 @@ protected:
|
|||
v128u8 _clearAttrStencil_v128u8;
|
||||
v128u8 _clearAttrIsFogged_v128u8;
|
||||
v128u8 _clearAttrIsTranslucentPoly_v128u8;
|
||||
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
#endif
|
||||
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) = 0;
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
SoftRasterizerRenderer_SSE2();
|
||||
SoftRasterizer_SIMD();
|
||||
|
||||
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
|
||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||
};
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
class SoftRasterizerRenderer_AVX2 : public SoftRasterizer_SIMD<32>
|
||||
{
|
||||
protected:
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
|
||||
};
|
||||
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class SoftRasterizerRenderer_SSE2 : public SoftRasterizer_SIMD<16>
|
||||
{
|
||||
protected:
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
|
||||
};
|
||||
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
class SoftRasterizerRenderer_Altivec : public SoftRasterizer_SIMD<16>
|
||||
{
|
||||
protected:
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -113,43 +113,6 @@ void FragmentAttributesBuffer::SetAtIndex(const size_t index, const FragmentAttr
|
|||
this->isTranslucentPoly[index] = attr.isTranslucentPoly;
|
||||
}
|
||||
|
||||
void FragmentAttributesBuffer::SetAll(const FragmentAttributes &attr)
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const __m128i attrDepth_vec128 = _mm_set1_epi32(attr.depth);
|
||||
const __m128i attrOpaquePolyID_vec128 = _mm_set1_epi8(attr.opaquePolyID);
|
||||
const __m128i attrTranslucentPolyID_vec128 = _mm_set1_epi8(attr.translucentPolyID);
|
||||
const __m128i attrStencil_vec128 = _mm_set1_epi8(attr.stencil);
|
||||
const __m128i attrIsFogged_vec128 = _mm_set1_epi8(attr.isFogged);
|
||||
const __m128i attrIsTranslucentPoly_vec128 = _mm_set1_epi8(attr.isTranslucentPoly);
|
||||
|
||||
const size_t sseCount = count - (count % 16);
|
||||
for (; i < sseCount; i += 16)
|
||||
{
|
||||
_mm_stream_si128((__m128i *)(this->depth + 0), attrDepth_vec128);
|
||||
_mm_stream_si128((__m128i *)(this->depth + 4), attrDepth_vec128);
|
||||
_mm_stream_si128((__m128i *)(this->depth + 8), attrDepth_vec128);
|
||||
_mm_stream_si128((__m128i *)(this->depth + 12), attrDepth_vec128);
|
||||
|
||||
_mm_stream_si128((__m128i *)this->opaquePolyID, attrOpaquePolyID_vec128);
|
||||
_mm_stream_si128((__m128i *)this->translucentPolyID, attrTranslucentPolyID_vec128);
|
||||
_mm_stream_si128((__m128i *)this->stencil, attrStencil_vec128);
|
||||
_mm_stream_si128((__m128i *)this->isFogged, attrIsFogged_vec128);
|
||||
_mm_stream_si128((__m128i *)this->isTranslucentPoly, attrIsTranslucentPoly_vec128);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < count; i++)
|
||||
{
|
||||
this->SetAtIndex(i, attr);
|
||||
}
|
||||
}
|
||||
|
||||
Render3DTexture::Render3DTexture(TEXIMAGE_PARAM texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes)
|
||||
{
|
||||
_isSamplingEnabled = true;
|
||||
|
@ -688,12 +651,14 @@ Render3DError Render3D::VramReconfigureSignal()
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3D_SIMD128::Render3D_SIMD128()
|
||||
template <size_t SIMDBYTES>
|
||||
Render3D_SIMD<SIMDBYTES>::Render3D_SIMD()
|
||||
{
|
||||
_framebufferSIMDPixCount = _framebufferPixCount - (_framebufferPixCount % 16);
|
||||
_framebufferSIMDPixCount = (SIMDBYTES > 0) ? _framebufferPixCount - (_framebufferPixCount % SIMDBYTES) : _framebufferPixCount;
|
||||
}
|
||||
|
||||
Render3DError Render3D_SIMD128::SetFramebufferSize(size_t w, size_t h)
|
||||
template <size_t SIMDBYTES>
|
||||
Render3DError Render3D_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_t h)
|
||||
{
|
||||
Render3DError error = this->Render3D::SetFramebufferSize(w, h);
|
||||
if (error != RENDER3DERROR_NOERR)
|
||||
|
@ -701,32 +666,18 @@ Render3DError Render3D_SIMD128::SetFramebufferSize(size_t w, size_t h)
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 16);
|
||||
this->_framebufferSIMDPixCount = (SIMDBYTES > 0) ? this->_framebufferPixCount - (this->_framebufferPixCount % SIMDBYTES) : _framebufferPixCount;
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
Render3D_SIMD256::Render3D_SIMD256()
|
||||
{
|
||||
_framebufferSIMDPixCount = _framebufferPixCount - (_framebufferPixCount % 32);
|
||||
}
|
||||
|
||||
Render3DError Render3D_SIMD256::SetFramebufferSize(size_t w, size_t h)
|
||||
{
|
||||
Render3DError error = this->Render3D::SetFramebufferSize(w, h);
|
||||
if (error != RENDER3DERROR_NOERR)
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 32);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#if defined(ENABLE_AVX2) || defined(ENABLE_SSE2)
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
Render3DError Render3D_AVX2::ClearFramebuffer(const GFX3D_State &renderState)
|
||||
#elif defined(ENABLE_SSE2)
|
||||
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
||||
#endif
|
||||
{
|
||||
Render3DError error = RENDER3DERROR_NOERR;
|
||||
|
||||
|
@ -910,4 +861,8 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
return error;
|
||||
}
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
#endif // defined(ENABLE_AVX2) || defined(ENABLE_SSE2)
|
||||
|
||||
template Render3D_SIMD<0>::Render3D_SIMD();
|
||||
template Render3D_SIMD<16>::Render3D_SIMD();
|
||||
template Render3D_SIMD<32>::Render3D_SIMD();
|
||||
|
|
|
@ -96,7 +96,6 @@ struct FragmentAttributesBuffer
|
|||
~FragmentAttributesBuffer();
|
||||
|
||||
void SetAtIndex(const size_t index, const FragmentAttributes &attr);
|
||||
void SetAll(const FragmentAttributes &attr);
|
||||
};
|
||||
|
||||
struct Render3DDeviceInfo
|
||||
|
@ -248,29 +247,35 @@ public:
|
|||
Render3DTexture* GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const;
|
||||
};
|
||||
|
||||
class Render3D_SIMD128 : public Render3D
|
||||
template <size_t SIMDBYTES>
|
||||
class Render3D_SIMD : public Render3D
|
||||
{
|
||||
public:
|
||||
Render3D_SIMD128();
|
||||
|
||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||
};
|
||||
|
||||
class Render3D_SIMD256 : public Render3D
|
||||
{
|
||||
public:
|
||||
Render3D_SIMD256();
|
||||
Render3D_SIMD();
|
||||
|
||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||
};
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
|
||||
class Render3D_AVX2 : public Render3D_SIMD<32>
|
||||
{
|
||||
public:
|
||||
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
||||
};
|
||||
|
||||
#elif defined(ENABLE_SSE2)
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
class Render3D_SSE2 : public Render3D_SIMD128
|
||||
class Render3D_SSE2 : public Render3D_SIMD<16>
|
||||
{
|
||||
public:
|
||||
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
||||
};
|
||||
};
|
||||
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
|
||||
class Render3D_Altivec : public Render3D_SIMD<16>
|
||||
{};
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue