SoftRasterizer: Framebuffer clears are now accelerated using AVX2 and Altivec.

This commit is contained in:
rogerman 2018-02-12 18:03:52 -08:00
parent ab18de05ef
commit 43d3883986
5 changed files with 256 additions and 160 deletions

View File

@ -616,11 +616,15 @@ public:
void SetDeposterizeBuffer(void *dstBuffer, void *workingBuffer);
void SetUpscalingBuffer(void *upscaleBuffer);
};
#if defined(ENABLE_SSE2)
class OpenGLRenderer : public Render3D_SSE2
#else
class OpenGLRenderer : public Render3D
#if defined(ENABLE_AVX2)
class OpenGLRenderer : public Render3D_AVX2
#elif defined(ENABLE_SSE2)
class OpenGLRenderer : public Render3D_SSE2
#elif defined(ENABLE_ALTIVEC)
class OpenGLRenderer : public Render3D_Altivec
#else
class OpenGLRenderer : public Render3D
#endif
{
private:

View File

@ -1118,29 +1118,38 @@ static void* SoftRasterizer_RunRenderEdgeMarkAndFog(void *arg)
static void* SoftRasterizer_RunClearUsingValues(void *arg)
{
SoftRasterizerClearParam *param = (SoftRasterizerClearParam *)arg;
param->renderer->ClearUsingValuesLoop(param->startPixel, param->endPixel);
param->renderer->ClearUsingValues_Execute(param->startPixel, param->endPixel);
return NULL;
}
static Render3D* SoftRasterizerRendererCreate()
{
#if defined(ENABLE_SSE2)
return new SoftRasterizerRenderer_SSE2;
#else
return new SoftRasterizerRenderer;
{
#if defined(ENABLE_AVX2)
return new SoftRasterizerRenderer_AVX2;
#elif defined(ENABLE_SSE2)
return new SoftRasterizerRenderer_SSE2;
#elif defined(ENABLE_ALTIVEC)
return new SoftRasterizerRenderer_Altivec;
#else
return new SoftRasterizerRenderer;
#endif
}
static void SoftRasterizerRendererDestroy()
{
if (CurrentRenderer != BaseRenderer)
{
#if defined(ENABLE_SSE2)
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
#else
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
#endif
{
#if defined(ENABLE_AVX2)
SoftRasterizerRenderer_AVX2 *oldRenderer = (SoftRasterizerRenderer_AVX2 *)CurrentRenderer;
#elif defined(ENABLE_SSE2)
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
#elif defined(ENABLE_ALTIVEC)
SoftRasterizerRenderer_Altivec *oldRenderer = (SoftRasterizerRenderer_Altivec *)CurrentRenderer;
#else
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
#endif
CurrentRenderer = BaseRenderer;
delete oldRenderer;
}
@ -2136,12 +2145,12 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
return RENDER3DERROR_NOERR;
}
void SoftRasterizerRenderer::ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel)
void SoftRasterizerRenderer::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
for (size_t i = startPixel; i < endPixel; i++)
{
this->_framebufferAttributes->SetAtIndex(i, this->_clearAttributes);
this->_framebufferColor[i] = this->_clearColor6665;
this->_framebufferAttributes->SetAtIndex(i, this->_clearAttributes);
}
}
@ -2161,7 +2170,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clea
}
else
{
this->ClearUsingValuesLoop(0, this->_framebufferPixCount);
this->ClearUsingValues_Execute(0, this->_framebufferPixCount);
}
if (doMultithreadedClear)
@ -2327,11 +2336,12 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
}
return RENDER3DERROR_NOERR;
}
#ifdef ENABLE_SSE2
}
SoftRasterizerRenderer_SSE2::SoftRasterizerRenderer_SSE2()
#if defined(ENABLE_AVX2) || defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
template <size_t SIMDBYTES>
SoftRasterizer_SIMD<SIMDBYTES>::SoftRasterizer_SIMD()
{
if (_threadCount == 0)
{
@ -2341,7 +2351,7 @@ SoftRasterizerRenderer_SSE2::SoftRasterizerRenderer_SSE2()
}
else
{
const size_t pixelsPerThread = ((_framebufferSIMDPixCount / 16) / _threadCount) * 16;
const size_t pixelsPerThread = ((_framebufferSIMDPixCount / SIMDBYTES) / _threadCount) * SIMDBYTES;
for (size_t i = 0; i < _threadCount; i++)
{
@ -2352,37 +2362,10 @@ SoftRasterizerRenderer_SSE2::SoftRasterizerRenderer_SSE2()
}
}
void SoftRasterizerRenderer_SSE2::ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel)
template <size_t SIMDBYTES>
Render3DError SoftRasterizer_SIMD<SIMDBYTES>::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
{
for (size_t i = startPixel; i < endPixel; i+=16)
{
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 0), this->_clearColor_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 4), this->_clearColor_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 8), this->_clearColor_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferColor + i + 12), this->_clearColor_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 0), this->_clearDepth_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 4), this->_clearDepth_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 8), this->_clearDepth_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->depth + i + 12), this->_clearDepth_v128u32);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v128u8);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v128u8);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v128u8);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v128u8);
_mm_stream_si128((__m128i *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v128u8);
}
}
Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
{
this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.color);
this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth);
this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID);
this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID);
this->_clearAttrStencil_v128u8 = _mm_set1_epi8(clearAttributes.stencil);
this->_clearAttrIsFogged_v128u8 = _mm_set1_epi8(clearAttributes.isFogged);
this->_clearAttrIsTranslucentPoly_v128u8 = _mm_set1_epi8(clearAttributes.isTranslucentPoly);
this->LoadClearValues(clearColor6665, clearAttributes);
const bool doMultithreadedClear = (this->_threadCount > 0);
@ -2395,16 +2378,14 @@ Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor
}
else
{
this->ClearUsingValuesLoop(0, this->_framebufferSIMDPixCount);
}
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (size_t i = this->_framebufferSIMDPixCount; i < this->_framebufferPixCount; i++)
{
this->_framebufferColor[i] = clearColor6665;
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
this->ClearUsingValues_Execute(0, this->_framebufferSIMDPixCount);
}
#pragma LOOPVECTORIZE_DISABLE
for (size_t i = this->_framebufferSIMDPixCount; i < this->_framebufferPixCount; i++)
{
this->_framebufferColor[i] = clearColor6665;
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
}
if (doMultithreadedClear)
@ -2413,14 +2394,15 @@ Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor
{
this->_task[threadIndex].finish();
}
}
return RENDER3DERROR_NOERR;
}
return RENDER3DERROR_NOERR;
}
Render3DError SoftRasterizerRenderer_SSE2::SetFramebufferSize(size_t w, size_t h)
template <size_t SIMDBYTES>
Render3DError SoftRasterizer_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_t h)
{
Render3DError error = Render3D_SSE2::SetFramebufferSize(w, h);
Render3DError error = Render3D_SIMD<SIMDBYTES>::SetFramebufferSize(w, h);
if (error != RENDER3DERROR_NOERR)
{
return RENDER3DERROR_NOERR;
@ -2444,7 +2426,7 @@ Render3DError SoftRasterizerRenderer_SSE2::SetFramebufferSize(size_t w, size_t h
}
else
{
const size_t pixelsPerThread = ((pixCount / 16) / this->_threadCount) * 16;
const size_t pixelsPerThread = ((pixCount / SIMDBYTES) / this->_threadCount) * SIMDBYTES;
this->_customLinesPerThread = h / this->_threadCount;
this->_customPixelsPerThread = pixelsPerThread / this->_threadCount;
@ -2460,6 +2442,113 @@ Render3DError SoftRasterizerRenderer_SSE2::SetFramebufferSize(size_t w, size_t h
}
return RENDER3DERROR_NOERR;
}
#endif
#if defined(ENABLE_AVX2)
void SoftRasterizerRenderer_AVX2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
{
this->_clearColor_v256u32 = _mm256_set1_epi32(clearColor6665.color);
this->_clearDepth_v256u32 = _mm256_set1_epi32(clearAttributes.depth);
this->_clearAttrOpaquePolyID_v256u8 = _mm256_set1_epi8(clearAttributes.opaquePolyID);
this->_clearAttrTranslucentPolyID_v256u8 = _mm256_set1_epi8(clearAttributes.translucentPolyID);
this->_clearAttrStencil_v256u8 = _mm256_set1_epi8(clearAttributes.stencil);
this->_clearAttrIsFogged_v256u8 = _mm256_set1_epi8(clearAttributes.isFogged);
this->_clearAttrIsTranslucentPoly_v256u8 = _mm256_set1_epi8(clearAttributes.isTranslucentPoly);
}
void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
for (size_t i = startPixel; i < endPixel; i+=32)
{
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 0), this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 8), this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 16), this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i + 24), this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 0), this->_clearDepth_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 8), this->_clearDepth_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 16), this->_clearDepth_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i + 24), this->_clearDepth_v256u32);
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v256u8);
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v256u8);
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v256u8);
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v256u8);
_mm256_stream_si256((v256u8 *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v256u8);
}
}
#elif defined(ENABLE_SSE2)
void SoftRasterizerRenderer_SSE2::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
{
this->_clearColor_v128u32 = _mm_set1_epi32(clearColor6665.color);
this->_clearDepth_v128u32 = _mm_set1_epi32(clearAttributes.depth);
this->_clearAttrOpaquePolyID_v128u8 = _mm_set1_epi8(clearAttributes.opaquePolyID);
this->_clearAttrTranslucentPolyID_v128u8 = _mm_set1_epi8(clearAttributes.translucentPolyID);
this->_clearAttrStencil_v128u8 = _mm_set1_epi8(clearAttributes.stencil);
this->_clearAttrIsFogged_v128u8 = _mm_set1_epi8(clearAttributes.isFogged);
this->_clearAttrIsTranslucentPoly_v128u8 = _mm_set1_epi8(clearAttributes.isTranslucentPoly);
}
void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
for (size_t i = startPixel; i < endPixel; i+=16)
{
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 0), this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 4), this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 8), this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i + 12), this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 0), this->_clearDepth_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 4), this->_clearDepth_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 8), this->_clearDepth_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i + 12), this->_clearDepth_v128u32);
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v128u8);
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v128u8);
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v128u8);
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v128u8);
_mm_stream_si128((v128u8 *)(this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v128u8);
}
}
#elif defined(ENABLE_ALTIVEC)
void SoftRasterizerRenderer_Altivec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
{
this->_clearColor_v128u32 = vec_splat_u32(clearColor6665.color);
this->_clearDepth_v128u32 = vec_splat_u32(clearAttributes.depth);
this->_clearAttrOpaquePolyID_v128u8 = vec_splat_u8(clearAttributes.opaquePolyID);
this->_clearAttrTranslucentPolyID_v128u8 = vec_splat_u8(clearAttributes.translucentPolyID);
this->_clearAttrStencil_v128u8 = vec_splat_u8(clearAttributes.stencil);
this->_clearAttrIsFogged_v128u8 = vec_splat_u8(clearAttributes.isFogged);
this->_clearAttrIsTranslucentPoly_v128u8 = vec_splat_u8(clearAttributes.isTranslucentPoly);
}
void SoftRasterizerRenderer_Altivec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
for (size_t i = startPixel; i < endPixel; i+=16)
{
vec_st(this->_clearColor_v128u32, i + 0, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, i + 16, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, i + 32, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, i + 48, this->_framebufferColor);
vec_st(this->_clearDepth_v128u32, i + 0, this->_framebufferAttributes->depth);
vec_st(this->_clearDepth_v128u32, i + 16, this->_framebufferAttributes->depth);
vec_st(this->_clearDepth_v128u32, i + 32, this->_framebufferAttributes->depth);
vec_st(this->_clearDepth_v128u32, i + 48, this->_framebufferAttributes->depth);
vec_st(this->_clearAttrOpaquePolyID_v128u8, i, this->_framebufferAttributes->opaquePolyID);
vec_st(this->_clearAttrTranslucentPolyID_v128u8, i, this->_framebufferAttributes->translucentPolyID);
vec_st(this->_clearAttrStencil_v128u8, i, this->_framebufferAttributes->stencil);
vec_st(this->_clearAttrIsFogged_v128u8, i, this->_framebufferAttributes->isFogged);
vec_st(this->_clearAttrIsTranslucentPoly_v128u8, i, this->_framebufferAttributes->isTranslucentPoly);
}
}
#endif // ENABLE_SSE2
#endif

View File

@ -125,8 +125,12 @@ public:
template<bool SLI, bool USELINEHACK> FORCEINLINE void Render();
};
#if defined(ENABLE_SSE2)
class SoftRasterizerRenderer : public Render3D_SSE2
#if defined(ENABLE_AVX2)
class SoftRasterizerRenderer : public Render3D_AVX2
#elif defined(ENABLE_SSE2)
class SoftRasterizerRenderer : public Render3D_SSE2
#elif defined(ENABLE_ALTIVEC)
class SoftRasterizerRenderer : public Render3D_Altivec
#else
class SoftRasterizerRenderer : public Render3D
#endif
@ -205,15 +209,23 @@ public:
virtual Render3DError Render(const GFX3D &engine);
virtual Render3DError RenderFinish();
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
};
#ifdef ENABLE_SSE2
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
};
template <size_t SIMDBYTES>
class SoftRasterizer_SIMD : public SoftRasterizerRenderer
{
protected:
#if defined(ENABLE_AVX2)
v256u32 _clearColor_v256u32;
v256u32 _clearDepth_v256u32;
v256u8 _clearAttrOpaquePolyID_v256u8;
v256u8 _clearAttrTranslucentPolyID_v256u8;
v256u8 _clearAttrStencil_v256u8;
v256u8 _clearAttrIsFogged_v256u8;
v256u8 _clearAttrIsTranslucentPoly_v256u8;
#elif defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
v128u32 _clearColor_v128u32;
v128u32 _clearDepth_v128u32;
v128u8 _clearAttrOpaquePolyID_v128u8;
@ -221,14 +233,45 @@ protected:
v128u8 _clearAttrStencil_v128u8;
v128u8 _clearAttrIsFogged_v128u8;
v128u8 _clearAttrIsTranslucentPoly_v128u8;
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
#endif
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) = 0;
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
public:
SoftRasterizerRenderer_SSE2();
SoftRasterizer_SIMD();
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
};
#if defined(ENABLE_AVX2)
class SoftRasterizerRenderer_AVX2 : public SoftRasterizer_SIMD<32>
{
protected:
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
};
#elif defined(ENABLE_SSE2)
class SoftRasterizerRenderer_SSE2 : public SoftRasterizer_SIMD<16>
{
protected:
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
};
#elif defined(ENABLE_ALTIVEC)
class SoftRasterizerRenderer_Altivec : public SoftRasterizer_SIMD<16>
{
protected:
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
public:
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
};
#endif

View File

@ -113,43 +113,6 @@ void FragmentAttributesBuffer::SetAtIndex(const size_t index, const FragmentAttr
this->isTranslucentPoly[index] = attr.isTranslucentPoly;
}
void FragmentAttributesBuffer::SetAll(const FragmentAttributes &attr)
{
size_t i = 0;
#ifdef ENABLE_SSE2
const __m128i attrDepth_vec128 = _mm_set1_epi32(attr.depth);
const __m128i attrOpaquePolyID_vec128 = _mm_set1_epi8(attr.opaquePolyID);
const __m128i attrTranslucentPolyID_vec128 = _mm_set1_epi8(attr.translucentPolyID);
const __m128i attrStencil_vec128 = _mm_set1_epi8(attr.stencil);
const __m128i attrIsFogged_vec128 = _mm_set1_epi8(attr.isFogged);
const __m128i attrIsTranslucentPoly_vec128 = _mm_set1_epi8(attr.isTranslucentPoly);
const size_t sseCount = count - (count % 16);
for (; i < sseCount; i += 16)
{
_mm_stream_si128((__m128i *)(this->depth + 0), attrDepth_vec128);
_mm_stream_si128((__m128i *)(this->depth + 4), attrDepth_vec128);
_mm_stream_si128((__m128i *)(this->depth + 8), attrDepth_vec128);
_mm_stream_si128((__m128i *)(this->depth + 12), attrDepth_vec128);
_mm_stream_si128((__m128i *)this->opaquePolyID, attrOpaquePolyID_vec128);
_mm_stream_si128((__m128i *)this->translucentPolyID, attrTranslucentPolyID_vec128);
_mm_stream_si128((__m128i *)this->stencil, attrStencil_vec128);
_mm_stream_si128((__m128i *)this->isFogged, attrIsFogged_vec128);
_mm_stream_si128((__m128i *)this->isTranslucentPoly, attrIsTranslucentPoly_vec128);
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < count; i++)
{
this->SetAtIndex(i, attr);
}
}
Render3DTexture::Render3DTexture(TEXIMAGE_PARAM texAttributes, u32 palAttributes) : TextureStore(texAttributes, palAttributes)
{
_isSamplingEnabled = true;
@ -688,12 +651,14 @@ Render3DError Render3D::VramReconfigureSignal()
return RENDER3DERROR_NOERR;
}
Render3D_SIMD128::Render3D_SIMD128()
template <size_t SIMDBYTES>
Render3D_SIMD<SIMDBYTES>::Render3D_SIMD()
{
_framebufferSIMDPixCount = _framebufferPixCount - (_framebufferPixCount % 16);
_framebufferSIMDPixCount = (SIMDBYTES > 0) ? _framebufferPixCount - (_framebufferPixCount % SIMDBYTES) : _framebufferPixCount;
}
Render3DError Render3D_SIMD128::SetFramebufferSize(size_t w, size_t h)
template <size_t SIMDBYTES>
Render3DError Render3D_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_t h)
{
Render3DError error = this->Render3D::SetFramebufferSize(w, h);
if (error != RENDER3DERROR_NOERR)
@ -701,32 +666,18 @@ Render3DError Render3D_SIMD128::SetFramebufferSize(size_t w, size_t h)
return RENDER3DERROR_NOERR;
}
this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 16);
this->_framebufferSIMDPixCount = (SIMDBYTES > 0) ? this->_framebufferPixCount - (this->_framebufferPixCount % SIMDBYTES) : _framebufferPixCount;
return error;
}
Render3D_SIMD256::Render3D_SIMD256()
{
_framebufferSIMDPixCount = _framebufferPixCount - (_framebufferPixCount % 32);
}
Render3DError Render3D_SIMD256::SetFramebufferSize(size_t w, size_t h)
{
Render3DError error = this->Render3D::SetFramebufferSize(w, h);
if (error != RENDER3DERROR_NOERR)
{
return RENDER3DERROR_NOERR;
}
this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 32);
return error;
}
#ifdef ENABLE_SSE2
#if defined(ENABLE_AVX2) || defined(ENABLE_SSE2)
#if defined(ENABLE_AVX2)
Render3DError Render3D_AVX2::ClearFramebuffer(const GFX3D_State &renderState)
#elif defined(ENABLE_SSE2)
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
#endif
{
Render3DError error = RENDER3DERROR_NOERR;
@ -910,4 +861,8 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
return error;
}
#endif // ENABLE_SSE2
#endif // defined(ENABLE_AVX2) || defined(ENABLE_SSE2)
template Render3D_SIMD<0>::Render3D_SIMD();
template Render3D_SIMD<16>::Render3D_SIMD();
template Render3D_SIMD<32>::Render3D_SIMD();

View File

@ -96,7 +96,6 @@ struct FragmentAttributesBuffer
~FragmentAttributesBuffer();
void SetAtIndex(const size_t index, const FragmentAttributes &attr);
void SetAll(const FragmentAttributes &attr);
};
struct Render3DDeviceInfo
@ -248,29 +247,35 @@ public:
Render3DTexture* GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const;
};
class Render3D_SIMD128 : public Render3D
template <size_t SIMDBYTES>
class Render3D_SIMD : public Render3D
{
public:
Render3D_SIMD128();
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
};
class Render3D_SIMD256 : public Render3D
{
public:
Render3D_SIMD256();
Render3D_SIMD();
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
};
#if defined(ENABLE_AVX2)
class Render3D_AVX2 : public Render3D_SIMD<32>
{
public:
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
};
#elif defined(ENABLE_SSE2)
#ifdef ENABLE_SSE2
class Render3D_SSE2 : public Render3D_SIMD128
class Render3D_SSE2 : public Render3D_SIMD<16>
{
public:
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
};
};
#elif defined(ENABLE_ALTIVEC)
class Render3D_Altivec : public Render3D_SIMD<16>
{};
#endif