Render3D / SoftRasterizer: Add some more NEON optimizations.
This commit is contained in:
parent
e8de3db99c
commit
2e6b938378
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
Copyright (C) 2006 yopyop
|
||||
Copyright (C) 2006-2007 shash
|
||||
Copyright (C) 2008-2021 DeSmuME team
|
||||
Copyright (C) 2008-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -683,6 +683,8 @@ public:
|
|||
class OpenGLRenderer : public Render3D_AVX2
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class OpenGLRenderer : public Render3D_SSE2
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
class OpenGLRenderer : public Render3D_NEON
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
class OpenGLRenderer : public Render3D_AltiVec
|
||||
#else
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) 2009-2021 DeSmuME team
|
||||
Copyright (C) 2009-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -1423,6 +1423,8 @@ static Render3D* SoftRasterizerRendererCreate()
|
|||
return new SoftRasterizerRenderer_AVX2;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
return new SoftRasterizerRenderer_SSE2;
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
return new SoftRasterizerRenderer_NEON;
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
return new SoftRasterizerRenderer_AltiVec;
|
||||
#else
|
||||
|
@ -1438,6 +1440,8 @@ static void SoftRasterizerRendererDestroy()
|
|||
SoftRasterizerRenderer_AVX2 *oldRenderer = (SoftRasterizerRenderer_AVX2 *)CurrentRenderer;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
SoftRasterizerRenderer_NEON *oldRenderer = (SoftRasterizerRenderer_NEON *)CurrentRenderer;
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
SoftRasterizerRenderer_AltiVec *oldRenderer = (SoftRasterizerRenderer_AltiVec *)CurrentRenderer;
|
||||
#else
|
||||
|
@ -2501,7 +2505,7 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#if defined(ENABLE_AVX) || defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
|
||||
#if defined(ENABLE_AVX) || defined(ENABLE_SSE2) || defined(ENABLE_NEON_A64) || defined(ENABLE_ALTIVEC)
|
||||
|
||||
template <size_t SIMDBYTES>
|
||||
SoftRasterizer_SIMD<SIMDBYTES>::SoftRasterizer_SIMD()
|
||||
|
@ -2611,7 +2615,7 @@ Render3DError SoftRasterizer_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // defined(ENABLE_AVX) || defined(ENABLE_SSE2) || defined(ENABLE_NEON_A64) || defined(ENABLE_ALTIVEC)
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
|
||||
|
@ -2687,6 +2691,74 @@ void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPix
|
|||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
|
||||
void SoftRasterizerRenderer_NEON::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
{
|
||||
this->_clearColor_v128u32x4.val[0] = vdupq_n_u32(clearColor6665.color);
|
||||
this->_clearColor_v128u32x4.val[1] = this->_clearColor_v128u32x4.val[0];
|
||||
this->_clearColor_v128u32x4.val[2] = this->_clearColor_v128u32x4.val[0];
|
||||
this->_clearColor_v128u32x4.val[3] = this->_clearColor_v128u32x4.val[0];
|
||||
|
||||
this->_clearDepth_v128u32x4.val[0] = vdupq_n_u32(clearAttributes.depth);
|
||||
this->_clearDepth_v128u32x4.val[1] = this->_clearDepth_v128u32x4.val[0];
|
||||
this->_clearDepth_v128u32x4.val[2] = this->_clearDepth_v128u32x4.val[0];
|
||||
this->_clearDepth_v128u32x4.val[3] = this->_clearDepth_v128u32x4.val[0];
|
||||
|
||||
this->_clearAttrOpaquePolyID_v128u8x4.val[0] = vdupq_n_u8(clearAttributes.opaquePolyID);
|
||||
this->_clearAttrOpaquePolyID_v128u8x4.val[1] = this->_clearAttrOpaquePolyID_v128u8x4.val[0];
|
||||
this->_clearAttrOpaquePolyID_v128u8x4.val[2] = this->_clearAttrOpaquePolyID_v128u8x4.val[0];
|
||||
this->_clearAttrOpaquePolyID_v128u8x4.val[3] = this->_clearAttrOpaquePolyID_v128u8x4.val[0];
|
||||
|
||||
this->_clearAttrTranslucentPolyID_v128u8x4.val[0] = vdupq_n_u8(clearAttributes.translucentPolyID);
|
||||
this->_clearAttrTranslucentPolyID_v128u8x4.val[1] = this->_clearAttrTranslucentPolyID_v128u8x4.val[0];
|
||||
this->_clearAttrTranslucentPolyID_v128u8x4.val[2] = this->_clearAttrTranslucentPolyID_v128u8x4.val[0];
|
||||
this->_clearAttrTranslucentPolyID_v128u8x4.val[3] = this->_clearAttrTranslucentPolyID_v128u8x4.val[0];
|
||||
|
||||
this->_clearAttrStencil_v128u8x4.val[0] = vdupq_n_u8(clearAttributes.stencil);
|
||||
this->_clearAttrStencil_v128u8x4.val[1] = this->_clearAttrStencil_v128u8x4.val[0];
|
||||
this->_clearAttrStencil_v128u8x4.val[2] = this->_clearAttrStencil_v128u8x4.val[0];
|
||||
this->_clearAttrStencil_v128u8x4.val[3] = this->_clearAttrStencil_v128u8x4.val[0];
|
||||
|
||||
this->_clearAttrIsFogged_v128u8x4.val[0] = vdupq_n_u8(clearAttributes.isFogged);
|
||||
this->_clearAttrIsFogged_v128u8x4.val[1] = this->_clearAttrIsFogged_v128u8x4.val[0];
|
||||
this->_clearAttrIsFogged_v128u8x4.val[2] = this->_clearAttrIsFogged_v128u8x4.val[0];
|
||||
this->_clearAttrIsFogged_v128u8x4.val[3] = this->_clearAttrIsFogged_v128u8x4.val[0];
|
||||
|
||||
this->_clearAttrIsTranslucentPoly_v128u8x4.val[0] = vdupq_n_u8(clearAttributes.isTranslucentPoly);
|
||||
this->_clearAttrIsTranslucentPoly_v128u8x4.val[1] = this->_clearAttrIsTranslucentPoly_v128u8x4.val[0];
|
||||
this->_clearAttrIsTranslucentPoly_v128u8x4.val[2] = this->_clearAttrIsTranslucentPoly_v128u8x4.val[0];
|
||||
this->_clearAttrIsTranslucentPoly_v128u8x4.val[3] = this->_clearAttrIsTranslucentPoly_v128u8x4.val[0];
|
||||
|
||||
this->_clearAttrPolyFacing_v128u8x4.val[0] = vdupq_n_u8(clearAttributes.polyFacing);
|
||||
this->_clearAttrPolyFacing_v128u8x4.val[1] = this->_clearAttrPolyFacing_v128u8x4.val[0];
|
||||
this->_clearAttrPolyFacing_v128u8x4.val[2] = this->_clearAttrPolyFacing_v128u8x4.val[0];
|
||||
this->_clearAttrPolyFacing_v128u8x4.val[3] = this->_clearAttrPolyFacing_v128u8x4.val[0];
|
||||
}
|
||||
|
||||
void SoftRasterizerRenderer_NEON::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i+=(sizeof(v128u8)*4))
|
||||
{
|
||||
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 0, this->_clearColor_v128u32x4);
|
||||
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 16, this->_clearColor_v128u32x4);
|
||||
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 32, this->_clearColor_v128u32x4);
|
||||
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 48, this->_clearColor_v128u32x4);
|
||||
|
||||
vst1q_u32_x4((this->_framebufferAttributes->depth + i) + 0, this->_clearDepth_v128u32x4);
|
||||
vst1q_u32_x4((this->_framebufferAttributes->depth + i) + 16, this->_clearDepth_v128u32x4);
|
||||
vst1q_u32_x4((this->_framebufferAttributes->depth + i) + 32, this->_clearDepth_v128u32x4);
|
||||
vst1q_u32_x4((this->_framebufferAttributes->depth + i) + 48, this->_clearDepth_v128u32x4);
|
||||
|
||||
vst1q_u8_x4((this->_framebufferAttributes->opaquePolyID + i), this->_clearAttrOpaquePolyID_v128u8x4);
|
||||
vst1q_u8_x4((this->_framebufferAttributes->translucentPolyID + i), this->_clearAttrTranslucentPolyID_v128u8x4);
|
||||
vst1q_u8_x4((this->_framebufferAttributes->stencil + i), this->_clearAttrStencil_v128u8x4);
|
||||
vst1q_u8_x4((this->_framebufferAttributes->isFogged + i), this->_clearAttrIsFogged_v128u8x4);
|
||||
vst1q_u8_x4((this->_framebufferAttributes->isTranslucentPoly + i), this->_clearAttrIsTranslucentPoly_v128u8x4);
|
||||
vst1q_u8_x4((this->_framebufferAttributes->polyFacing + i), this->_clearAttrPolyFacing_v128u8x4);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
|
||||
void SoftRasterizerRenderer_AltiVec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||
|
@ -2727,7 +2799,7 @@ void SoftRasterizerRenderer_AltiVec::LoadClearValues(const FragmentColor &clearC
|
|||
|
||||
void SoftRasterizerRenderer_AltiVec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||
{
|
||||
for (size_t i = startPixel; i < endPixel; i+=16)
|
||||
for (size_t i = startPixel; i < endPixel; i+=sizeof(v128u8))
|
||||
{
|
||||
vec_st(this->_clearColor_v128u32, (i * 4) + 0, this->_framebufferColor);
|
||||
vec_st(this->_clearColor_v128u32, (i * 4) + 16, this->_framebufferColor);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) 2009-2021 DeSmuME team
|
||||
Copyright (C) 2009-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,9 +21,6 @@
|
|||
#include "render3D.h"
|
||||
#include "gfx3d.h"
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#define SOFTRASTERIZER_MAX_THREADS 32
|
||||
|
||||
|
@ -138,6 +135,8 @@ public:
|
|||
class SoftRasterizerRenderer : public Render3D_AVX2
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
class SoftRasterizerRenderer : public Render3D_NEON
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
class SoftRasterizerRenderer : public Render3D_AltiVec
|
||||
#else
|
||||
|
@ -218,26 +217,6 @@ template <size_t SIMDBYTES>
|
|||
class SoftRasterizer_SIMD : public SoftRasterizerRenderer
|
||||
{
|
||||
protected:
|
||||
#if defined(ENABLE_AVX2)
|
||||
v256u32 _clearColor_v256u32;
|
||||
v256u32 _clearDepth_v256u32;
|
||||
v256u8 _clearAttrOpaquePolyID_v256u8;
|
||||
v256u8 _clearAttrTranslucentPolyID_v256u8;
|
||||
v256u8 _clearAttrStencil_v256u8;
|
||||
v256u8 _clearAttrIsFogged_v256u8;
|
||||
v256u8 _clearAttrIsTranslucentPoly_v256u8;
|
||||
v256u8 _clearAttrPolyFacing_v256u8;
|
||||
#elif defined(ENABLE_SSE2) || defined(ENABLE_ALTIVEC)
|
||||
v128u32 _clearColor_v128u32;
|
||||
v128u32 _clearDepth_v128u32;
|
||||
v128u8 _clearAttrOpaquePolyID_v128u8;
|
||||
v128u8 _clearAttrTranslucentPolyID_v128u8;
|
||||
v128u8 _clearAttrStencil_v128u8;
|
||||
v128u8 _clearAttrIsFogged_v128u8;
|
||||
v128u8 _clearAttrIsTranslucentPoly_v128u8;
|
||||
v128u8 _clearAttrPolyFacing_v128u8;
|
||||
#endif
|
||||
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes) = 0;
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
|
@ -251,6 +230,15 @@ public:
|
|||
class SoftRasterizerRenderer_AVX2 : public SoftRasterizer_SIMD<32>
|
||||
{
|
||||
protected:
|
||||
v256u32 _clearColor_v256u32;
|
||||
v256u32 _clearDepth_v256u32;
|
||||
v256u8 _clearAttrOpaquePolyID_v256u8;
|
||||
v256u8 _clearAttrTranslucentPolyID_v256u8;
|
||||
v256u8 _clearAttrStencil_v256u8;
|
||||
v256u8 _clearAttrIsFogged_v256u8;
|
||||
v256u8 _clearAttrIsTranslucentPoly_v256u8;
|
||||
v256u8 _clearAttrPolyFacing_v256u8;
|
||||
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
|
@ -261,6 +249,34 @@ public:
|
|||
class SoftRasterizerRenderer_SSE2 : public SoftRasterizer_SIMD<16>
|
||||
{
|
||||
protected:
|
||||
v128u32 _clearColor_v128u32;
|
||||
v128u32 _clearDepth_v128u32;
|
||||
v128u8 _clearAttrOpaquePolyID_v128u8;
|
||||
v128u8 _clearAttrTranslucentPolyID_v128u8;
|
||||
v128u8 _clearAttrStencil_v128u8;
|
||||
v128u8 _clearAttrIsFogged_v128u8;
|
||||
v128u8 _clearAttrIsTranslucentPoly_v128u8;
|
||||
v128u8 _clearAttrPolyFacing_v128u8;
|
||||
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
virtual void ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel);
|
||||
};
|
||||
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
class SoftRasterizerRenderer_NEON : public SoftRasterizer_SIMD<16>
|
||||
{
|
||||
protected:
|
||||
uint32x4x4_t _clearColor_v128u32x4;
|
||||
uint32x4x4_t _clearDepth_v128u32x4;
|
||||
uint8x16x4_t _clearAttrOpaquePolyID_v128u8x4;
|
||||
uint8x16x4_t _clearAttrTranslucentPolyID_v128u8x4;
|
||||
uint8x16x4_t _clearAttrStencil_v128u8x4;
|
||||
uint8x16x4_t _clearAttrIsFogged_v128u8x4;
|
||||
uint8x16x4_t _clearAttrIsTranslucentPoly_v128u8x4;
|
||||
uint8x16x4_t _clearAttrPolyFacing_v128u8x4;
|
||||
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
|
@ -271,6 +287,15 @@ public:
|
|||
class SoftRasterizerRenderer_AltiVec : public SoftRasterizer_SIMD<16>
|
||||
{
|
||||
protected:
|
||||
v128u32 _clearColor_v128u32;
|
||||
v128u32 _clearDepth_v128u32;
|
||||
v128u8 _clearAttrOpaquePolyID_v128u8;
|
||||
v128u8 _clearAttrTranslucentPolyID_v128u8;
|
||||
v128u8 _clearAttrStencil_v128u8;
|
||||
v128u8 _clearAttrIsFogged_v128u8;
|
||||
v128u8 _clearAttrIsTranslucentPoly_v128u8;
|
||||
v128u8 _clearAttrPolyFacing_v128u8;
|
||||
|
||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||
|
||||
public:
|
||||
|
|
|
@ -20,10 +20,6 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "utils/bits.h"
|
||||
#include "MMU.h"
|
||||
#include "NDSSystem.h"
|
||||
|
@ -779,6 +775,7 @@ Render3DError Render3D_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_t h)
|
|||
}
|
||||
|
||||
#if defined(ENABLE_AVX2)
|
||||
|
||||
void Render3D_AVX2::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog)
|
||||
{
|
||||
const __m256i calcDepthConstants = _mm256_set1_epi32(0x01FF0200);
|
||||
|
@ -822,7 +819,9 @@ void Render3D_AVX2::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u
|
|||
_mm256_store_si256( (__m256i *)(outFog + i), _mm256_permute4x64_epi64(_mm256_packus_epi16(clearFogLo, clearFogHi), 0xD8) );
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_SSE2)
|
||||
|
||||
void Render3D_SSE2::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog)
|
||||
{
|
||||
const __m128i calcDepthConstants = _mm_set1_epi32(0x01FF0200);
|
||||
|
@ -866,7 +865,46 @@ void Render3D_SSE2::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u
|
|||
_mm_store_si128((__m128i *)(outFog + i), _mm_packs_epi16(clearFogLo, clearFogHi));
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
|
||||
void Render3D_NEON::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog)
|
||||
{
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT; i+=sizeof(v128u16))
|
||||
{
|
||||
// Copy the colors to the color buffer.
|
||||
vst1q_u16_x2( outColor16 + i, vld1q_u16_x2(inColor16 + i) );
|
||||
|
||||
// Write the depth values to the depth buffer using the following formula from GBATEK.
|
||||
// 15-bit to 24-bit depth formula from http://problemkaputt.de/gbatek.htm#ds3drearplane
|
||||
// D24 = (D15 * 0x0200) + (((D15 + 1) >> 15) * 0x01FF);
|
||||
//
|
||||
// For now, let's forget GBATEK (which could be wrong) and try using a simpified formula:
|
||||
// D24 = (D15 * 0x0200) + 0x01FF;
|
||||
|
||||
const uint16x8x2_t clearDepth = vld1q_u16_x2(inDepth16 + i);
|
||||
|
||||
const uint16x8x2_t clearDepthValue = {
|
||||
vandq_u16(clearDepth.val[0], vdupq_n_u16(0x7FFF)),
|
||||
vandq_u16(clearDepth.val[1], vdupq_n_u16(0x7FFF))
|
||||
};
|
||||
|
||||
const uint32x4x4_t calcDepth = {
|
||||
vmlal_n_u16( vdupq_n_u32(0x000001FF), vget_low_u16(clearDepthValue.val[0]), 0x0200 ),
|
||||
vmlal_n_u16( vdupq_n_u32(0x000001FF), vget_high_u16(clearDepthValue.val[0]), 0x0200 ),
|
||||
vmlal_n_u16( vdupq_n_u32(0x000001FF), vget_low_u16(clearDepthValue.val[1]), 0x0200 ),
|
||||
vmlal_n_u16( vdupq_n_u32(0x000001FF), vget_high_u16(clearDepthValue.val[1]), 0x0200 )
|
||||
};
|
||||
|
||||
vst1q_u32_x4(outDepth24 + i, calcDepth);
|
||||
|
||||
// Write the fog flags to the fog flag buffer.
|
||||
vst1q_u8( outFog + i, vuzp1q_u16(vshrq_n_u16(clearDepth.val[0], 15), vshrq_n_u16(clearDepth.val[1], 15)) );
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
|
||||
void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog)
|
||||
{
|
||||
const v128u16 calcDepthMul = ((v128u16){0x0200,0,0x0200,0,0x0200,0,0x0200,0});
|
||||
|
@ -915,6 +953,7 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons
|
|||
vec_st( vec_pack(clearFogLo, clearFogHi), 0, outFog + i );
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template Render3D_SIMD<16>::Render3D_SIMD();
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
Copyright (C) 2006-2007 shash
|
||||
Copyright (C) 2007-2019 DeSmuME team
|
||||
Copyright (C) 2007-2022 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -192,7 +192,7 @@ protected:
|
|||
CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
|
||||
|
||||
virtual void _ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog);
|
||||
template<bool ISCOLORBLANK, bool ISDEPTHBLANK> void _ClearImageScrolledLoop(const u8 xScroll, const u8 yScroll, const u16 *__restrict inColor16, const u16 *__restrict inDepth16,
|
||||
u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog);
|
||||
|
@ -286,7 +286,7 @@ public:
|
|||
|
||||
class Render3D_AVX2 : public Render3D_SIMD<32>
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual void _ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog);
|
||||
};
|
||||
|
||||
|
@ -294,7 +294,15 @@ public:
|
|||
|
||||
class Render3D_SSE2 : public Render3D_SIMD<16>
|
||||
{
|
||||
public:
|
||||
public:
|
||||
virtual void _ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog);
|
||||
};
|
||||
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
|
||||
class Render3D_NEON : public Render3D_SIMD<16>
|
||||
{
|
||||
public:
|
||||
virtual void _ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog);
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue