Render3D: Fix clear image on big-endian systems.
- Also renames "Altivec" to "AltiVec" to remain consistent with Colorspace Handler's naming.
- Also adds an AltiVec-accelerated version of the clear image parser.
This commit is contained in:
parent
c49a480d51
commit
620d3b3a26
|
@ -684,7 +684,7 @@ class OpenGLRenderer : public Render3D_AVX2
|
||||||
#elif defined(ENABLE_SSE2)
|
#elif defined(ENABLE_SSE2)
|
||||||
class OpenGLRenderer : public Render3D_SSE2
|
class OpenGLRenderer : public Render3D_SSE2
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
class OpenGLRenderer : public Render3D_Altivec
|
class OpenGLRenderer : public Render3D_AltiVec
|
||||||
#else
|
#else
|
||||||
class OpenGLRenderer : public Render3D
|
class OpenGLRenderer : public Render3D
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -417,7 +417,7 @@ static void makeTables()
|
||||||
|
|
||||||
// Is GBATEK actually correct here? Let's try using a simplified formula and see if it's
|
// Is GBATEK actually correct here? Let's try using a simplified formula and see if it's
|
||||||
// more accurate.
|
// more accurate.
|
||||||
dsDepthExtend_15bit_to_24bit[i] = LE_TO_LOCAL_32( (i*0x0200) + 0x01FF );
|
dsDepthExtend_15bit_to_24bit[i] = (i * 0x0200) + 0x01FF;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < 65536; i++)
|
for (size_t i = 0; i < 65536; i++)
|
||||||
|
|
|
@ -1424,7 +1424,7 @@ static Render3D* SoftRasterizerRendererCreate()
|
||||||
#elif defined(ENABLE_SSE2)
|
#elif defined(ENABLE_SSE2)
|
||||||
return new SoftRasterizerRenderer_SSE2;
|
return new SoftRasterizerRenderer_SSE2;
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
return new SoftRasterizerRenderer_Altivec;
|
return new SoftRasterizerRenderer_AltiVec;
|
||||||
#else
|
#else
|
||||||
return new SoftRasterizerRenderer;
|
return new SoftRasterizerRenderer;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1439,7 +1439,7 @@ static void SoftRasterizerRendererDestroy()
|
||||||
#elif defined(ENABLE_SSE2)
|
#elif defined(ENABLE_SSE2)
|
||||||
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
SoftRasterizerRenderer_SSE2 *oldRenderer = (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
SoftRasterizerRenderer_Altivec *oldRenderer = (SoftRasterizerRenderer_Altivec *)CurrentRenderer;
|
SoftRasterizerRenderer_AltiVec *oldRenderer = (SoftRasterizerRenderer_AltiVec *)CurrentRenderer;
|
||||||
#else
|
#else
|
||||||
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
|
SoftRasterizerRenderer *oldRenderer = (SoftRasterizerRenderer *)CurrentRenderer;
|
||||||
#endif
|
#endif
|
||||||
|
@ -2682,7 +2682,7 @@ void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPix
|
||||||
|
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
|
|
||||||
void SoftRasterizerRenderer_Altivec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
void SoftRasterizerRenderer_AltiVec::LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes)
|
||||||
{
|
{
|
||||||
this->_clearColor_v128u32 = (v128u32){clearColor6665.color,clearColor6665.color,clearColor6665.color,clearColor6665.color};
|
this->_clearColor_v128u32 = (v128u32){clearColor6665.color,clearColor6665.color,clearColor6665.color,clearColor6665.color};
|
||||||
this->_clearDepth_v128u32 = (v128u32){clearAttributes.depth,clearAttributes.depth,clearAttributes.depth,clearAttributes.depth};
|
this->_clearDepth_v128u32 = (v128u32){clearAttributes.depth,clearAttributes.depth,clearAttributes.depth,clearAttributes.depth};
|
||||||
|
@ -2718,7 +2718,7 @@ void SoftRasterizerRenderer_Altivec::LoadClearValues(const FragmentColor &clearC
|
||||||
clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing};
|
clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing};
|
||||||
}
|
}
|
||||||
|
|
||||||
void SoftRasterizerRenderer_Altivec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
void SoftRasterizerRenderer_AltiVec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
|
||||||
{
|
{
|
||||||
for (size_t i = startPixel; i < endPixel; i+=16)
|
for (size_t i = startPixel; i < endPixel; i+=16)
|
||||||
{
|
{
|
||||||
|
|
|
@ -139,7 +139,7 @@ class SoftRasterizerRenderer : public Render3D_AVX2
|
||||||
#elif defined(ENABLE_SSE2)
|
#elif defined(ENABLE_SSE2)
|
||||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
class SoftRasterizerRenderer : public Render3D_Altivec
|
class SoftRasterizerRenderer : public Render3D_AltiVec
|
||||||
#else
|
#else
|
||||||
class SoftRasterizerRenderer : public Render3D
|
class SoftRasterizerRenderer : public Render3D
|
||||||
#endif
|
#endif
|
||||||
|
@ -268,7 +268,7 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
class SoftRasterizerRenderer_Altivec : public SoftRasterizer_SIMD<16>
|
class SoftRasterizerRenderer_AltiVec : public SoftRasterizer_SIMD<16>
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
virtual void LoadClearValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||||
|
|
|
@ -507,9 +507,9 @@ void Render3D::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *_
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT; i++)
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT; i++)
|
||||||
{
|
{
|
||||||
outColor16[i] = inColor16[i];
|
outColor16[i] = LE_TO_LOCAL_16(inColor16[i]);
|
||||||
outDepth24[i] = DS_DEPTH15TO24(inDepth16[i]);
|
outDepth24[i] = DS_DEPTH15TO24( LE_TO_LOCAL_16(inDepth16[i]) );
|
||||||
outFog[i] = BIT15(inDepth16[i]);
|
outFog[i] = BIT15( LE_TO_LOCAL_16(inDepth16[i]) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -558,7 +558,7 @@ void Render3D::_ClearImageScrolledLoop(const u8 xScroll, const u8 yScroll, const
|
||||||
// - Blazer Drive
|
// - Blazer Drive
|
||||||
if (!ISCOLORBLANK)
|
if (!ISCOLORBLANK)
|
||||||
{
|
{
|
||||||
outColor16[dstIndex] = inColor16[srcIndex];
|
outColor16[dstIndex] = LE_TO_LOCAL_16(inColor16[srcIndex]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear image depth buffer, where the first 15 bits are converted to
|
// Clear image depth buffer, where the first 15 bits are converted to
|
||||||
|
@ -571,8 +571,8 @@ void Render3D::_ClearImageScrolledLoop(const u8 xScroll, const u8 yScroll, const
|
||||||
// - The Chronicles of Narnia: The Lion, the Witch and the Wardrobe
|
// - The Chronicles of Narnia: The Lion, the Witch and the Wardrobe
|
||||||
if (!ISDEPTHBLANK)
|
if (!ISDEPTHBLANK)
|
||||||
{
|
{
|
||||||
outDepth24[dstIndex] = DS_DEPTH15TO24(inDepth16[srcIndex]);
|
outDepth24[dstIndex] = DS_DEPTH15TO24( LE_TO_LOCAL_16(inDepth16[srcIndex]) );
|
||||||
outFog[dstIndex] = BIT15(inDepth16[srcIndex]);
|
outFog[dstIndex] = BIT15( LE_TO_LOCAL_16(inDepth16[srcIndex]) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -863,7 +863,56 @@ void Render3D_SSE2::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u
|
||||||
// Write the fog flags to the fog flag buffer.
|
// Write the fog flags to the fog flag buffer.
|
||||||
const __m128i clearFogLo = _mm_srli_epi16(clearDepthLo, 15);
|
const __m128i clearFogLo = _mm_srli_epi16(clearDepthLo, 15);
|
||||||
const __m128i clearFogHi = _mm_srli_epi16(clearDepthHi, 15);
|
const __m128i clearFogHi = _mm_srli_epi16(clearDepthHi, 15);
|
||||||
_mm_store_si128((__m128i *)(this->clearImageFogBuffer + i), _mm_packs_epi16(clearFogLo, clearFogHi));
|
_mm_store_si128((__m128i *)(outFog + i), _mm_packs_epi16(clearFogLo, clearFogHi));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
|
void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog)
|
||||||
|
{
|
||||||
|
const v128u16 calcDepthMul = ((v128u16){0x0200,0,0x0200,0,0x0200,0,0x0200,0});
|
||||||
|
const v128u32 calcDepthAdd = ((v128u32){0x01FF,0x01FF,0x01FF,0x01FF});
|
||||||
|
|
||||||
|
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT; i+=sizeof(v128u16))
|
||||||
|
{
|
||||||
|
// Copy the colors to the color buffer.
|
||||||
|
v128u16 inColor16SwappedLo = vec_ld( 0, inColor16 + i);
|
||||||
|
v128u16 inColor16SwappedHi = vec_ld(16, inColor16 + i);
|
||||||
|
|
||||||
|
inColor16SwappedLo = vec_perm(inColor16SwappedLo, inColor16SwappedLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||||
|
inColor16SwappedHi = vec_perm(inColor16SwappedHi, inColor16SwappedHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||||
|
|
||||||
|
vec_st(inColor16SwappedLo, 0, outColor16 + i);
|
||||||
|
vec_st(inColor16SwappedHi, 16, outColor16 + i);
|
||||||
|
|
||||||
|
// Write the depth values to the depth buffer using the following formula from GBATEK.
|
||||||
|
// 15-bit to 24-bit depth formula from http://problemkaputt.de/gbatek.htm#ds3drearplane
|
||||||
|
// D24 = (D15 * 0x0200) + (((D15 + 1) >> 15) * 0x01FF);
|
||||||
|
//
|
||||||
|
// For now, let's forget GBATEK (which could be wrong) and try using a simplified formula:
|
||||||
|
// D24 = (D15 * 0x0200) + 0x01FF;
|
||||||
|
v128u16 clearDepthLo = vec_ld( 0, inDepth16 + i);
|
||||||
|
v128u16 clearDepthHi = vec_ld(16, inDepth16 + i);
|
||||||
|
|
||||||
|
clearDepthLo = vec_perm(clearDepthLo, clearDepthLo, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||||
|
clearDepthHi = vec_perm(clearDepthHi, clearDepthHi, ((v128u8){1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}));
|
||||||
|
|
||||||
|
const v128u16 clearDepthValueLo = vec_and(clearDepthLo, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
|
||||||
|
const v128u16 clearDepthValueHi = vec_and(clearDepthHi, ((v128u16){0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF,0x7FFF}));
|
||||||
|
|
||||||
|
const v128u16 calcDepth0 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueLo, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
|
||||||
|
const v128u16 calcDepth1 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueLo, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
|
||||||
|
const v128u16 calcDepth2 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueHi, ((v128u8){0x10,0x11,0,0, 0x12,0x13,0,0, 0x14,0x15,0,0, 0x16,0x17,0,0}));
|
||||||
|
const v128u16 calcDepth3 = vec_perm(((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), clearDepthValueHi, ((v128u8){0x18,0x19,0,0, 0x1A,0x1B,0,0, 0x1C,0x1D,0,0, 0x1E,0x1F,0,0}));
|
||||||
|
|
||||||
|
vec_st( vec_msum(calcDepth0, calcDepthMul, calcDepthAdd), 0, outDepth24 + i);
|
||||||
|
vec_st( vec_msum(calcDepth1, calcDepthMul, calcDepthAdd), 16, outDepth24 + i);
|
||||||
|
vec_st( vec_msum(calcDepth2, calcDepthMul, calcDepthAdd), 32, outDepth24 + i);
|
||||||
|
vec_st( vec_msum(calcDepth3, calcDepthMul, calcDepthAdd), 48, outDepth24 + i);
|
||||||
|
|
||||||
|
// Write the fog flags to the fog flag buffer.
|
||||||
|
const v128u16 clearFogLo = vec_sr(clearDepthLo, ((v128u16){15,15,15,15,15,15,15,15}));
|
||||||
|
const v128u16 clearFogHi = vec_sr(clearDepthHi, ((v128u16){15,15,15,15,15,15,15,15}));
|
||||||
|
vec_st( vec_pack(clearFogLo, clearFogHi), 0, outFog + i );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -300,8 +300,11 @@ public:
|
||||||
|
|
||||||
#elif defined(ENABLE_ALTIVEC)
|
#elif defined(ENABLE_ALTIVEC)
|
||||||
|
|
||||||
class Render3D_Altivec : public Render3D_SIMD<16>
|
class Render3D_AltiVec : public Render3D_SIMD<16>
|
||||||
{};
|
{
|
||||||
|
public:
|
||||||
|
virtual void _ClearImageBaseLoop(const u16 *__restrict inColor16, const u16 *__restrict inDepth16, u16 *__restrict outColor16, u32 *__restrict outDepth24, u8 *__restrict outFog);
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue