Windows Port: Fix Windows build. (Regression from commit 037d328.)
This commit is contained in:
parent
037d3285a9
commit
f3c3228c70
|
@ -4585,15 +4585,16 @@ void GPUEngineA::_RenderLine_DispCapture_Blend_Buffer(const void *srcA, const vo
|
||||||
|
|
||||||
#ifdef USEMANUALVECTORIZATION
|
#ifdef USEMANUALVECTORIZATION
|
||||||
i = this->_RenderLine_DispCapture_Blend_VecLoop<OUTPUTFORMAT>(srcA, srcB, dst, blendEVA, blendEVB, length);
|
i = this->_RenderLine_DispCapture_Blend_VecLoop<OUTPUTFORMAT>(srcA, srcB, dst, blendEVA, blendEVB, length);
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||||
{
|
{
|
||||||
const FragmentColor *srcA_32 = (const FragmentColor *)srcA;
|
const FragmentColor *srcA_32 = (const FragmentColor *)srcA;
|
||||||
const FragmentColor *srcB_32 = (const FragmentColor *)srcB;
|
const FragmentColor *srcB_32 = (const FragmentColor *)srcB;
|
||||||
FragmentColor *dst32 = (FragmentColor *)dst;
|
FragmentColor *dst32 = (FragmentColor *)dst;
|
||||||
|
|
||||||
|
#ifdef USEMANUALVECTORIZATION
|
||||||
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
|
#endif
|
||||||
for (; i < length; i++)
|
for (; i < length; i++)
|
||||||
{
|
{
|
||||||
const FragmentColor colorA = srcA_32[i];
|
const FragmentColor colorA = srcA_32[i];
|
||||||
|
@ -4608,6 +4609,9 @@ void GPUEngineA::_RenderLine_DispCapture_Blend_Buffer(const void *srcA, const vo
|
||||||
const u16 *srcB_16 = (const u16 *)srcB;
|
const u16 *srcB_16 = (const u16 *)srcB;
|
||||||
u16 *dst16 = (u16 *)dst;
|
u16 *dst16 = (u16 *)dst;
|
||||||
|
|
||||||
|
#ifdef USEMANUALVECTORIZATION
|
||||||
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
|
#endif
|
||||||
for (; i < length; i++)
|
for (; i < length; i++)
|
||||||
{
|
{
|
||||||
const u16 colorA = srcA_16[i];
|
const u16 colorA = srcA_16[i];
|
||||||
|
|
|
@ -1185,6 +1185,11 @@ void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLine
|
||||||
CopyLineExpand<4, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 4, 4);
|
CopyLineExpand<4, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 4, 4);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
// Building on MSVC takes too long when LTO is on (typical use case), so remove these extra calls to
|
||||||
|
// CopyLineExpand() in order to reduce the number of template instantiations and keep build times reasonable.
|
||||||
|
// Other compilers, such as GCC and Clang, have no problems with building using LTO within a
|
||||||
|
// reasonable time frame.
|
||||||
|
#ifndef _MSC_VER
|
||||||
case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 5):
|
case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 5):
|
||||||
CopyLineExpand<5, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 5, 5);
|
CopyLineExpand<5, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 5, 5);
|
||||||
break;
|
break;
|
||||||
|
@ -1232,7 +1237,7 @@ void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLine
|
||||||
case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 16):
|
case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 16):
|
||||||
CopyLineExpand<16, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 16, 16);
|
CopyLineExpand<16, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 16, 16);
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
if ((dstLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0)
|
if ((dstLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0)
|
||||||
|
|
Loading…
Reference in New Issue