Windows Port: Fix Windows build. (Regression from commit 037d328.)

This commit is contained in:
rogerman 2021-09-07 02:13:35 -07:00
parent 037d3285a9
commit f3c3228c70
2 changed files with 14 additions and 5 deletions

View File

@ -4585,15 +4585,16 @@ void GPUEngineA::_RenderLine_DispCapture_Blend_Buffer(const void *srcA, const vo
#ifdef USEMANUALVECTORIZATION #ifdef USEMANUALVECTORIZATION
i = this->_RenderLine_DispCapture_Blend_VecLoop<OUTPUTFORMAT>(srcA, srcB, dst, blendEVA, blendEVB, length); i = this->_RenderLine_DispCapture_Blend_VecLoop<OUTPUTFORMAT>(srcA, srcB, dst, blendEVA, blendEVB, length);
#pragma LOOPVECTORIZE_DISABLE
#endif #endif
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{ {
const FragmentColor *srcA_32 = (const FragmentColor *)srcA; const FragmentColor *srcA_32 = (const FragmentColor *)srcA;
const FragmentColor *srcB_32 = (const FragmentColor *)srcB; const FragmentColor *srcB_32 = (const FragmentColor *)srcB;
FragmentColor *dst32 = (FragmentColor *)dst; FragmentColor *dst32 = (FragmentColor *)dst;
#ifdef USEMANUALVECTORIZATION
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < length; i++) for (; i < length; i++)
{ {
const FragmentColor colorA = srcA_32[i]; const FragmentColor colorA = srcA_32[i];
@ -4608,6 +4609,9 @@ void GPUEngineA::_RenderLine_DispCapture_Blend_Buffer(const void *srcA, const vo
const u16 *srcB_16 = (const u16 *)srcB; const u16 *srcB_16 = (const u16 *)srcB;
u16 *dst16 = (u16 *)dst; u16 *dst16 = (u16 *)dst;
#ifdef USEMANUALVECTORIZATION
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < length; i++) for (; i < length; i++)
{ {
const u16 colorA = srcA_16[i]; const u16 colorA = srcA_16[i];

View File

@ -1185,6 +1185,11 @@ void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLine
CopyLineExpand<4, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 4, 4); CopyLineExpand<4, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 4, 4);
break; break;
// Building with MSVC takes too long when LTO is enabled (the typical use case), so on MSVC the
// extra CopyLineExpand() calls below are compiled out in order to reduce the number of template
// permutations and keep build times sane. On MSVC, these widths take the default: branch instead.
// Other compilers, such as GCC and Clang, have no problems with building using LTO within a
// reasonable time frame.
#ifndef _MSC_VER
case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 5): case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 5):
CopyLineExpand<5, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 5, 5); CopyLineExpand<5, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 5, 5);
break; break;
@ -1232,7 +1237,7 @@ void CopyLineExpandHinted(const void *__restrict srcBuffer, const size_t srcLine
case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 16): case (GPU_FRAMEBUFFER_NATIVE_WIDTH * 16):
CopyLineExpand<16, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 16, 16); CopyLineExpand<16, SCALEVERTICAL, NEEDENDIANSWAP, ELEMENTSIZE>(dst, src, GPU_FRAMEBUFFER_NATIVE_WIDTH * 16, 16);
break; break;
#endif
default: default:
{ {
if ((dstLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0) if ((dstLineWidth % GPU_FRAMEBUFFER_NATIVE_WIDTH) == 0)