GPU:
- Display capture blending functions now support RGB888 color format. (Related to r5433. This rework is still incomplete.)
This commit is contained in:
parent
c7e3298b94
commit
a966765ae5
|
@ -5517,7 +5517,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
|
|||
}
|
||||
}
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH>
|
||||
template <NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH>
|
||||
void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
||||
{
|
||||
assert( (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH/2) || (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH) );
|
||||
|
@ -5708,11 +5708,11 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
{
|
||||
if (this->isLineRenderNative[l])
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
}
|
||||
|
||||
newCaptureLineNativeState = false;
|
||||
|
@ -5727,11 +5727,11 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
|
||||
if (this->isLineRenderNative[l])
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
}
|
||||
|
||||
newCaptureLineNativeState = this->isLineRenderNative[l];
|
||||
|
@ -5743,7 +5743,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
{
|
||||
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
newCaptureLineNativeState = false;
|
||||
}
|
||||
else
|
||||
|
@ -5754,7 +5754,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine);
|
||||
}
|
||||
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
newCaptureLineNativeState = true;
|
||||
}
|
||||
}
|
||||
|
@ -5762,7 +5762,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
{
|
||||
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -5772,7 +5772,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine);
|
||||
}
|
||||
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
|
||||
}
|
||||
|
||||
newCaptureLineNativeState = false;
|
||||
|
@ -5879,22 +5879,22 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
{
|
||||
if (this->isLineRenderNative[l])
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (this->isLineRenderNative[l])
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5904,22 +5904,22 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
|||
{
|
||||
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6244,7 +6244,7 @@ FragmentColor GPUEngineA::_RenderLine_DispCapture_BlendFunc(const FragmentColor
|
|||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
template<NDSColorFormat COLORFORMAT>
|
||||
template <NDSColorFormat COLORFORMAT>
|
||||
__m128i GPUEngineA::_RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB)
|
||||
{
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
@ -6356,8 +6356,8 @@ __m128i GPUEngineA::_RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA,
|
|||
}
|
||||
#endif
|
||||
|
||||
template<bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
|
||||
void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l)
|
||||
template <NDSColorFormat OUTPUTFORMAT, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
|
||||
void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l)
|
||||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA);
|
||||
|
@ -6368,49 +6368,91 @@ void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA,
|
|||
size_t offset = _gpuDstToSrcIndex[_gpuDstLineIndex[l] * dispInfo.customWidth] - (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = length - (length % 8);
|
||||
for (; i < ssePixCount; i += 8)
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
__m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA + i)) : _mm_set_epi16(srcA[offset + i + 7],
|
||||
srcA[offset + i + 6],
|
||||
srcA[offset + i + 5],
|
||||
srcA[offset + i + 4],
|
||||
srcA[offset + i + 3],
|
||||
srcA[offset + i + 2],
|
||||
srcA[offset + i + 1],
|
||||
srcA[offset + i + 0]);
|
||||
const u32 *srcA_32 = (const u32 *)srcA;
|
||||
const u32 *srcB_32 = (const u32 *)srcB;
|
||||
FragmentColor *dst32 = (FragmentColor *)dst;
|
||||
|
||||
__m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[offset + i + 7],
|
||||
srcB[offset + i + 6],
|
||||
srcB[offset + i + 5],
|
||||
srcB[offset + i + 4],
|
||||
srcB[offset + i + 3],
|
||||
srcB[offset + i + 2],
|
||||
srcB[offset + i + 1],
|
||||
srcB[offset + i + 0]);
|
||||
|
||||
_mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
}
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = length - (length % 4);
|
||||
for (; i < ssePixCount; i+=4)
|
||||
{
|
||||
__m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_32 + i)) : _mm_set_epi32(srcA_32[offset + i + 3],
|
||||
srcA_32[offset + i + 2],
|
||||
srcA_32[offset + i + 1],
|
||||
srcA_32[offset + i + 0]);
|
||||
|
||||
__m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_32 + i)) : _mm_set_epi32(srcB_32[offset + i + 3],
|
||||
srcB_32[offset + i + 2],
|
||||
srcB_32[offset + i + 1],
|
||||
srcB_32[offset + i + 0]);
|
||||
|
||||
_mm_store_si128( (__m128i *)(dst32 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<OUTPUTFORMAT>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < length; i++)
|
||||
for (; i < length; i++)
|
||||
{
|
||||
const FragmentColor colorA = (!CAPTUREFROMNATIVESRCA) ? ((const FragmentColor *)srcA)[i] : ((const FragmentColor *)srcA)[offset + i];
|
||||
const FragmentColor colorB = (!CAPTUREFROMNATIVESRCB) ? ((const FragmentColor *)srcB)[i] : ((const FragmentColor *)srcB)[offset + i];
|
||||
|
||||
((FragmentColor *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[offset + i];
|
||||
const u16 colorB = (!CAPTUREFROMNATIVESRCB) ? srcB[i] : srcB[offset + i];
|
||||
const u16 *srcA_16 = (const u16 *)srcA;
|
||||
const u16 *srcB_16 = (const u16 *)srcB;
|
||||
u16 *dst16 = (u16 *)dst;
|
||||
|
||||
dst[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = length - (length % 8);
|
||||
for (; i < ssePixCount; i+=8)
|
||||
{
|
||||
__m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_16 + i)) : _mm_set_epi16(srcA_16[offset + i + 7],
|
||||
srcA_16[offset + i + 6],
|
||||
srcA_16[offset + i + 5],
|
||||
srcA_16[offset + i + 4],
|
||||
srcA_16[offset + i + 3],
|
||||
srcA_16[offset + i + 2],
|
||||
srcA_16[offset + i + 1],
|
||||
srcA_16[offset + i + 0]);
|
||||
|
||||
__m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_16 + i)) : _mm_set_epi16(srcB_16[offset + i + 7],
|
||||
srcB_16[offset + i + 6],
|
||||
srcB_16[offset + i + 5],
|
||||
srcB_16[offset + i + 4],
|
||||
srcB_16[offset + i + 3],
|
||||
srcB_16[offset + i + 2],
|
||||
srcB_16[offset + i + 1],
|
||||
srcB_16[offset + i + 0]);
|
||||
|
||||
_mm_store_si128( (__m128i *)(dst16 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; i < length; i++)
|
||||
{
|
||||
const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA_16[i] : srcA_16[offset + i];
|
||||
const u16 colorB = (!CAPTUREFROMNATIVESRCB) ? srcB_16[i] : srcB_16[offset + i];
|
||||
|
||||
dst16[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
|
||||
void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l)
|
||||
template <NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
|
||||
void GPUEngineA::_RenderLine_DispCapture_Blend(const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt, const size_t l)
|
||||
{
|
||||
const u8 blendEVA = GPU->GetEngineMain()->_dispCapCnt.EVA;
|
||||
const u8 blendEVB = GPU->GetEngineMain()->_dispCapCnt.EVB;
|
||||
const u8 blendEVA = this->_dispCapCnt.EVA;
|
||||
const u8 blendEVB = this->_dispCapCnt.EVB;
|
||||
|
||||
if (CAPTURETONATIVEDST)
|
||||
{
|
||||
|
@ -6418,55 +6460,93 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB,
|
|||
const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA);
|
||||
const __m128i blendEVB_vec128 = _mm_set1_epi16(blendEVB);
|
||||
|
||||
for (size_t i = 0; i < CAPTURELENGTH; i += 8)
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
__m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA + i)) : _mm_set_epi16(srcA[_gpuDstPitchIndex[i+7]],
|
||||
srcA[_gpuDstPitchIndex[i+6]],
|
||||
srcA[_gpuDstPitchIndex[i+5]],
|
||||
srcA[_gpuDstPitchIndex[i+4]],
|
||||
srcA[_gpuDstPitchIndex[i+3]],
|
||||
srcA[_gpuDstPitchIndex[i+2]],
|
||||
srcA[_gpuDstPitchIndex[i+1]],
|
||||
srcA[_gpuDstPitchIndex[i+0]]);
|
||||
const u32 *srcA_32 = (const u32 *)srcA;
|
||||
const u32 *srcB_32 = (const u32 *)srcB;
|
||||
FragmentColor *dst32 = (FragmentColor *)dst;
|
||||
|
||||
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]],
|
||||
srcB[_gpuDstPitchIndex[i+6]],
|
||||
srcB[_gpuDstPitchIndex[i+5]],
|
||||
srcB[_gpuDstPitchIndex[i+4]],
|
||||
srcB[_gpuDstPitchIndex[i+3]],
|
||||
srcB[_gpuDstPitchIndex[i+2]],
|
||||
srcB[_gpuDstPitchIndex[i+1]],
|
||||
srcB[_gpuDstPitchIndex[i+0]]);
|
||||
for (size_t i = 0; i < CAPTURELENGTH; i+=4)
|
||||
{
|
||||
__m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_32 + i)) : _mm_set_epi32(srcA_32[_gpuDstPitchIndex[i+3]],
|
||||
srcA_32[_gpuDstPitchIndex[i+2]],
|
||||
srcA_32[_gpuDstPitchIndex[i+1]],
|
||||
srcA_32[_gpuDstPitchIndex[i+0]]);
|
||||
|
||||
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_32 + i)) : _mm_set_epi32(srcB_32[_gpuDstPitchIndex[i+3]],
|
||||
srcB_32[_gpuDstPitchIndex[i+2]],
|
||||
srcB_32[_gpuDstPitchIndex[i+1]],
|
||||
srcB_32[_gpuDstPitchIndex[i+0]]);
|
||||
|
||||
_mm_store_si128( (__m128i *)(dst32 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<OUTPUTFORMAT>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const u16 *srcA_16 = (const u16 *)srcA;
|
||||
const u16 *srcB_16 = (const u16 *)srcB;
|
||||
u16 *dst16 = (u16 *)dst;
|
||||
|
||||
_mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
for (size_t i = 0; i < CAPTURELENGTH; i+=8)
|
||||
{
|
||||
__m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_16 + i)) : _mm_set_epi16(srcA_16[_gpuDstPitchIndex[i+7]],
|
||||
srcA_16[_gpuDstPitchIndex[i+6]],
|
||||
srcA_16[_gpuDstPitchIndex[i+5]],
|
||||
srcA_16[_gpuDstPitchIndex[i+4]],
|
||||
srcA_16[_gpuDstPitchIndex[i+3]],
|
||||
srcA_16[_gpuDstPitchIndex[i+2]],
|
||||
srcA_16[_gpuDstPitchIndex[i+1]],
|
||||
srcA_16[_gpuDstPitchIndex[i+0]]);
|
||||
|
||||
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_16 + i)) : _mm_set_epi16(srcB_16[_gpuDstPitchIndex[i+7]],
|
||||
srcB_16[_gpuDstPitchIndex[i+6]],
|
||||
srcB_16[_gpuDstPitchIndex[i+5]],
|
||||
srcB_16[_gpuDstPitchIndex[i+4]],
|
||||
srcB_16[_gpuDstPitchIndex[i+3]],
|
||||
srcB_16[_gpuDstPitchIndex[i+2]],
|
||||
srcB_16[_gpuDstPitchIndex[i+1]],
|
||||
srcB_16[_gpuDstPitchIndex[i+0]]);
|
||||
|
||||
_mm_store_si128( (__m128i *)(dst16 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (size_t i = 0; i < CAPTURELENGTH; i++)
|
||||
{
|
||||
const u16 colorA = (CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[_gpuDstPitchIndex[i]];
|
||||
const u16 colorB = (CAPTUREFROMNATIVESRCB) ? srcB[i] : srcB[_gpuDstPitchIndex[i]];
|
||||
|
||||
dst[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
|
||||
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
|
||||
{
|
||||
const FragmentColor colorA = (CAPTUREFROMNATIVESRCA) ? ((const FragmentColor *)srcA)[i] : ((const FragmentColor *)srcA)[_gpuDstPitchIndex[i]];
|
||||
const FragmentColor colorB = (CAPTUREFROMNATIVESRCB) ? ((const FragmentColor *)srcB)[i] : ((const FragmentColor *)srcB)[_gpuDstPitchIndex[i]];
|
||||
|
||||
((FragmentColor *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB);
|
||||
}
|
||||
else
|
||||
{
|
||||
const u16 colorA = (CAPTUREFROMNATIVESRCA) ? ((u16 *)srcA)[i] : ((u16 *)srcA)[_gpuDstPitchIndex[i]];
|
||||
const u16 colorB = (CAPTUREFROMNATIVESRCB) ? ((u16 *)srcB)[i] : ((u16 *)srcB)[_gpuDstPitchIndex[i]];
|
||||
|
||||
((u16 *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo();
|
||||
const size_t lineWidth = GPU->GetDisplayInfo().customWidth;
|
||||
const size_t captureLineCount = _gpuCaptureLineCount[l];
|
||||
|
||||
if (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH)
|
||||
{
|
||||
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt * captureLineCount, l);
|
||||
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<OUTPUTFORMAT, CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt * captureLineCount, l);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t line = 0; line < captureLineCount; line++)
|
||||
{
|
||||
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt, l);
|
||||
srcA += dispInfo.customWidth;
|
||||
srcB += dispInfo.customWidth;
|
||||
dst += dispInfo.customWidth;
|
||||
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<OUTPUTFORMAT, CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt, l);
|
||||
srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcA + lineWidth) : (void *)((u16 *)srcA + lineWidth);
|
||||
srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcB + lineWidth) : (void *)((u16 *)srcB + lineWidth);
|
||||
dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)dst + lineWidth) : (void *)((u16 *)dst + lineWidth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1567,11 +1567,11 @@ protected:
|
|||
template<NDSColorFormat COLORFORMAT> __m128i _RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB);
|
||||
#endif
|
||||
|
||||
template<bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
|
||||
void _RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
|
||||
void _RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
|
||||
template<size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
|
||||
void _RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
|
||||
void _RenderLine_DispCapture_Blend(const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same
|
||||
|
||||
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeVRAM(const size_t l);
|
||||
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeMainMemory(const size_t l);
|
||||
|
|
Loading…
Reference in New Issue