- Display capture blending functions now support RGB888 color format. (Related to r5433. This rework is still incomplete.)
This commit is contained in:
rogerman 2016-08-07 00:26:31 +00:00
parent c7e3298b94
commit a966765ae5
2 changed files with 163 additions and 83 deletions

View File

@ -5517,7 +5517,7 @@ void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
}
}
template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH>
template <NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH>
void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{
assert( (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH/2) || (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH) );
@ -5708,11 +5708,11 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{
if (this->isLineRenderNative[l])
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
}
else
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
}
newCaptureLineNativeState = false;
@ -5727,11 +5727,11 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
if (this->isLineRenderNative[l])
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
}
else
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
}
newCaptureLineNativeState = this->isLineRenderNative[l];
@ -5743,7 +5743,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
newCaptureLineNativeState = false;
}
else
@ -5754,7 +5754,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine);
}
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
newCaptureLineNativeState = true;
}
}
@ -5762,7 +5762,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
}
else
{
@ -5772,7 +5772,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
this->_RenderLine_DispCapture_FIFOToBuffer(fifoLine);
}
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, true>(srcA, srcB, cap_dst, CAPTURELENGTH, 1);
}
newCaptureLineNativeState = false;
@ -5879,22 +5879,22 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{
if (this->isLineRenderNative[l])
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
else
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
}
else
{
if (this->isLineRenderNative[l])
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
else
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
}
}
@ -5904,22 +5904,22 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
{
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
else
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, true, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
}
else
{
if ( (DISPCAPCNT.SrcB == 0) && !this->isLineCaptureNative[vramReadBlock][readLineIndexWithOffset] )
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, false, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
else
{
this->_RenderLine_DispCapture_Blend<CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
this->_RenderLine_DispCapture_Blend<NDSColorFormat_BGR555_Rev, CAPTURELENGTH, false, true, false>(srcA, srcB, cap_dst_ext, captureLengthExt, captureLineCount);
}
}
}
@ -6244,7 +6244,7 @@ FragmentColor GPUEngineA::_RenderLine_DispCapture_BlendFunc(const FragmentColor
}
#ifdef ENABLE_SSE2
template<NDSColorFormat COLORFORMAT>
template <NDSColorFormat COLORFORMAT>
__m128i GPUEngineA::_RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB)
{
#ifdef ENABLE_SSSE3
@ -6356,8 +6356,8 @@ __m128i GPUEngineA::_RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA,
}
#endif
template<bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l)
template <NDSColorFormat OUTPUTFORMAT, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l)
{
#ifdef ENABLE_SSE2
const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA);
@ -6368,49 +6368,91 @@ void GPUEngineA::_RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA,
size_t offset = _gpuDstToSrcIndex[_gpuDstLineIndex[l] * dispInfo.customWidth] - (l * GPU_FRAMEBUFFER_NATIVE_WIDTH);
size_t i = 0;
#ifdef ENABLE_SSE2
const size_t ssePixCount = length - (length % 8);
for (; i < ssePixCount; i += 8)
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
__m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA + i)) : _mm_set_epi16(srcA[offset + i + 7],
srcA[offset + i + 6],
srcA[offset + i + 5],
srcA[offset + i + 4],
srcA[offset + i + 3],
srcA[offset + i + 2],
srcA[offset + i + 1],
srcA[offset + i + 0]);
const u32 *srcA_32 = (const u32 *)srcA;
const u32 *srcB_32 = (const u32 *)srcB;
FragmentColor *dst32 = (FragmentColor *)dst;
__m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[offset + i + 7],
srcB[offset + i + 6],
srcB[offset + i + 5],
srcB[offset + i + 4],
srcB[offset + i + 3],
srcB[offset + i + 2],
srcB[offset + i + 1],
srcB[offset + i + 0]);
_mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
}
#ifdef ENABLE_SSE2
const size_t ssePixCount = length - (length % 4);
for (; i < ssePixCount; i+=4)
{
__m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_32 + i)) : _mm_set_epi32(srcA_32[offset + i + 3],
srcA_32[offset + i + 2],
srcA_32[offset + i + 1],
srcA_32[offset + i + 0]);
__m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_32 + i)) : _mm_set_epi32(srcB_32[offset + i + 3],
srcB_32[offset + i + 2],
srcB_32[offset + i + 1],
srcB_32[offset + i + 0]);
_mm_store_si128( (__m128i *)(dst32 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<OUTPUTFORMAT>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < length; i++)
for (; i < length; i++)
{
const FragmentColor colorA = (!CAPTUREFROMNATIVESRCA) ? ((const FragmentColor *)srcA)[i] : ((const FragmentColor *)srcA)[offset + i];
const FragmentColor colorB = (!CAPTUREFROMNATIVESRCB) ? ((const FragmentColor *)srcB)[i] : ((const FragmentColor *)srcB)[offset + i];
((FragmentColor *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB);
}
}
else
{
const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[offset + i];
const u16 colorB = (!CAPTUREFROMNATIVESRCB) ? srcB[i] : srcB[offset + i];
const u16 *srcA_16 = (const u16 *)srcA;
const u16 *srcB_16 = (const u16 *)srcB;
u16 *dst16 = (u16 *)dst;
dst[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
#ifdef ENABLE_SSE2
const size_t ssePixCount = length - (length % 8);
for (; i < ssePixCount; i+=8)
{
__m128i srcA_vec128 = (!CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_16 + i)) : _mm_set_epi16(srcA_16[offset + i + 7],
srcA_16[offset + i + 6],
srcA_16[offset + i + 5],
srcA_16[offset + i + 4],
srcA_16[offset + i + 3],
srcA_16[offset + i + 2],
srcA_16[offset + i + 1],
srcA_16[offset + i + 0]);
__m128i srcB_vec128 = (!CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_16 + i)) : _mm_set_epi16(srcB_16[offset + i + 7],
srcB_16[offset + i + 6],
srcB_16[offset + i + 5],
srcB_16[offset + i + 4],
srcB_16[offset + i + 3],
srcB_16[offset + i + 2],
srcB_16[offset + i + 1],
srcB_16[offset + i + 0]);
_mm_store_si128( (__m128i *)(dst16 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
}
#endif
#ifdef ENABLE_SSE2
#pragma LOOPVECTORIZE_DISABLE
#endif
for (; i < length; i++)
{
const u16 colorA = (!CAPTUREFROMNATIVESRCA) ? srcA_16[i] : srcA_16[offset + i];
const u16 colorB = (!CAPTUREFROMNATIVESRCB) ? srcB_16[i] : srcB_16[offset + i];
dst16[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
}
}
}
template<size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l)
template <NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
void GPUEngineA::_RenderLine_DispCapture_Blend(const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt, const size_t l)
{
const u8 blendEVA = GPU->GetEngineMain()->_dispCapCnt.EVA;
const u8 blendEVB = GPU->GetEngineMain()->_dispCapCnt.EVB;
const u8 blendEVA = this->_dispCapCnt.EVA;
const u8 blendEVB = this->_dispCapCnt.EVB;
if (CAPTURETONATIVEDST)
{
@ -6418,55 +6460,93 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB,
const __m128i blendEVA_vec128 = _mm_set1_epi16(blendEVA);
const __m128i blendEVB_vec128 = _mm_set1_epi16(blendEVB);
for (size_t i = 0; i < CAPTURELENGTH; i += 8)
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
__m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA + i)) : _mm_set_epi16(srcA[_gpuDstPitchIndex[i+7]],
srcA[_gpuDstPitchIndex[i+6]],
srcA[_gpuDstPitchIndex[i+5]],
srcA[_gpuDstPitchIndex[i+4]],
srcA[_gpuDstPitchIndex[i+3]],
srcA[_gpuDstPitchIndex[i+2]],
srcA[_gpuDstPitchIndex[i+1]],
srcA[_gpuDstPitchIndex[i+0]]);
const u32 *srcA_32 = (const u32 *)srcA;
const u32 *srcB_32 = (const u32 *)srcB;
FragmentColor *dst32 = (FragmentColor *)dst;
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]],
srcB[_gpuDstPitchIndex[i+6]],
srcB[_gpuDstPitchIndex[i+5]],
srcB[_gpuDstPitchIndex[i+4]],
srcB[_gpuDstPitchIndex[i+3]],
srcB[_gpuDstPitchIndex[i+2]],
srcB[_gpuDstPitchIndex[i+1]],
srcB[_gpuDstPitchIndex[i+0]]);
for (size_t i = 0; i < CAPTURELENGTH; i+=4)
{
__m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_32 + i)) : _mm_set_epi32(srcA_32[_gpuDstPitchIndex[i+3]],
srcA_32[_gpuDstPitchIndex[i+2]],
srcA_32[_gpuDstPitchIndex[i+1]],
srcA_32[_gpuDstPitchIndex[i+0]]);
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_32 + i)) : _mm_set_epi32(srcB_32[_gpuDstPitchIndex[i+3]],
srcB_32[_gpuDstPitchIndex[i+2]],
srcB_32[_gpuDstPitchIndex[i+1]],
srcB_32[_gpuDstPitchIndex[i+0]]);
_mm_store_si128( (__m128i *)(dst32 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<OUTPUTFORMAT>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
}
}
else
{
const u16 *srcA_16 = (const u16 *)srcA;
const u16 *srcB_16 = (const u16 *)srcB;
u16 *dst16 = (u16 *)dst;
_mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
for (size_t i = 0; i < CAPTURELENGTH; i+=8)
{
__m128i srcA_vec128 = (CAPTUREFROMNATIVESRCA) ? _mm_load_si128((__m128i *)(srcA_16 + i)) : _mm_set_epi16(srcA_16[_gpuDstPitchIndex[i+7]],
srcA_16[_gpuDstPitchIndex[i+6]],
srcA_16[_gpuDstPitchIndex[i+5]],
srcA_16[_gpuDstPitchIndex[i+4]],
srcA_16[_gpuDstPitchIndex[i+3]],
srcA_16[_gpuDstPitchIndex[i+2]],
srcA_16[_gpuDstPitchIndex[i+1]],
srcA_16[_gpuDstPitchIndex[i+0]]);
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB_16 + i)) : _mm_set_epi16(srcB_16[_gpuDstPitchIndex[i+7]],
srcB_16[_gpuDstPitchIndex[i+6]],
srcB_16[_gpuDstPitchIndex[i+5]],
srcB_16[_gpuDstPitchIndex[i+4]],
srcB_16[_gpuDstPitchIndex[i+3]],
srcB_16[_gpuDstPitchIndex[i+2]],
srcB_16[_gpuDstPitchIndex[i+1]],
srcB_16[_gpuDstPitchIndex[i+0]]);
_mm_store_si128( (__m128i *)(dst16 + i), this->_RenderLine_DispCapture_BlendFunc_SSE2<NDSColorFormat_BGR555_Rev>(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
}
}
#else
for (size_t i = 0; i < CAPTURELENGTH; i++)
{
const u16 colorA = (CAPTUREFROMNATIVESRCA) ? srcA[i] : srcA[_gpuDstPitchIndex[i]];
const u16 colorB = (CAPTUREFROMNATIVESRCB) ? srcB[i] : srcB[_gpuDstPitchIndex[i]];
dst[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
const FragmentColor colorA = (CAPTUREFROMNATIVESRCA) ? ((const FragmentColor *)srcA)[i] : ((const FragmentColor *)srcA)[_gpuDstPitchIndex[i]];
const FragmentColor colorB = (CAPTUREFROMNATIVESRCB) ? ((const FragmentColor *)srcB)[i] : ((const FragmentColor *)srcB)[_gpuDstPitchIndex[i]];
((FragmentColor *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc<OUTPUTFORMAT>(colorA, colorB, blendEVA, blendEVB);
}
else
{
const u16 colorA = (CAPTUREFROMNATIVESRCA) ? ((u16 *)srcA)[i] : ((u16 *)srcA)[_gpuDstPitchIndex[i]];
const u16 colorB = (CAPTUREFROMNATIVESRCB) ? ((u16 *)srcB)[i] : ((u16 *)srcB)[_gpuDstPitchIndex[i]];
((u16 *)dst)[i] = this->_RenderLine_DispCapture_BlendFunc(colorA, colorB, blendEVA, blendEVB);
}
}
#endif
}
else
{
const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo();
const size_t lineWidth = GPU->GetDisplayInfo().customWidth;
const size_t captureLineCount = _gpuCaptureLineCount[l];
if (CAPTURELENGTH == GPU_FRAMEBUFFER_NATIVE_WIDTH)
{
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt * captureLineCount, l);
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<OUTPUTFORMAT, CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt * captureLineCount, l);
}
else
{
for (size_t line = 0; line < captureLineCount; line++)
{
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt, l);
srcA += dispInfo.customWidth;
srcB += dispInfo.customWidth;
dst += dispInfo.customWidth;
this->_RenderLine_DispCapture_BlendToCustomDstBuffer<OUTPUTFORMAT, CAPTUREFROMNATIVESRCA, CAPTUREFROMNATIVESRCB>(srcA, srcB, dst, blendEVA, blendEVB, captureLengthExt, l);
srcA = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcA + lineWidth) : (void *)((u16 *)srcA + lineWidth);
srcB = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)srcB + lineWidth) : (void *)((u16 *)srcB + lineWidth);
dst = (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) ? (void *)((FragmentColor *)dst + lineWidth) : (void *)((u16 *)dst + lineWidth);
}
}
}

View File

@ -1567,11 +1567,11 @@ protected:
template<NDSColorFormat COLORFORMAT> __m128i _RenderLine_DispCapture_BlendFunc_SSE2(const __m128i &srcA, const __m128i &srcB, const __m128i &blendEVA, const __m128i &blendEVB);
#endif
template<bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
void _RenderLine_DispCapture_BlendToCustomDstBuffer(const u16 *srcA, const u16 *srcB, u16 *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same
template<NDSColorFormat OUTPUTFORMAT, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB>
void _RenderLine_DispCapture_BlendToCustomDstBuffer(const void *srcA, const void *srcB, void *dst, const u8 blendEVA, const u8 blendEVB, const size_t length, size_t l); // Do not use restrict pointers, since srcB and dst can be the same
template<size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
void _RenderLine_DispCapture_Blend(const u16 *srcA, const u16 *srcB, u16 *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same
template<NDSColorFormat OUTPUTFORMAT, size_t CAPTURELENGTH, bool CAPTUREFROMNATIVESRCA, bool CAPTUREFROMNATIVESRCB, bool CAPTURETONATIVEDST>
void _RenderLine_DispCapture_Blend(const void *srcA, const void *srcB, void *dst, const size_t captureLengthExt, const size_t l); // Do not use restrict pointers, since srcB and dst can be the same
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeVRAM(const size_t l);
template<NDSColorFormat OUTPUTFORMAT> void _HandleDisplayModeMainMemory(const size_t l);