GPU: Do a few more minor optimizations to rendering complete OBJ layer lines. Most notably, all complete OBJ layer lines, not just ones reading directly from custom VRAM, now benefit from the SSE2-accelerated code.

This commit is contained in:
rogerman 2018-11-30 20:12:58 -08:00
parent 9a53e8be69
commit 60c01bd63a
2 changed files with 163 additions and 117 deletions

View File

@ -1402,6 +1402,7 @@ GPUEngineBase::GPUEngineBase()
_enableBGLayer[GPULayerID_BG3] = true;
_enableBGLayer[GPULayerID_OBJ] = true;
_needExpandSprColorCustom = false;
_sprColorCustom = NULL;
_sprAlphaCustom = NULL;
_sprTypeCustom = NULL;
@ -1459,6 +1460,8 @@ void GPUEngineBase::_Reset_Base()
{
const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo();
this->_needExpandSprColorCustom = false;
memset(this->_sprColor, 0, sizeof(this->_sprColor));
memset(this->_sprAlpha, 0, sizeof(this->_sprAlpha));
memset(this->_sprType, OBJMode_Normal, sizeof(this->_sprType));
@ -3917,72 +3920,75 @@ FORCEINLINE void GPUEngineBase::_CompositePixelImmediate(GPUEngineCompositorInfo
this->_PixelComposite<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_BG>(compInfo, srcColor16, 0, enableColorEffect);
}
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool MOSAIC, bool WILLPERFORMWINDOWTEST>
void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, u16 *__restrict srcColorCustom, u8 *__restrict srcIndexCustom)
{
if (MOSAIC)
if (LAYERTYPE == GPULayerType_BG)
{
if (MOSAIC)
{
#ifdef ENABLE_SSE2
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x+=8)
{
__m128i mosaicColor_vec128;
u16 *mosaicColorBG = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID];
const __m128i index_vec128 = _mm_loadl_epi64((__m128i *)(this->_deferredIndexNative + x));
const __m128i col_vec128 = _mm_load_si128((__m128i *)(this->_deferredColorNative + x));
if (compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x+=8)
{
const __m128i mosaicSetColorMask = _mm_cmpgt_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(compInfo.renderState.mosaicWidthBG + x))), _mm_setzero_si128() );
const __m128i idxMask = _mm_cmpeq_epi16(_mm_unpacklo_epi8(index_vec128, _mm_setzero_si128()), _mm_setzero_si128());
mosaicColor_vec128 = _mm_blendv_epi8(_mm_and_si128(col_vec128, _mm_set1_epi16(0x7FFF)), _mm_set1_epi16(0xFFFF), idxMask);
__m128i mosaicColor_vec128;
u16 *mosaicColorBG = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID];
_mm_storeu_si128( (__m128i *)(mosaicColorBG + x), _mm_blendv_epi8(_mm_loadu_si128((__m128i *)(mosaicColorBG + x)), mosaicColor_vec128, mosaicSetColorMask) );
const __m128i index_vec128 = _mm_loadl_epi64((__m128i *)(this->_deferredIndexNative + x));
const __m128i col_vec128 = _mm_load_si128((__m128i *)(this->_deferredColorNative + x));
if (compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
{
const __m128i mosaicSetColorMask = _mm_cmpgt_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(compInfo.renderState.mosaicWidthBG + x))), _mm_setzero_si128() );
const __m128i idxMask = _mm_cmpeq_epi16(_mm_unpacklo_epi8(index_vec128, _mm_setzero_si128()), _mm_setzero_si128());
mosaicColor_vec128 = _mm_blendv_epi8(_mm_and_si128(col_vec128, _mm_set1_epi16(0x7FFF)), _mm_set1_epi16(0xFFFF), idxMask);
_mm_storeu_si128( (__m128i *)(mosaicColorBG + x), _mm_blendv_epi8(_mm_loadu_si128((__m128i *)(mosaicColorBG + x)), mosaicColor_vec128, mosaicSetColorMask) );
}
mosaicColor_vec128 = _mm_set_epi16(mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+7].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+6].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+5].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+4].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+3].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+2].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+1].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+0].trunc]);
const __m128i writeColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF));
_mm_storel_epi64( (__m128i *)(this->_deferredIndexNative + x), _mm_andnot_si128(_mm_packs_epi16(writeColorMask, _mm_setzero_si128()), index_vec128) );
_mm_store_si128( (__m128i *)(this->_deferredColorNative + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, writeColorMask) );
}
mosaicColor_vec128 = _mm_set_epi16(mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+7].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+6].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+5].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+4].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+3].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+2].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+1].trunc],
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+0].trunc]);
const __m128i writeColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF));
_mm_storel_epi64( (__m128i *)(this->_deferredIndexNative + x), _mm_andnot_si128(_mm_packs_epi16(writeColorMask, _mm_setzero_si128()), index_vec128) );
_mm_store_si128( (__m128i *)(this->_deferredColorNative + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, writeColorMask) );
}
#else
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x++)
{
u16 mosaicColor;
if (compInfo.renderState.mosaicWidthBG[x].begin && compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x++)
{
mosaicColor = (this->_deferredIndexNative[x] == 0) ? 0xFFFF : this->_deferredColorNative[x] & 0x7FFF;
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][x] = mosaicColor;
u16 mosaicColor;
if (compInfo.renderState.mosaicWidthBG[x].begin && compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
{
mosaicColor = (this->_deferredIndexNative[x] == 0) ? 0xFFFF : this->_deferredColorNative[x] & 0x7FFF;
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][x] = mosaicColor;
}
else
{
mosaicColor = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[x].trunc];
}
if (mosaicColor == 0xFFFF)
{
this->_deferredIndexNative[x] = 0;
}
else
{
this->_deferredColorNative[x] = mosaicColor;
}
}
else
{
mosaicColor = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[x].trunc];
}
if (mosaicColor == 0xFFFF)
{
this->_deferredIndexNative[x] = 0;
}
else
{
this->_deferredColorNative[x] = mosaicColor;
}
}
#endif
}
CopyLineExpand<0xFFFF, false, false, 2>(srcColorCustom, this->_deferredColorNative, compInfo.line.widthCustom, 1);
CopyLineExpand<0xFFFF, false, false, 1>(srcIndexCustom, this->_deferredIndexNative, compInfo.line.widthCustom, 1);
}
CopyLineExpand<0xFFFF, false, false, 2>(this->_deferredColorCustom, this->_deferredColorNative, compInfo.line.widthCustom, 1);
CopyLineExpand<0xFFFF, false, false, 1>(this->_deferredIndexCustom, this->_deferredIndexNative, compInfo.line.widthCustom, 1);
compInfo.target.xNative = 0;
compInfo.target.xCustom = 0;
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
@ -4015,8 +4021,11 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
passMask8 = _mm_set1_epi8(0xFF);
}
// Do the index test. Pixels with an index value of 0 are rejected.
passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)(this->_deferredIndexCustom + compInfo.target.xCustom)), _mm_setzero_si128()), passMask8);
if (LAYERTYPE == GPULayerType_BG)
{
// Do the index test. Pixels with an index value of 0 are rejected.
passMask8 = _mm_andnot_si128(_mm_cmpeq_epi8(_mm_load_si128((__m128i *)(srcIndexCustom + compInfo.target.xCustom)), _mm_setzero_si128()), passMask8);
}
const int passMaskValue = _mm_movemask_epi8(passMask8);
@ -4030,13 +4039,15 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
if (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev)
{
src[0] = _mm_load_si128((__m128i *)(this->_deferredColorCustom + compInfo.target.xCustom + 0));
src[1] = _mm_load_si128((__m128i *)(this->_deferredColorCustom + compInfo.target.xCustom + 8));
src[0] = _mm_load_si128((__m128i *)(srcColorCustom + compInfo.target.xCustom + 0));
src[1] = _mm_load_si128((__m128i *)(srcColorCustom + compInfo.target.xCustom + 8));
}
else
{
const __m128i src16[2] = { _mm_load_si128((__m128i *)(this->_deferredColorCustom + compInfo.target.xCustom + 0)),
_mm_load_si128((__m128i *)(this->_deferredColorCustom + compInfo.target.xCustom + 8)) };
const __m128i src16[2] = {
_mm_load_si128((__m128i *)(srcColorCustom + compInfo.target.xCustom + 0)),
_mm_load_si128((__m128i *)(srcColorCustom + compInfo.target.xCustom + 8))
};
if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev)
{
@ -4052,11 +4063,11 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
// Write out the pixels.
const bool didAllPixelsPass = (passMaskValue == 0xFFFF);
this->_PixelComposite16_SSE2<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_BG, WILLPERFORMWINDOWTEST>(compInfo,
didAllPixelsPass,
passMask8,
src[3], src[2], src[1], src[0],
srcEffectEnableMask);
this->_PixelComposite16_SSE2<COMPOSITORMODE, OUTPUTFORMAT, LAYERTYPE, WILLPERFORMWINDOWTEST>(compInfo,
didAllPixelsPass,
passMask8,
src[3], src[2], src[1], src[0],
srcEffectEnableMask);
}
#endif
@ -4076,13 +4087,13 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
continue;
}
if (this->_deferredIndexCustom[compInfo.target.xCustom] == 0)
if ( (LAYERTYPE == GPULayerType_BG) && (srcIndexCustom[compInfo.target.xCustom] == 0) )
{
continue;
}
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true;
this->_PixelComposite<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_BG>(compInfo, this->_deferredColorCustom[compInfo.target.xCustom], 0, enableColorEffect);
this->_PixelComposite<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_BG>(compInfo, srcColorCustom[compInfo.target.xCustom], 0, enableColorEffect);
}
}
@ -4119,8 +4130,12 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
const __m128i src16[2] = { _mm_load_si128((__m128i *)((u16 *)vramColorPtr + i + 0)), _mm_load_si128((__m128i *)((u16 *)vramColorPtr + i + 8)) };
src[0] = src16[0];
src[1] = src16[1];
passMask8 = _mm_packus_epi16( _mm_srli_epi16(src16[0], 15), _mm_srli_epi16(src16[1], 15) );
passMask8 = _mm_cmpeq_epi8(passMask8, _mm_set1_epi8(1));
if (LAYERTYPE != GPULayerType_OBJ)
{
passMask8 = _mm_packus_epi16( _mm_srli_epi16(src16[0], 15), _mm_srli_epi16(src16[1], 15) );
passMask8 = _mm_cmpeq_epi8(passMask8, _mm_set1_epi8(1));
}
break;
}
@ -4129,8 +4144,12 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
const __m128i src16[2] = { _mm_load_si128((__m128i *)((u16 *)vramColorPtr + i + 0)), _mm_load_si128((__m128i *)((u16 *)vramColorPtr + i + 8)) };
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[0], src[0], src[1]);
ColorspaceConvert555To6665Opaque_SSE2<false>(src16[1], src[2], src[3]);
passMask8 = _mm_packus_epi16( _mm_srli_epi16(src16[0], 15), _mm_srli_epi16(src16[1], 15) );
passMask8 = _mm_cmpeq_epi8(passMask8, _mm_set1_epi8(1));
if (LAYERTYPE != GPULayerType_OBJ)
{
passMask8 = _mm_packus_epi16( _mm_srli_epi16(src16[0], 15), _mm_srli_epi16(src16[1], 15) );
passMask8 = _mm_cmpeq_epi8(passMask8, _mm_set1_epi8(1));
}
break;
}
@ -4139,12 +4158,21 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
src[1] = _mm_load_si128((__m128i *)((FragmentColor *)vramColorPtr + i + 4));
src[2] = _mm_load_si128((__m128i *)((FragmentColor *)vramColorPtr + i + 8));
src[3] = _mm_load_si128((__m128i *)((FragmentColor *)vramColorPtr + i + 12));
passMask8 = _mm_packus_epi16( _mm_packs_epi32(_mm_srli_epi32(src[0], 24), _mm_srli_epi32(src[1], 24)), _mm_packs_epi32(_mm_srli_epi32(src[2], 24), _mm_srli_epi32(src[3], 24)) );
passMask8 = _mm_cmpeq_epi8(passMask8, _mm_setzero_si128());
passMask8 = _mm_xor_si128(passMask8, _mm_set1_epi32(0xFFFFFFFF));
if (LAYERTYPE != GPULayerType_OBJ)
{
passMask8 = _mm_packus_epi16( _mm_packs_epi32(_mm_srli_epi32(src[0], 24), _mm_srli_epi32(src[1], 24)), _mm_packs_epi32(_mm_srli_epi32(src[2], 24), _mm_srli_epi32(src[3], 24)) );
passMask8 = _mm_cmpeq_epi8(passMask8, _mm_setzero_si128());
passMask8 = _mm_xor_si128(passMask8, _mm_set1_epi32(0xFFFFFFFF));
}
break;
}
if (LAYERTYPE == GPULayerType_OBJ)
{
passMask8 = _mm_set1_epi8(0xFF);
}
if (WILLPERFORMWINDOWTEST)
{
// Do the window test.
@ -4187,7 +4215,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev)
{
if ((((u32 *)vramColorPtr)[i] & 0xFF000000) == 0)
if ( (LAYERTYPE != GPULayerType_OBJ) && ((((u32 *)vramColorPtr)[i] & 0xFF000000) == 0) )
{
continue;
}
@ -4197,7 +4225,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
}
else
{
if ((((u16 *)vramColorPtr)[i] & 0x8000) == 0)
if ( (LAYERTYPE != GPULayerType_OBJ) && ((((u16 *)vramColorPtr)[i] & 0x8000) == 0) )
{
continue;
}
@ -5215,6 +5243,8 @@ void GPUEngineBase::_RenderLine_SetupSprites(GPUEngineCompositorInfo &compInfo)
{
itemsForPriority_t *item;
this->_needExpandSprColorCustom = false;
//n.b. - this is clearing the sprite line buffer to the background color,
memset_u16_fast<GPU_FRAMEBUFFER_NATIVE_WIDTH>(this->_sprColor, compInfo.renderState.backdropColor16);
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
@ -5255,8 +5285,7 @@ void GPUEngineBase::_RenderLine_SetupSprites(GPUEngineCompositorInfo &compInfo)
if (isLineComplete)
{
// TODO: Enable _sprColorCustom if we ever get the OBJ layer SIMD code working.
//CopyLineExpandHinted<0xFFFF, false, false, false, 2>(compInfo.line, this->_sprColor, this->_sprColorCustom);
this->_needExpandSprColorCustom = true;
CopyLineExpandHinted<0xFFFF, false, false, false, 1>(compInfo.line, this->_sprAlpha, this->_sprAlphaCustom);
CopyLineExpandHinted<0xFFFF, false, false, false, 1>(compInfo.line, this->_sprType, this->_sprTypeCustom);
}
@ -5336,19 +5365,35 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
}
else
{
void *__restrict dstColorPtr = compInfo.target.lineColorHead;
u8 *__restrict dstLayerIDPtr = compInfo.target.lineLayerIDHead;
if (useCustomVRAM)
if (item->nbPixelsX == GPU_FRAMEBUFFER_NATIVE_WIDTH)
{
const void *__restrict vramColorPtr = GPU->GetCustomVRAMAddressUsingMappedAddress<OUTPUTFORMAT>(this->vramBlockOBJAddress, 0);
if (item->nbPixelsX == GPU_FRAMEBUFFER_NATIVE_WIDTH)
if (useCustomVRAM)
{
const void *__restrict vramColorPtr = GPU->GetCustomVRAMAddressUsingMappedAddress<OUTPUTFORMAT>(this->vramBlockOBJAddress, 0);
this->_CompositeVRAMLineDeferred<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ, false, WILLPERFORMWINDOWTEST>(compInfo, vramColorPtr);
}
else
{
// Lazily expand the sprite color line since there are also many instances where the OBJ layer will be
// reading directly out of VRAM instead of reading out of the rendered sprite line.
if (this->_needExpandSprColorCustom)
{
this->_needExpandSprColorCustom = false;
CopyLineExpandHinted<0xFFFF, false, false, false, 2>(compInfo.line, this->_sprColor, this->_sprColorCustom);
}
this->_CompositeLineDeferred<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ, false, WILLPERFORMWINDOWTEST>(compInfo, this->_sprColorCustom, NULL);
}
}
else
{
void *__restrict dstColorPtr = compInfo.target.lineColorHead;
u8 *__restrict dstLayerIDPtr = compInfo.target.lineLayerIDHead;
if (useCustomVRAM)
{
const void *__restrict vramColorPtr = GPU->GetCustomVRAMAddressUsingMappedAddress<OUTPUTFORMAT>(this->vramBlockOBJAddress, 0);
for (size_t line = 0; line < compInfo.line.renderCount; line++)
{
compInfo.target.lineColor16 = (u16 *)dstColorPtr;
@ -5393,42 +5438,42 @@ void GPUEngineBase::_RenderLine_LayerOBJ(GPUEngineCompositorInfo &compInfo, item
dstLayerIDPtr += compInfo.line.widthCustom;
}
}
}
else
{
for (size_t line = 0; line < compInfo.line.renderCount; line++)
else
{
compInfo.target.lineColor16 = (u16 *)dstColorPtr;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr;
compInfo.target.lineLayerID = dstLayerIDPtr;
for (size_t i = 0; i < item->nbPixelsX; i++)
for (size_t line = 0; line < compInfo.line.renderCount; line++)
{
const size_t srcX = item->PixelsX[i];
compInfo.target.lineColor16 = (u16 *)dstColorPtr;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr;
compInfo.target.lineLayerID = dstLayerIDPtr;
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][srcX] == 0) )
for (size_t i = 0; i < item->nbPixelsX; i++)
{
continue;
const size_t srcX = item->PixelsX[i];
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][srcX] == 0) )
{
continue;
}
compInfo.target.xNative = srcX;
compInfo.target.xCustom = _gpuDstPitchIndex[srcX];
for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++)
{
const size_t dstX = compInfo.target.xCustom + p;
compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr + dstX;
compInfo.target.lineLayerID = dstLayerIDPtr + dstX;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true;
this->_PixelComposite<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ>(compInfo, this->_sprColor[srcX], this->_sprAlpha[srcX], enableColorEffect);
}
}
compInfo.target.xNative = srcX;
compInfo.target.xCustom = _gpuDstPitchIndex[srcX];
for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++)
{
const size_t dstX = compInfo.target.xCustom + p;
compInfo.target.lineColor16 = (u16 *)dstColorPtr + dstX;
compInfo.target.lineColor32 = (FragmentColor *)dstColorPtr + dstX;
compInfo.target.lineLayerID = dstLayerIDPtr + dstX;
const bool enableColorEffect = (WILLPERFORMWINDOWTEST) ? (this->_enableColorEffectNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] != 0) : true;
this->_PixelComposite<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_OBJ>(compInfo, this->_sprColor[srcX], this->_sprAlpha[srcX], enableColorEffect);
}
dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((FragmentColor *)dstColorPtr + compInfo.line.widthCustom);
dstLayerIDPtr += compInfo.line.widthCustom;
}
dstColorPtr = (OUTPUTFORMAT == NDSColorFormat_BGR555_Rev) ? (void *)((u16 *)dstColorPtr + compInfo.line.widthCustom) : (void *)((FragmentColor *)dstColorPtr + compInfo.line.widthCustom);
dstLayerIDPtr += compInfo.line.widthCustom;
}
}
}
@ -5935,7 +5980,7 @@ FORCEINLINE void GPUEngineBase::_RenderLine_LayerBG_Final(GPUEngineCompositorInf
}
else
{
this->_CompositeLineDeferred<COMPOSITORMODE, OUTPUTFORMAT, MOSAIC, WILLPERFORMWINDOWTEST>(compInfo);
this->_CompositeLineDeferred<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_BG, MOSAIC, WILLPERFORMWINDOWTEST>(compInfo, this->_deferredColorCustom, this->_deferredIndexCustom);
}
}
}
@ -6260,7 +6305,7 @@ void GPUEngineBase::SetCustomFramebufferSize(size_t w, size_t h)
this->_deferredIndexCustom = (u8 *)malloc_alignedPage(w * sizeof(u8));
this->_deferredColorCustom = (u16 *)malloc_alignedPage(w * sizeof(u16));
//this->_sprColorCustom = (u16 *)malloc_alignedPage(w * sizeof(u16));
this->_sprColorCustom = (u16 *)malloc_alignedPage(w * sizeof(u16));
this->_sprAlphaCustom = (u8 *)malloc_alignedPage(w * sizeof(u8));
this->_sprTypeCustom = (u8 *)malloc_alignedPage(w * sizeof(u8));

View File

@ -1382,6 +1382,7 @@ protected:
CACHE_ALIGN u8 _deferredIndexNative[GPU_FRAMEBUFFER_NATIVE_WIDTH * 4];
CACHE_ALIGN u16 _deferredColorNative[GPU_FRAMEBUFFER_NATIVE_WIDTH * 4];
bool _needExpandSprColorCustom;
u16 *_sprColorCustom;
u8 *_sprAlphaCustom;
u8 *_sprTypeCustom;
@ -1449,7 +1450,7 @@ protected:
TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> FORCEINLINE void _CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, bool opaque);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo, u16 *__restrict srcColorCustom, u8 *__restrict srcIndexCustom);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr);
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING> void _RenderLine_BGText(GPUEngineCompositorInfo &compInfo, const u16 XBG, const u16 YBG);