GPU: Clean up and optimize the BG mosaic code.
This commit is contained in:
parent
da6c79c6bd
commit
e4f935743c
|
@ -3033,26 +3033,24 @@ TILEENTRY GPUEngineBase::_GetTileEntry(const u32 tileMapAddress, const u16 xOffs
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST>
|
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST>
|
||||||
FORCEINLINE void GPUEngineBase::_CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, const bool opaque)
|
FORCEINLINE void GPUEngineBase::_CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, bool opaque)
|
||||||
{
|
{
|
||||||
bool willRenderColor = opaque;
|
|
||||||
|
|
||||||
if (MOSAIC)
|
if (MOSAIC)
|
||||||
{
|
{
|
||||||
//due to this early out, we will get incorrect behavior in cases where
|
//due to this early out, we will get incorrect behavior in cases where
|
||||||
//we enable mosaic in the middle of a frame. this is deemed unlikely.
|
//we enable mosaic in the middle of a frame. this is deemed unlikely.
|
||||||
|
|
||||||
if (!opaque) srcColor16 = 0xFFFF;
|
if (compInfo.renderState.mosaicWidthBG[srcX].begin && compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
|
||||||
else srcColor16 &= 0x7FFF;
|
{
|
||||||
|
srcColor16 = (!opaque) ? 0xFFFF : (srcColor16 & 0x7FFF);
|
||||||
if (!compInfo.renderState.mosaicWidthBG[srcX].begin || !compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
|
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][srcX] = srcColor16;
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
srcColor16 = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[srcX].trunc];
|
srcColor16 = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[srcX].trunc];
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][srcX] = srcColor16;
|
opaque = (srcColor16 != 0xFFFF);
|
||||||
|
|
||||||
willRenderColor = (srcColor16 != 0xFFFF);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][srcX] == 0) )
|
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][srcX] == 0) )
|
||||||
|
@ -3060,7 +3058,7 @@ FORCEINLINE void GPUEngineBase::_CompositePixelImmediate(GPUEngineCompositorInfo
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!willRenderColor)
|
if (!opaque)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -3083,50 +3081,56 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x+=8)
|
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x+=8)
|
||||||
{
|
{
|
||||||
|
__m128i mosaicColor_vec128;
|
||||||
|
u16 *mosaicColorBG = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID];
|
||||||
|
|
||||||
const __m128i index_vec128 = _mm_loadl_epi64((__m128i *)(this->_deferredIndexNative + x));
|
const __m128i index_vec128 = _mm_loadl_epi64((__m128i *)(this->_deferredIndexNative + x));
|
||||||
const __m128i col_vec128 = _mm_load_si128((__m128i *)(this->_deferredColorNative + x));
|
const __m128i col_vec128 = _mm_load_si128((__m128i *)(this->_deferredColorNative + x));
|
||||||
|
|
||||||
const __m128i idxMask = _mm_cmpeq_epi16(_mm_unpacklo_epi8(index_vec128, _mm_setzero_si128()), _mm_setzero_si128());
|
if (compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
|
||||||
const __m128i tmpColor_vec128 = _mm_blendv_epi8(_mm_and_si128(col_vec128, _mm_set1_epi16(0x7FFF)), _mm_set1_epi16(0xFFFF), idxMask);
|
|
||||||
|
|
||||||
const __m128i mosaicWidthMask = _mm_cmpeq_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(compInfo.renderState.mosaicWidthBG + x))), _mm_setzero_si128() );
|
|
||||||
const __m128i mosaicHeightMask = _mm_cmpeq_epi16(_mm_set1_epi16(compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin), _mm_setzero_si128());
|
|
||||||
const __m128i mosaicMask = _mm_or_si128(mosaicWidthMask, mosaicHeightMask);
|
|
||||||
|
|
||||||
u16 *mosaicColorBG = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID];
|
|
||||||
mosaicColorBG[x+0] = (_mm_extract_epi16(mosaicMask, 0) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+0].trunc] : _mm_extract_epi16(tmpColor_vec128, 0);
|
|
||||||
mosaicColorBG[x+1] = (_mm_extract_epi16(mosaicMask, 1) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+1].trunc] : _mm_extract_epi16(tmpColor_vec128, 1);
|
|
||||||
mosaicColorBG[x+2] = (_mm_extract_epi16(mosaicMask, 2) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+2].trunc] : _mm_extract_epi16(tmpColor_vec128, 2);
|
|
||||||
mosaicColorBG[x+3] = (_mm_extract_epi16(mosaicMask, 3) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+3].trunc] : _mm_extract_epi16(tmpColor_vec128, 3);
|
|
||||||
mosaicColorBG[x+4] = (_mm_extract_epi16(mosaicMask, 4) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+4].trunc] : _mm_extract_epi16(tmpColor_vec128, 4);
|
|
||||||
mosaicColorBG[x+5] = (_mm_extract_epi16(mosaicMask, 5) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+5].trunc] : _mm_extract_epi16(tmpColor_vec128, 5);
|
|
||||||
mosaicColorBG[x+6] = (_mm_extract_epi16(mosaicMask, 6) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+6].trunc] : _mm_extract_epi16(tmpColor_vec128, 6);
|
|
||||||
mosaicColorBG[x+7] = (_mm_extract_epi16(mosaicMask, 7) != 0) ? mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+7].trunc] : _mm_extract_epi16(tmpColor_vec128, 7);
|
|
||||||
|
|
||||||
const __m128i mosaicColor_vec128 = _mm_loadu_si128((__m128i *)(mosaicColorBG + x));
|
|
||||||
const __m128i mosaicColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF));
|
|
||||||
_mm_storel_epi64( (__m128i *)(this->_deferredIndexNative + x), _mm_andnot_si128(_mm_packs_epi16(mosaicColorMask, _mm_setzero_si128()), index_vec128) );
|
|
||||||
_mm_store_si128( (__m128i *)(this->_deferredColorNative + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, mosaicColorMask) );
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (size_t x = 0, dstIdx = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x++)
|
|
||||||
{
|
|
||||||
u16 tmpColor = (this->_deferredIndexNative[x] == 0) ? 0xFFFF : this->_deferredColorNative[x] & 0x7FFF;
|
|
||||||
|
|
||||||
if (!compInfo.renderState.mosaicWidthBG[x].begin || !compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
|
|
||||||
{
|
{
|
||||||
tmpColor = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[x].trunc];
|
const __m128i mosaicSetColorMask = _mm_cmpgt_epi16( _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_loadu_si128((__m128i *)(compInfo.renderState.mosaicWidthBG + x))), _mm_setzero_si128() );
|
||||||
|
const __m128i idxMask = _mm_cmpeq_epi16(_mm_unpacklo_epi8(index_vec128, _mm_setzero_si128()), _mm_setzero_si128());
|
||||||
|
mosaicColor_vec128 = _mm_blendv_epi8(_mm_and_si128(col_vec128, _mm_set1_epi16(0x7FFF)), _mm_set1_epi16(0xFFFF), idxMask);
|
||||||
|
|
||||||
|
_mm_storeu_si128( (__m128i *)(mosaicColorBG + x), _mm_blendv_epi8(_mm_loadu_si128((__m128i *)(mosaicColorBG + x)), mosaicColor_vec128, mosaicSetColorMask) );
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][x] = tmpColor;
|
mosaicColor_vec128 = _mm_set_epi16(mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+7].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+6].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+5].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+4].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+3].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+2].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+1].trunc],
|
||||||
|
mosaicColorBG[compInfo.renderState.mosaicWidthBG[x+0].trunc]);
|
||||||
|
|
||||||
if (tmpColor == 0xFFFF)
|
const __m128i writeColorMask = _mm_cmpeq_epi16(mosaicColor_vec128, _mm_set1_epi16(0xFFFF));
|
||||||
|
_mm_storel_epi64( (__m128i *)(this->_deferredIndexNative + x), _mm_andnot_si128(_mm_packs_epi16(writeColorMask, _mm_setzero_si128()), index_vec128) );
|
||||||
|
_mm_store_si128( (__m128i *)(this->_deferredColorNative + x), _mm_blendv_epi8(mosaicColor_vec128, col_vec128, writeColorMask) );
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for (size_t x = 0; x < GPU_FRAMEBUFFER_NATIVE_WIDTH; x++)
|
||||||
|
{
|
||||||
|
u16 mosaicColor;
|
||||||
|
|
||||||
|
if (compInfo.renderState.mosaicWidthBG[x].begin && compInfo.renderState.mosaicHeightBG[compInfo.line.indexNative].begin)
|
||||||
|
{
|
||||||
|
mosaicColor = (this->_deferredIndexNative[x] == 0) ? 0xFFFF : this->_deferredColorNative[x] & 0x7FFF;
|
||||||
|
this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][x] = mosaicColor;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mosaicColor = this->_mosaicColors.bg[compInfo.renderState.selectedLayerID][compInfo.renderState.mosaicWidthBG[x].trunc];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mosaicColor == 0xFFFF)
|
||||||
{
|
{
|
||||||
this->_deferredIndexNative[x] = 0;
|
this->_deferredIndexNative[x] = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
this->_deferredColorNative[x] = tmpColor;
|
this->_deferredColorNative[x] = mosaicColor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1285,8 +1285,8 @@ protected:
|
||||||
{
|
{
|
||||||
size_t mosaic = m+1;
|
size_t mosaic = m+1;
|
||||||
MosaicTableEntry &te = table[m][i];
|
MosaicTableEntry &te = table[m][i];
|
||||||
te.begin = (i%mosaic==0);
|
te.begin = ((i % mosaic) == 0);
|
||||||
te.trunc = i/mosaic*mosaic;
|
te.trunc = (i / mosaic) * mosaic;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1364,7 +1364,7 @@ protected:
|
||||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal);
|
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST, bool WILLDEFERCOMPOSITING, PixelLookupFunc GetPixelFunc> void _RenderPixelIterate(GPUEngineCompositorInfo &compInfo, const IOREG_BGnParameter ¶m, const u32 map, const u32 tile, const u16 *__restrict pal);
|
||||||
|
|
||||||
TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask);
|
TILEENTRY _GetTileEntry(const u32 tileMapAddress, const u16 xOffset, const u16 layerWidthMask);
|
||||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> FORCEINLINE void _CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, const bool opaque);
|
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> FORCEINLINE void _CompositePixelImmediate(GPUEngineCompositorInfo &compInfo, const size_t srcX, u16 srcColor16, bool opaque);
|
||||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo);
|
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeLineDeferred(GPUEngineCompositorInfo &compInfo);
|
||||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo);
|
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool MOSAIC, bool WILLPERFORMWINDOWTEST> void _CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue