GPU: Use the same technique as in the commit 6bcd19b
GPUEngineBase::_CompositeVRAMLineDeferred() bug fix to make a tiny optimization to GPUEngineBase::_CompositeLineDeferred(). This also makes the code more consistent.
This commit is contained in:
parent
d0330fc96e
commit
2c5c2f6186
|
@ -3899,23 +3899,26 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
|
|||
CopyLineExpand<0xFFFF, false, false, 2>(this->_deferredColorCustom, this->_deferredColorNative, compInfo.line.widthCustom, 1);
|
||||
CopyLineExpand<0xFFFF, false, false, 1>(this->_deferredIndexCustom, this->_deferredIndexNative, compInfo.line.widthCustom, 1);
|
||||
|
||||
compInfo.target.xNative = 0;
|
||||
compInfo.target.xCustom = 0;
|
||||
compInfo.target.lineColor16 = (u16 *)compInfo.target.lineColorHead;
|
||||
compInfo.target.lineColor32 = (FragmentColor *)compInfo.target.lineColorHead;
|
||||
compInfo.target.lineLayerID = compInfo.target.lineLayerIDHead;
|
||||
|
||||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = (compInfo.line.widthCustom - (compInfo.line.widthCustom % 16));
|
||||
const size_t ssePixCount = (compInfo.line.pixelCount - (compInfo.line.pixelCount % 16));
|
||||
const __m128i srcEffectEnableMask = compInfo.renderState.srcEffectEnable_SSE2[compInfo.renderState.selectedLayerID];
|
||||
#endif
|
||||
|
||||
for (size_t l = 0; l < compInfo.line.renderCount; l++)
|
||||
for (; i < ssePixCount; i+=16, compInfo.target.xCustom+=16, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16+=16, compInfo.target.lineColor32+=16, compInfo.target.lineLayerID+=16)
|
||||
{
|
||||
compInfo.target.xNative = 0;
|
||||
compInfo.target.xCustom = 0;
|
||||
if (compInfo.target.xCustom >= compInfo.line.widthCustom)
|
||||
{
|
||||
compInfo.target.xCustom -= compInfo.line.widthCustom;
|
||||
compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom];
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
for (; compInfo.target.xCustom < ssePixCount; compInfo.target.xCustom+=16, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16+=16, compInfo.target.lineColor32+=16, compInfo.target.lineLayerID+=16)
|
||||
{
|
||||
__m128i passMask8;
|
||||
|
||||
if (WILLPERFORMWINDOWTEST)
|
||||
|
@ -3976,8 +3979,14 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
|
|||
#ifdef ENABLE_SSE2
|
||||
#pragma LOOPVECTORIZE_DISABLE
|
||||
#endif
|
||||
for (; compInfo.target.xCustom < compInfo.line.widthCustom; compInfo.target.xCustom++, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++)
|
||||
for (; i < compInfo.line.pixelCount; i++, compInfo.target.xCustom++, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16++, compInfo.target.lineColor32++, compInfo.target.lineLayerID++)
|
||||
{
|
||||
if (compInfo.target.xCustom >= compInfo.line.widthCustom)
|
||||
{
|
||||
compInfo.target.xCustom -= compInfo.line.widthCustom;
|
||||
compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom];
|
||||
}
|
||||
|
||||
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] == 0) )
|
||||
{
|
||||
continue;
|
||||
|
@ -3992,7 +4001,6 @@ void GPUEngineBase::_CompositeLineDeferred(GPUEngineCompositorInfo &compInfo)
|
|||
this->_PixelComposite<COMPOSITORMODE, OUTPUTFORMAT, GPULayerType_BG>(compInfo, this->_deferredColorCustom[compInfo.target.xCustom], 0, enableColorEffect);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool MOSAIC, bool WILLPERFORMWINDOWTEST>
|
||||
void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo, const void *__restrict vramColorPtr)
|
||||
|
@ -4006,14 +4014,15 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
|
|||
size_t i = 0;
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t ssePixCount = (compInfo.line.pixelCount - (compInfo.line.pixelCount % 16));
|
||||
const __m128i srcEffectEnableMask = compInfo.renderState.srcEffectEnable_SSE2[compInfo.renderState.selectedLayerID];
|
||||
|
||||
const size_t ssePixCount = (compInfo.line.pixelCount - (compInfo.line.pixelCount % 16));
|
||||
for (; i < ssePixCount; i+=16, compInfo.target.xCustom+=16, compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom], compInfo.target.lineColor16+=16, compInfo.target.lineColor32+=16, compInfo.target.lineLayerID+=16)
|
||||
{
|
||||
if (compInfo.target.xCustom >= compInfo.line.widthCustom)
|
||||
{
|
||||
compInfo.target.xCustom -= compInfo.line.widthCustom;
|
||||
compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom];
|
||||
}
|
||||
|
||||
__m128i src[4];
|
||||
|
@ -4084,6 +4093,7 @@ void GPUEngineBase::_CompositeVRAMLineDeferred(GPUEngineCompositorInfo &compInfo
|
|||
if (compInfo.target.xCustom >= compInfo.line.widthCustom)
|
||||
{
|
||||
compInfo.target.xCustom -= compInfo.line.widthCustom;
|
||||
compInfo.target.xNative = _gpuDstToSrcIndex[compInfo.target.xCustom];
|
||||
}
|
||||
|
||||
if ( WILLPERFORMWINDOWTEST && (this->_didPassWindowTestNative[compInfo.renderState.selectedLayerID][compInfo.target.xNative] == 0) )
|
||||
|
|
Loading…
Reference in New Issue