GPU:
- Optimize the loading of destination blending masks if SSSE3 is available.
This commit is contained in:
parent
67989f5260
commit
7542718cde
|
@ -26,14 +26,6 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#ifdef ENABLE_SSE2
|
|
||||||
#include <emmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef ENABLE_SSSE3
|
|
||||||
#include <tmmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "MMU.h"
|
#include "MMU.h"
|
||||||
#include "FIFO.h"
|
#include "FIFO.h"
|
||||||
|
@ -448,6 +440,10 @@ void GPUEngineBase::_Reset_Base()
|
||||||
this->_blend2[GPULayerID_OBJ] = false;
|
this->_blend2[GPULayerID_OBJ] = false;
|
||||||
this->_blend2[GPULayerID_Backdrop] = false;
|
this->_blend2[GPULayerID_Backdrop] = false;
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
this->_blend2_SSSE3 = _mm_setzero_si128();
|
||||||
|
#endif
|
||||||
|
|
||||||
this->_isMasterBrightFullIntensity = false;
|
this->_isMasterBrightFullIntensity = false;
|
||||||
|
|
||||||
this->_spriteRenderMode = SpriteRenderMode_Sprite1D;
|
this->_spriteRenderMode = SpriteRenderMode_Sprite1D;
|
||||||
|
@ -1636,6 +1632,10 @@ FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(const size_t dstX,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(srcEffectEnableValue), _mm_set1_epi8(1));
|
const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(srcEffectEnableValue), _mm_set1_epi8(1));
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
__m128i dstEffectEnableMask = _mm_shuffle_epi8(this->_blend2_SSSE3, dstLayerID_vec128);
|
||||||
|
#else
|
||||||
__m128i dstEffectEnableMask = _mm_set_epi8(this->_blend2[dstLayerIDLine[15]],
|
__m128i dstEffectEnableMask = _mm_set_epi8(this->_blend2[dstLayerIDLine[15]],
|
||||||
this->_blend2[dstLayerIDLine[14]],
|
this->_blend2[dstLayerIDLine[14]],
|
||||||
this->_blend2[dstLayerIDLine[13]],
|
this->_blend2[dstLayerIDLine[13]],
|
||||||
|
@ -1652,6 +1652,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel16_SSE2(const size_t dstX,
|
||||||
this->_blend2[dstLayerIDLine[ 2]],
|
this->_blend2[dstLayerIDLine[ 2]],
|
||||||
this->_blend2[dstLayerIDLine[ 1]],
|
this->_blend2[dstLayerIDLine[ 1]],
|
||||||
this->_blend2[dstLayerIDLine[ 0]]);
|
this->_blend2[dstLayerIDLine[ 0]]);
|
||||||
|
#endif
|
||||||
|
|
||||||
dstEffectEnableMask = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi8(dstLayerID_vec128, _mm_set1_epi8(LAYERID)), _mm_set1_epi32(0xFFFFFFFF)),
|
dstEffectEnableMask = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi8(dstLayerID_vec128, _mm_set1_epi8(LAYERID)), _mm_set1_epi32(0xFFFFFFFF)),
|
||||||
_mm_xor_si128(_mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF)) );
|
_mm_xor_si128(_mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF)) );
|
||||||
|
@ -1800,6 +1801,10 @@ FORCEINLINE void GPUEngineBase::_RenderPixel8_SSE2(const size_t dstX,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const __m128i srcEffectEnableMask = _mm_cmpeq_epi16(_mm_set1_epi16(srcEffectEnableValue), _mm_set1_epi16(1));
|
const __m128i srcEffectEnableMask = _mm_cmpeq_epi16(_mm_set1_epi16(srcEffectEnableValue), _mm_set1_epi16(1));
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
__m128i dstEffectEnableMask = _mm_unpacklo_epi8( _mm_shuffle_epi8(this->_blend2_SSSE3, dstLayerID_vec128), _mm_setzero_si128() );
|
||||||
|
#else
|
||||||
__m128i dstEffectEnableMask = _mm_set_epi16(this->_blend2[dstLayerIDLine[7]],
|
__m128i dstEffectEnableMask = _mm_set_epi16(this->_blend2[dstLayerIDLine[7]],
|
||||||
this->_blend2[dstLayerIDLine[6]],
|
this->_blend2[dstLayerIDLine[6]],
|
||||||
this->_blend2[dstLayerIDLine[5]],
|
this->_blend2[dstLayerIDLine[5]],
|
||||||
|
@ -1808,6 +1813,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel8_SSE2(const size_t dstX,
|
||||||
this->_blend2[dstLayerIDLine[2]],
|
this->_blend2[dstLayerIDLine[2]],
|
||||||
this->_blend2[dstLayerIDLine[1]],
|
this->_blend2[dstLayerIDLine[1]],
|
||||||
this->_blend2[dstLayerIDLine[0]]);
|
this->_blend2[dstLayerIDLine[0]]);
|
||||||
|
#endif
|
||||||
|
|
||||||
dstEffectEnableMask = _mm_and_si128( _mm_xor_si128(_mm_cmpeq_epi16(_mm_unpacklo_epi8(dstLayerID_vec128, _mm_setzero_si128()), _mm_set1_epi16(LAYERID)), _mm_set1_epi32(0xFFFFFFFF)),
|
dstEffectEnableMask = _mm_and_si128( _mm_xor_si128(_mm_cmpeq_epi16(_mm_unpacklo_epi8(dstLayerID_vec128, _mm_setzero_si128()), _mm_set1_epi16(LAYERID)), _mm_set1_epi32(0xFFFFFFFF)),
|
||||||
_mm_xor_si128(_mm_cmpeq_epi16(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF)) );
|
_mm_xor_si128(_mm_cmpeq_epi16(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF)) );
|
||||||
|
@ -2008,6 +2014,10 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D_SSE2(const size_t dstX,
|
||||||
|
|
||||||
const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT;
|
const IOREG_BLDCNT &BLDCNT = this->_IORegisterMap->BLDCNT;
|
||||||
const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(BLDCNT.BG0_Target1), _mm_set1_epi8(1));
|
const __m128i srcEffectEnableMask = _mm_cmpeq_epi8(_mm_set1_epi8(BLDCNT.BG0_Target1), _mm_set1_epi8(1));
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
__m128i dstEffectEnableMask = _mm_shuffle_epi8(this->_blend2_SSSE3, dstLayerID_vec128);
|
||||||
|
#else
|
||||||
__m128i dstEffectEnableMask = _mm_set_epi8(this->_blend2[dstLayerIDLine[15]],
|
__m128i dstEffectEnableMask = _mm_set_epi8(this->_blend2[dstLayerIDLine[15]],
|
||||||
this->_blend2[dstLayerIDLine[14]],
|
this->_blend2[dstLayerIDLine[14]],
|
||||||
this->_blend2[dstLayerIDLine[13]],
|
this->_blend2[dstLayerIDLine[13]],
|
||||||
|
@ -2024,6 +2034,7 @@ FORCEINLINE void GPUEngineBase::_RenderPixel3D_SSE2(const size_t dstX,
|
||||||
this->_blend2[dstLayerIDLine[ 2]],
|
this->_blend2[dstLayerIDLine[ 2]],
|
||||||
this->_blend2[dstLayerIDLine[ 1]],
|
this->_blend2[dstLayerIDLine[ 1]],
|
||||||
this->_blend2[dstLayerIDLine[ 0]]);
|
this->_blend2[dstLayerIDLine[ 0]]);
|
||||||
|
#endif
|
||||||
|
|
||||||
dstEffectEnableMask = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi8(dstLayerID_vec128, _mm_set1_epi8(GPULayerID_BG0)), _mm_set1_epi32(0xFFFFFFFF)),
|
dstEffectEnableMask = _mm_and_si128(_mm_xor_si128(_mm_cmpeq_epi8(dstLayerID_vec128, _mm_set1_epi8(GPULayerID_BG0)), _mm_set1_epi32(0xFFFFFFFF)),
|
||||||
_mm_xor_si128(_mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF)) );
|
_mm_xor_si128(_mm_cmpeq_epi8(dstEffectEnableMask, _mm_setzero_si128()), _mm_set1_epi32(0xFFFFFFFF)) );
|
||||||
|
@ -3676,6 +3687,16 @@ void GPUEngineBase::ParseReg_BLDCNT()
|
||||||
this->_blend2[GPULayerID_BG3] = (BLDCNT.BG3_Target2 != 0);
|
this->_blend2[GPULayerID_BG3] = (BLDCNT.BG3_Target2 != 0);
|
||||||
this->_blend2[GPULayerID_OBJ] = (BLDCNT.OBJ_Target2 != 0);
|
this->_blend2[GPULayerID_OBJ] = (BLDCNT.OBJ_Target2 != 0);
|
||||||
this->_blend2[GPULayerID_Backdrop] = (BLDCNT.Backdrop_Target2 != 0);
|
this->_blend2[GPULayerID_Backdrop] = (BLDCNT.Backdrop_Target2 != 0);
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
this->_blend2_SSSE3 = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
BLDCNT.Backdrop_Target2,
|
||||||
|
BLDCNT.OBJ_Target2,
|
||||||
|
BLDCNT.BG3_Target2,
|
||||||
|
BLDCNT.BG2_Target2,
|
||||||
|
BLDCNT.BG1_Target2,
|
||||||
|
BLDCNT.BG0_Target2);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUEngineBase::ParseReg_BLDALPHA()
|
void GPUEngineBase::ParseReg_BLDALPHA()
|
||||||
|
|
|
@ -30,6 +30,10 @@
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
#include <tmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
class GPUEngineBase;
|
class GPUEngineBase;
|
||||||
class EMUFILE;
|
class EMUFILE;
|
||||||
struct MMU_struct;
|
struct MMU_struct;
|
||||||
|
@ -1160,6 +1164,9 @@ protected:
|
||||||
u8 _sprBMPBoundary;
|
u8 _sprBMPBoundary;
|
||||||
|
|
||||||
bool _blend2[6];
|
bool _blend2[6];
|
||||||
|
#ifdef ENABLE_SSSE3
|
||||||
|
__m128i _blend2_SSSE3;
|
||||||
|
#endif
|
||||||
|
|
||||||
TBlendTable *_blendTable;
|
TBlendTable *_blendTable;
|
||||||
u16 *_currentFadeInColors;
|
u16 *_currentFadeInColors;
|
||||||
|
|
Loading…
Reference in New Issue