mirror of https://github.com/PCSX2/pcsx2.git
GS: ReadColumn8 AVX2 path
This commit is contained in:
parent
244a4da28a
commit
bdc7dc2cd8
|
@ -20,6 +20,9 @@ CONSTINIT const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6,
|
|||
CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
|
||||
CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||
|
||||
// Byte-shuffle control masks for the AVX2 ReadColumn8 path. Each 128-bit mask is
// expanded with GSVector8i::broadcast128() and passed to shuffle8(); mask1 is
// applied to the first source vector and mask2 to the second.
CONSTINIT const GSVector4i GSBlock::m_avx2_r8mask1(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
|
||||
CONSTINIT const GSVector4i GSBlock::m_avx2_r8mask2(1, 5, 9, 13, 0, 4, 8, 12, 3, 7, 11, 15, 2, 6, 10, 14);
|
||||
|
||||
CONSTINIT const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
|
||||
CONSTINIT const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
|
||||
CONSTINIT const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
|
||||
|
|
|
@ -25,6 +25,9 @@ class GSBlock
|
|||
static const GSVector4i m_r8mask;
|
||||
static const GSVector4i m_r4mask;
|
||||
|
||||
// Byte-shuffle control masks consumed by the AVX2 branch of ReadColumn8.
static const GSVector4i m_avx2_r8mask1;
|
||||
static const GSVector4i m_avx2_r8mask2;
|
||||
|
||||
static const GSVector4i m_uw8hmask0;
|
||||
static const GSVector4i m_uw8hmask1;
|
||||
static const GSVector4i m_uw8hmask2;
|
||||
|
@ -512,30 +515,34 @@ public:
|
|||
|
||||
//for(int j = 0; j < 64; j++) ((u8*)src)[j] = (u8)j;
|
||||
|
||||
#if 0 //_M_SSE >= 0x501
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
const GSVector8i* s = (const GSVector8i*)src;
|
||||
|
||||
GSVector8i v0 = s[i * 2 + 0];
|
||||
GSVector8i v1 = s[i * 2 + 1];
|
||||
GSVector8i v0, v1;
|
||||
|
||||
GSVector8i::sw8(v0, v1);
|
||||
GSVector8i::sw16(v0, v1);
|
||||
GSVector8i::sw8(v0, v1);
|
||||
GSVector8i::sw128(v0, v1);
|
||||
GSVector8i::sw16(v0, v1);
|
||||
if ((i & 1) == 0)
|
||||
{
|
||||
v0 = s[i * 2 + 0];
|
||||
v1 = s[i * 2 + 1];
|
||||
}
|
||||
else
|
||||
{
|
||||
v1 = s[i * 2 + 0];
|
||||
v0 = s[i * 2 + 1];
|
||||
}
|
||||
|
||||
v0 = v0.acbd();
|
||||
v1 = v1.acbd();
|
||||
v1 = v1.yxwz();
|
||||
GSVector8i v2 = v0.acbd().shuffle8(GSVector8i::broadcast128(m_avx2_r8mask1));
|
||||
GSVector8i v3 = v1.acbd().shuffle8(GSVector8i::broadcast128(m_avx2_r8mask2));
|
||||
|
||||
v0 = v2.blend32<0xaa>(v3);
|
||||
v1 = v3.blend32<0xaa>(v2);
|
||||
|
||||
GSVector8i::storel(&dst[dstpitch * 0], v0);
|
||||
GSVector8i::storeh(&dst[dstpitch * 1], v0);
|
||||
GSVector8i::storel(&dst[dstpitch * 2], v1);
|
||||
GSVector8i::storeh(&dst[dstpitch * 3], v1);
|
||||
|
||||
// TODO: not sure if this is worth it, not in this form, there should be a shorter path
|
||||
|
||||
#else
|
||||
|
||||
const GSVector4i* s = (const GSVector4i*)src;
|
||||
|
|
|
@ -316,6 +316,12 @@ public:
|
|||
return GSVector8i(_mm256_blend_epi16(m, a, mask));
|
||||
}
|
||||
|
||||
// Blends 32-bit elements using a compile-time immediate (wraps _mm256_blend_epi32).
// For each of the eight 32-bit elements, bit i of `mask` selects the element from
// `a` when set and from this vector's register `m` when clear.
// Returns the blended result as a new GSVector8i; neither operand is modified.
template <int mask>
|
||||
__forceinline GSVector8i blend32(const GSVector8i& a) const
|
||||
{
|
||||
return GSVector8i(_mm256_blend_epi32(m, a, mask));
|
||||
}
|
||||
|
||||
__forceinline GSVector8i blend(const GSVector8i& a, const GSVector8i& mask) const
|
||||
{
|
||||
return GSVector8i(_mm256_or_si256(_mm256_andnot_si256(mask, m), _mm256_and_si256(mask, a)));
|
||||
|
|
Loading…
Reference in New Issue