GS: ReadAndExpandBlock4H_32 improvements

This commit is contained in:
TellowKrinkle 2021-06-26 00:46:41 -05:00 committed by refractionpcsx2
parent c24ed8d47b
commit 5d3cbf8d1a
2 changed files with 7 additions and 9 deletions

View File

@@ -20,6 +20,8 @@ CONSTINIT const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6,
CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15); CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15);
CONSTINIT const GSVector4i GSBlock::m_w4mask(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7, 11, 15); CONSTINIT const GSVector4i GSBlock::m_w4mask(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7, 11, 15);
CONSTINIT const GSVector4i GSBlock::m_r4hmask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
CONSTINIT const GSVector4i GSBlock::m_r4hmask_avx2(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
CONSTINIT const GSVector4i GSBlock::m_palvec_mask(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); CONSTINIT const GSVector4i GSBlock::m_palvec_mask(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
CONSTINIT const GSVector4i GSBlock::m_avx2_r8mask1(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); CONSTINIT const GSVector4i GSBlock::m_avx2_r8mask1(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);

View File

@@ -25,6 +25,8 @@ class GSBlock
static const GSVector4i m_r8mask; static const GSVector4i m_r8mask;
static const GSVector4i m_r4mask; static const GSVector4i m_r4mask;
static const GSVector4i m_w4mask; static const GSVector4i m_w4mask;
static const GSVector4i m_r4hmask;
static const GSVector4i m_r4hmask_avx2;
static const GSVector4i m_palvec_mask; static const GSVector4i m_palvec_mask;
static const GSVector4i m_avx2_r8mask1; static const GSVector4i m_avx2_r8mask1;
@@ -1865,6 +1867,7 @@ public:
GSVector8i p0, p1, p2, p3; GSVector8i p0, p1, p2, p3;
LoadPalVecs(pal, p0, p1, p2, p3); LoadPalVecs(pal, p0, p1, p2, p3);
GSVector8i maskvec(mask); GSVector8i maskvec(mask);
GSVector8i shufvec = GSVector8i::broadcast128(m_r4hmask_avx2);
GSVector8i v0, v1, v2, v3; GSVector8i v0, v1, v2, v3;
@@ -1880,12 +1883,7 @@ public:
v2 = s[i * 4 + 2] >> shift; v2 = s[i * 4 + 2] >> shift;
v3 = s[i * 4 + 3] >> shift; v3 = s[i * 4 + 3] >> shift;
GSVector8i::sw128(v0, v1); GSVector8i all = v0.ps32(v1).pu16(v2.ps32(v3)).xzyw().acbd().shuffle8(shufvec);
GSVector8i::sw64(v0, v1);
GSVector8i::sw128(v2, v3);
GSVector8i::sw64(v2, v3);
GSVector8i all = v0.ps32(v1).pu16(v2.ps32(v3));
if (mask != 0xffffffff) if (mask != 0xffffffff)
all = all & mask; all = all & mask;
@@ -1914,9 +1912,7 @@ public:
v2 = s[i * 4 + 2] >> shift; v2 = s[i * 4 + 2] >> shift;
v3 = s[i * 4 + 3] >> shift; v3 = s[i * 4 + 3] >> shift;
GSVector4i::sw64(v0, v1, v2, v3); GSVector4i all = v0.ps32(v1).pu16(v2.ps32(v3)).shuffle8(m_r4hmask);
GSVector4i all = v0.ps32(v1).pu16(v2.ps32(v3));
if (mask != 0xffffffff) if (mask != 0xffffffff)
all = all & mask; all = all & mask;