From 5d3cbf8d1a5bcdd8eae6e01373202630322dd5de Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sat, 26 Jun 2021 00:46:41 -0500 Subject: [PATCH] GS: ReadAndExpandBlock4H_32 improvements --- pcsx2/GS/GSBlock.cpp | 2 ++ pcsx2/GS/GSBlock.h | 14 +++++--------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pcsx2/GS/GSBlock.cpp b/pcsx2/GS/GSBlock.cpp index f89353c14e..b3bd16aa9a 100644 --- a/pcsx2/GS/GSBlock.cpp +++ b/pcsx2/GS/GSBlock.cpp @@ -20,6 +20,8 @@ CONSTINIT const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 8, 4, 12, 1, 9, 5, 13, 2, 10, 6, 14, 3, 11, 7, 15); CONSTINIT const GSVector4i GSBlock::m_w4mask(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7, 11, 15); +CONSTINIT const GSVector4i GSBlock::m_r4hmask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); +CONSTINIT const GSVector4i GSBlock::m_r4hmask_avx2(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15); CONSTINIT const GSVector4i GSBlock::m_palvec_mask(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); CONSTINIT const GSVector4i GSBlock::m_avx2_r8mask1(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); diff --git a/pcsx2/GS/GSBlock.h b/pcsx2/GS/GSBlock.h index 8a47adac4c..eb475184bd 100644 --- a/pcsx2/GS/GSBlock.h +++ b/pcsx2/GS/GSBlock.h @@ -25,6 +25,8 @@ class GSBlock static const GSVector4i m_r8mask; static const GSVector4i m_r4mask; static const GSVector4i m_w4mask; + static const GSVector4i m_r4hmask; + static const GSVector4i m_r4hmask_avx2; static const GSVector4i m_palvec_mask; static const GSVector4i m_avx2_r8mask1; @@ -1865,6 +1867,7 @@ public: GSVector8i p0, p1, p2, p3; LoadPalVecs(pal, p0, p1, p2, p3); GSVector8i maskvec(mask); + GSVector8i shufvec = GSVector8i::broadcast128(m_r4hmask_avx2); GSVector8i v0, v1, v2, v3; @@ -1880,12 +1883,7 @@ public: v2 = s[i * 4 + 2] >> shift; v3 = s[i * 4 + 3] >> shift; - GSVector8i::sw128(v0, v1); - GSVector8i::sw64(v0, v1); - GSVector8i::sw128(v2, v3); - GSVector8i::sw64(v2, v3); - - GSVector8i all = v0.ps32(v1).pu16(v2.ps32(v3)); + GSVector8i all = v0.ps32(v1).pu16(v2.ps32(v3)).xzyw().acbd().shuffle8(shufvec); if (mask != 0xffffffff) all = all & mask; @@ -1914,9 +1912,7 @@ public: v2 = s[i * 4 + 2] >> shift; v3 = s[i * 4 + 3] >> shift; - GSVector4i::sw64(v0, v1, v2, v3); - - GSVector4i all = v0.ps32(v1).pu16(v2.ps32(v3)); + GSVector4i all = v0.ps32(v1).pu16(v2.ps32(v3)).shuffle8(m_r4hmask); if (mask != 0xffffffff) all = all & mask;