mirror of https://github.com/PCSX2/pcsx2.git
GS: Use broadcast loads on AVX2
Broadcast loads are free on AVX2 processors, so we might as well use them.
This commit is contained in:
parent
793ba944d6
commit
1f6b2e629b
|
@ -16,11 +16,7 @@
|
|||
#include "PrecompiledHeader.h"
|
||||
#include "GSBlock.h"
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
CONSTINIT const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||
#else
|
||||
CONSTINIT const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||
#endif
|
||||
CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
|
||||
CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||
|
||||
|
|
|
@ -21,11 +21,7 @@
|
|||
|
||||
class GSBlock
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
static const GSVector8i m_r16mask;
|
||||
#else
|
||||
static const GSVector4i m_r16mask;
|
||||
#endif
|
||||
static const GSVector4i m_r8mask;
|
||||
static const GSVector4i m_r4mask;
|
||||
|
||||
|
@ -490,8 +486,10 @@ public:
|
|||
|
||||
const GSVector8i* s = (const GSVector8i*)src;
|
||||
|
||||
GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask);
|
||||
GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask);
|
||||
GSVector8i mask = GSVector8i::broadcast128(m_r16mask);
|
||||
|
||||
GSVector8i v0 = s[i * 2 + 0].shuffle8(mask);
|
||||
GSVector8i v1 = s[i * 2 + 1].shuffle8(mask);
|
||||
|
||||
GSVector8i::sw128(v0, v1);
|
||||
GSVector8i::sw32(v0, v1);
|
||||
|
@ -1637,10 +1635,12 @@ public:
|
|||
GSVector8i TA0(TEXA.TA0 << 24);
|
||||
GSVector8i TA1(TEXA.TA1 << 24);
|
||||
|
||||
GSVector8i mask = GSVector8i::broadcast128(m_r16mask);
|
||||
|
||||
for (int i = 0; i < 4; i++, dst += dstpitch * 2)
|
||||
{
|
||||
GSVector8i v0 = s[i * 2 + 0].shuffle8(m_r16mask);
|
||||
GSVector8i v1 = s[i * 2 + 1].shuffle8(m_r16mask);
|
||||
GSVector8i v0 = s[i * 2 + 0].shuffle8(mask);
|
||||
GSVector8i v1 = s[i * 2 + 1].shuffle8(mask);
|
||||
|
||||
GSVector8i::sw128(v0, v1);
|
||||
GSVector8i::sw32(v0, v1);
|
||||
|
|
Loading…
Reference in New Issue