GS: Use templates for shift immediates

Also removes the __m128i overloads. They are too easy to mistake for a
per-element variable shift (which doesn't exist in SSE4); instead, they
shift every element by a single amount taken from the low 64 bits of the
count vector.
This commit is contained in:
Stenzek 2023-12-29 22:20:58 +10:00 committed by Connor McLaughlin
parent b2a0dba3bb
commit 47f8d8c71c
9 changed files with 511 additions and 596 deletions

View File

@ -837,8 +837,8 @@ void GSClut::Expand16(const u16* RESTRICT src, u32* RESTRICT dst, int w, const G
c = s[i];
cl = c.upl16(c);
ch = c.uph16(c);
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15));
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15));
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16<15>());
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16<15>());
}
}
else
@ -848,8 +848,8 @@ void GSClut::Expand16(const u16* RESTRICT src, u32* RESTRICT dst, int w, const G
c = s[i];
cl = c.upl16(c);
ch = c.uph16(c);
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero());
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16(15)).andnot(ch == GSVector4i::zero());
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16<15>()).andnot(cl == GSVector4i::zero());
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA0.blend8(TA1, ch.sra16<15>()).andnot(ch == GSVector4i::zero());
}
}
}

View File

@ -97,7 +97,7 @@ void GSDrawingContext::UpdateScissor()
scissor.in = rscissor + GSVector4i::cxpr(0, 0, 1, 1);
// Fixed-point scissor min/max, used for rejecting primitives which are entirely outside.
scissor.cull = rscissor.sll32(4);
scissor.cull = rscissor.sll32<4>();
// Offset applied to vertices for culling, zw is for native resolution culling
// We want to round subpixels down, because at least one pixel gets filled per scanline.

View File

@ -262,7 +262,6 @@ union name \
#define REG128_SET(name) \
union name \
{ \
__m128i m128; \
u64 U64[2]; \
u32 U32[4];

View File

@ -594,7 +594,7 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
GSVector4i zf = xy.zwzw();
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
zf = zf.srl32<4>() & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
m_v.m[1] = xy.upl32(zf);
@ -654,7 +654,7 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u3
GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
GSVector4i zf = GSVector4i::loadl(&r[2].U64[1]);
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
zf = zf.srl32<4>() & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
@ -784,7 +784,7 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
const GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
const GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff()));
const GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32(24).srl<4>());
const GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32<24>().srl<4>());
m_v.m[1] = xyz.upl64(uvf);
@ -3363,7 +3363,7 @@ __forceinline void GSState::VertexKick(u32 skip)
// integer coordinates for culling at native resolution, and the fixed point for all others. The XY offset has to be
// applied, then we split it into the fixed/integer portions.
const GSVector4i xy_ofs = new_v1.xxxx().u16to32().sub32(m_xyof);
const GSVector4i xy = xy_ofs.blend32<12>(xy_ofs.sra32(4));
const GSVector4i xy = xy_ofs.blend32<12>(xy_ofs.sra32<4>());
m_vertex.xy[xy_tail & 3] = xy;
// Backup head for triangle fans so we can read it later, otherwise it'll get lost after the 4th vertex.

View File

@ -153,7 +153,7 @@ public:
{
GSVector4i v((int)u);
*this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
*this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32<31>()));
}
__forceinline explicit GSVector4(const GSVector4i& v);
@ -643,7 +643,7 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
{
GSVector4i v = GSVector4i::load((int)u);
return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32<31>()));
}
template <bool aligned>

View File

@ -706,14 +706,16 @@ public:
return GSVector4i(_mm_slli_si128(m, i));
}
__forceinline GSVector4i sra16(int i) const
template <int i>
__forceinline GSVector4i sra16() const
{
return GSVector4i(_mm_srai_epi16(m, i));
}
__forceinline GSVector4i sra16(__m128i i) const
template <int i>
__forceinline GSVector4i sra32() const
{
return GSVector4i(_mm_sra_epi16(m, i));
return GSVector4i(_mm_srai_epi32(m, i));
}
__forceinline GSVector4i sra32(int i) const
@ -721,11 +723,6 @@ public:
return GSVector4i(_mm_srai_epi32(m, i));
}
__forceinline GSVector4i sra32(__m128i i) const
{
return GSVector4i(_mm_sra_epi32(m, i));
}
#if _M_SSE >= 0x501
__forceinline GSVector4i srav32(const GSVector4i& v) const
{
@ -733,26 +730,18 @@ public:
}
#endif
__forceinline GSVector4i sll16(int i) const
template<int i>
__forceinline GSVector4i sll16() const
{
return GSVector4i(_mm_slli_epi16(m, i));
}
__forceinline GSVector4i sll16(__m128i i) const
{
return GSVector4i(_mm_sll_epi16(m, i));
}
__forceinline GSVector4i sll32(int i) const
template <int i>
__forceinline GSVector4i sll32() const
{
return GSVector4i(_mm_slli_epi32(m, i));
}
__forceinline GSVector4i sll32(__m128i i) const
{
return GSVector4i(_mm_sll_epi32(m, i));
}
#if _M_SSE >= 0x501
__forceinline GSVector4i sllv32(const GSVector4i& v) const
{
@ -760,14 +749,16 @@ public:
}
#endif
__forceinline GSVector4i sll64(int i) const
template <int i>
__forceinline GSVector4i sll64() const
{
return GSVector4i(_mm_slli_epi64(m, i));
}
__forceinline GSVector4i sll64(__m128i i) const
template <int i>
__forceinline GSVector4i srl16() const
{
return GSVector4i(_mm_sll_epi64(m, i));
return GSVector4i(_mm_srli_epi16(m, i));
}
__forceinline GSVector4i srl16(int i) const
@ -775,9 +766,10 @@ public:
return GSVector4i(_mm_srli_epi16(m, i));
}
__forceinline GSVector4i srl16(__m128i i) const
template <int i>
__forceinline GSVector4i srl32() const
{
return GSVector4i(_mm_srl_epi16(m, i));
return GSVector4i(_mm_srli_epi32(m, i));
}
__forceinline GSVector4i srl32(int i) const
@ -785,11 +777,6 @@ public:
return GSVector4i(_mm_srli_epi32(m, i));
}
__forceinline GSVector4i srl32(__m128i i) const
{
return GSVector4i(_mm_srl_epi32(m, i));
}
#if _M_SSE >= 0x501
__forceinline GSVector4i srlv32(const GSVector4i& v) const
{
@ -797,16 +784,12 @@ public:
}
#endif
__forceinline GSVector4i srl64(int i) const
template <int i>
__forceinline GSVector4i srl64() const
{
return GSVector4i(_mm_srli_epi64(m, i));
}
__forceinline GSVector4i srl64(__m128i i) const
{
return GSVector4i(_mm_srl_epi64(m, i));
}
__forceinline GSVector4i add8(const GSVector4i& v) const
{
return GSVector4i(_mm_add_epi8(m, v.m));
@ -945,7 +928,7 @@ public:
{
// (a - this) * f >> 4 + this (a, this: 8-bit, f: 4-bit)
return add16(a.sub16(*this).mul16l(f).sra16(4));
return add16(a.sub16(*this).mul16l(f).sra16<4>());
}
template <int shift>
@ -957,7 +940,7 @@ public:
return mul16hrs(f);
}
return sll16(shift + 1).mul16hs(f);
return sll16<shift + 1>().mul16hs(f);
}
__forceinline bool eq(const GSVector4i& v) const
@ -1988,199 +1971,199 @@ public:
__forceinline static GSVector4i xffffffff() { return zero() == zero(); }
__forceinline static GSVector4i x00000001() { return xffffffff().srl32(31); }
__forceinline static GSVector4i x00000003() { return xffffffff().srl32(30); }
__forceinline static GSVector4i x00000007() { return xffffffff().srl32(29); }
__forceinline static GSVector4i x0000000f() { return xffffffff().srl32(28); }
__forceinline static GSVector4i x0000001f() { return xffffffff().srl32(27); }
__forceinline static GSVector4i x0000003f() { return xffffffff().srl32(26); }
__forceinline static GSVector4i x0000007f() { return xffffffff().srl32(25); }
__forceinline static GSVector4i x000000ff() { return xffffffff().srl32(24); }
__forceinline static GSVector4i x000001ff() { return xffffffff().srl32(23); }
__forceinline static GSVector4i x000003ff() { return xffffffff().srl32(22); }
__forceinline static GSVector4i x000007ff() { return xffffffff().srl32(21); }
__forceinline static GSVector4i x00000fff() { return xffffffff().srl32(20); }
__forceinline static GSVector4i x00001fff() { return xffffffff().srl32(19); }
__forceinline static GSVector4i x00003fff() { return xffffffff().srl32(18); }
__forceinline static GSVector4i x00007fff() { return xffffffff().srl32(17); }
__forceinline static GSVector4i x0000ffff() { return xffffffff().srl32(16); }
__forceinline static GSVector4i x0001ffff() { return xffffffff().srl32(15); }
__forceinline static GSVector4i x0003ffff() { return xffffffff().srl32(14); }
__forceinline static GSVector4i x0007ffff() { return xffffffff().srl32(13); }
__forceinline static GSVector4i x000fffff() { return xffffffff().srl32(12); }
__forceinline static GSVector4i x001fffff() { return xffffffff().srl32(11); }
__forceinline static GSVector4i x003fffff() { return xffffffff().srl32(10); }
__forceinline static GSVector4i x007fffff() { return xffffffff().srl32( 9); }
__forceinline static GSVector4i x00ffffff() { return xffffffff().srl32( 8); }
__forceinline static GSVector4i x01ffffff() { return xffffffff().srl32( 7); }
__forceinline static GSVector4i x03ffffff() { return xffffffff().srl32( 6); }
__forceinline static GSVector4i x07ffffff() { return xffffffff().srl32( 5); }
__forceinline static GSVector4i x0fffffff() { return xffffffff().srl32( 4); }
__forceinline static GSVector4i x1fffffff() { return xffffffff().srl32( 3); }
__forceinline static GSVector4i x3fffffff() { return xffffffff().srl32( 2); }
__forceinline static GSVector4i x7fffffff() { return xffffffff().srl32( 1); }
__forceinline static GSVector4i x00000001() { return xffffffff().srl32<31>(); }
__forceinline static GSVector4i x00000003() { return xffffffff().srl32<30>(); }
__forceinline static GSVector4i x00000007() { return xffffffff().srl32<29>(); }
__forceinline static GSVector4i x0000000f() { return xffffffff().srl32<28>(); }
__forceinline static GSVector4i x0000001f() { return xffffffff().srl32<27>(); }
__forceinline static GSVector4i x0000003f() { return xffffffff().srl32<26>(); }
__forceinline static GSVector4i x0000007f() { return xffffffff().srl32<25>(); }
__forceinline static GSVector4i x000000ff() { return xffffffff().srl32<24>(); }
__forceinline static GSVector4i x000001ff() { return xffffffff().srl32<23>(); }
__forceinline static GSVector4i x000003ff() { return xffffffff().srl32<22>(); }
__forceinline static GSVector4i x000007ff() { return xffffffff().srl32<21>(); }
__forceinline static GSVector4i x00000fff() { return xffffffff().srl32<20>(); }
__forceinline static GSVector4i x00001fff() { return xffffffff().srl32<19>(); }
__forceinline static GSVector4i x00003fff() { return xffffffff().srl32<18>(); }
__forceinline static GSVector4i x00007fff() { return xffffffff().srl32<17>(); }
__forceinline static GSVector4i x0000ffff() { return xffffffff().srl32<16>(); }
__forceinline static GSVector4i x0001ffff() { return xffffffff().srl32<15>(); }
__forceinline static GSVector4i x0003ffff() { return xffffffff().srl32<14>(); }
__forceinline static GSVector4i x0007ffff() { return xffffffff().srl32<13>(); }
__forceinline static GSVector4i x000fffff() { return xffffffff().srl32<12>(); }
__forceinline static GSVector4i x001fffff() { return xffffffff().srl32<11>(); }
__forceinline static GSVector4i x003fffff() { return xffffffff().srl32<10>(); }
__forceinline static GSVector4i x007fffff() { return xffffffff().srl32< 9>(); }
__forceinline static GSVector4i x00ffffff() { return xffffffff().srl32< 8>(); }
__forceinline static GSVector4i x01ffffff() { return xffffffff().srl32< 7>(); }
__forceinline static GSVector4i x03ffffff() { return xffffffff().srl32< 6>(); }
__forceinline static GSVector4i x07ffffff() { return xffffffff().srl32< 5>(); }
__forceinline static GSVector4i x0fffffff() { return xffffffff().srl32< 4>(); }
__forceinline static GSVector4i x1fffffff() { return xffffffff().srl32< 3>(); }
__forceinline static GSVector4i x3fffffff() { return xffffffff().srl32< 2>(); }
__forceinline static GSVector4i x7fffffff() { return xffffffff().srl32< 1>(); }
__forceinline static GSVector4i x80000000() { return xffffffff().sll32(31); }
__forceinline static GSVector4i xc0000000() { return xffffffff().sll32(30); }
__forceinline static GSVector4i xe0000000() { return xffffffff().sll32(29); }
__forceinline static GSVector4i xf0000000() { return xffffffff().sll32(28); }
__forceinline static GSVector4i xf8000000() { return xffffffff().sll32(27); }
__forceinline static GSVector4i xfc000000() { return xffffffff().sll32(26); }
__forceinline static GSVector4i xfe000000() { return xffffffff().sll32(25); }
__forceinline static GSVector4i xff000000() { return xffffffff().sll32(24); }
__forceinline static GSVector4i xff800000() { return xffffffff().sll32(23); }
__forceinline static GSVector4i xffc00000() { return xffffffff().sll32(22); }
__forceinline static GSVector4i xffe00000() { return xffffffff().sll32(21); }
__forceinline static GSVector4i xfff00000() { return xffffffff().sll32(20); }
__forceinline static GSVector4i xfff80000() { return xffffffff().sll32(19); }
__forceinline static GSVector4i xfffc0000() { return xffffffff().sll32(18); }
__forceinline static GSVector4i xfffe0000() { return xffffffff().sll32(17); }
__forceinline static GSVector4i xffff0000() { return xffffffff().sll32(16); }
__forceinline static GSVector4i xffff8000() { return xffffffff().sll32(15); }
__forceinline static GSVector4i xffffc000() { return xffffffff().sll32(14); }
__forceinline static GSVector4i xffffe000() { return xffffffff().sll32(13); }
__forceinline static GSVector4i xfffff000() { return xffffffff().sll32(12); }
__forceinline static GSVector4i xfffff800() { return xffffffff().sll32(11); }
__forceinline static GSVector4i xfffffc00() { return xffffffff().sll32(10); }
__forceinline static GSVector4i xfffffe00() { return xffffffff().sll32( 9); }
__forceinline static GSVector4i xffffff00() { return xffffffff().sll32( 8); }
__forceinline static GSVector4i xffffff80() { return xffffffff().sll32( 7); }
__forceinline static GSVector4i xffffffc0() { return xffffffff().sll32( 6); }
__forceinline static GSVector4i xffffffe0() { return xffffffff().sll32( 5); }
__forceinline static GSVector4i xfffffff0() { return xffffffff().sll32( 4); }
__forceinline static GSVector4i xfffffff8() { return xffffffff().sll32( 3); }
__forceinline static GSVector4i xfffffffc() { return xffffffff().sll32( 2); }
__forceinline static GSVector4i xfffffffe() { return xffffffff().sll32( 1); }
__forceinline static GSVector4i x80000000() { return xffffffff().sll32<31>(); }
__forceinline static GSVector4i xc0000000() { return xffffffff().sll32<30>(); }
__forceinline static GSVector4i xe0000000() { return xffffffff().sll32<29>(); }
__forceinline static GSVector4i xf0000000() { return xffffffff().sll32<28>(); }
__forceinline static GSVector4i xf8000000() { return xffffffff().sll32<27>(); }
__forceinline static GSVector4i xfc000000() { return xffffffff().sll32<26>(); }
__forceinline static GSVector4i xfe000000() { return xffffffff().sll32<25>(); }
__forceinline static GSVector4i xff000000() { return xffffffff().sll32<24>(); }
__forceinline static GSVector4i xff800000() { return xffffffff().sll32<23>(); }
__forceinline static GSVector4i xffc00000() { return xffffffff().sll32<22>(); }
__forceinline static GSVector4i xffe00000() { return xffffffff().sll32<21>(); }
__forceinline static GSVector4i xfff00000() { return xffffffff().sll32<20>(); }
__forceinline static GSVector4i xfff80000() { return xffffffff().sll32<19>(); }
__forceinline static GSVector4i xfffc0000() { return xffffffff().sll32<18>(); }
__forceinline static GSVector4i xfffe0000() { return xffffffff().sll32<17>(); }
__forceinline static GSVector4i xffff0000() { return xffffffff().sll32<16>(); }
__forceinline static GSVector4i xffff8000() { return xffffffff().sll32<15>(); }
__forceinline static GSVector4i xffffc000() { return xffffffff().sll32<14>(); }
__forceinline static GSVector4i xffffe000() { return xffffffff().sll32<13>(); }
__forceinline static GSVector4i xfffff000() { return xffffffff().sll32<12>(); }
__forceinline static GSVector4i xfffff800() { return xffffffff().sll32<11>(); }
__forceinline static GSVector4i xfffffc00() { return xffffffff().sll32<10>(); }
__forceinline static GSVector4i xfffffe00() { return xffffffff().sll32< 9>(); }
__forceinline static GSVector4i xffffff00() { return xffffffff().sll32< 8>(); }
__forceinline static GSVector4i xffffff80() { return xffffffff().sll32< 7>(); }
__forceinline static GSVector4i xffffffc0() { return xffffffff().sll32< 6>(); }
__forceinline static GSVector4i xffffffe0() { return xffffffff().sll32< 5>(); }
__forceinline static GSVector4i xfffffff0() { return xffffffff().sll32< 4>(); }
__forceinline static GSVector4i xfffffff8() { return xffffffff().sll32< 3>(); }
__forceinline static GSVector4i xfffffffc() { return xffffffff().sll32< 2>(); }
__forceinline static GSVector4i xfffffffe() { return xffffffff().sll32< 1>(); }
__forceinline static GSVector4i x0001() { return xffffffff().srl16(15); }
__forceinline static GSVector4i x0003() { return xffffffff().srl16(14); }
__forceinline static GSVector4i x0007() { return xffffffff().srl16(13); }
__forceinline static GSVector4i x000f() { return xffffffff().srl16(12); }
__forceinline static GSVector4i x001f() { return xffffffff().srl16(11); }
__forceinline static GSVector4i x003f() { return xffffffff().srl16(10); }
__forceinline static GSVector4i x007f() { return xffffffff().srl16( 9); }
__forceinline static GSVector4i x00ff() { return xffffffff().srl16( 8); }
__forceinline static GSVector4i x01ff() { return xffffffff().srl16( 7); }
__forceinline static GSVector4i x03ff() { return xffffffff().srl16( 6); }
__forceinline static GSVector4i x07ff() { return xffffffff().srl16( 5); }
__forceinline static GSVector4i x0fff() { return xffffffff().srl16( 4); }
__forceinline static GSVector4i x1fff() { return xffffffff().srl16( 3); }
__forceinline static GSVector4i x3fff() { return xffffffff().srl16( 2); }
__forceinline static GSVector4i x7fff() { return xffffffff().srl16( 1); }
__forceinline static GSVector4i x0001() { return xffffffff().srl16<15>(); }
__forceinline static GSVector4i x0003() { return xffffffff().srl16<14>(); }
__forceinline static GSVector4i x0007() { return xffffffff().srl16<13>(); }
__forceinline static GSVector4i x000f() { return xffffffff().srl16<12>(); }
__forceinline static GSVector4i x001f() { return xffffffff().srl16<11>(); }
__forceinline static GSVector4i x003f() { return xffffffff().srl16<10>(); }
__forceinline static GSVector4i x007f() { return xffffffff().srl16< 9>(); }
__forceinline static GSVector4i x00ff() { return xffffffff().srl16< 8>(); }
__forceinline static GSVector4i x01ff() { return xffffffff().srl16< 7>(); }
__forceinline static GSVector4i x03ff() { return xffffffff().srl16< 6>(); }
__forceinline static GSVector4i x07ff() { return xffffffff().srl16< 5>(); }
__forceinline static GSVector4i x0fff() { return xffffffff().srl16< 4>(); }
__forceinline static GSVector4i x1fff() { return xffffffff().srl16< 3>(); }
__forceinline static GSVector4i x3fff() { return xffffffff().srl16< 2>(); }
__forceinline static GSVector4i x7fff() { return xffffffff().srl16< 1>(); }
__forceinline static GSVector4i x8000() { return xffffffff().sll16(15); }
__forceinline static GSVector4i xc000() { return xffffffff().sll16(14); }
__forceinline static GSVector4i xe000() { return xffffffff().sll16(13); }
__forceinline static GSVector4i xf000() { return xffffffff().sll16(12); }
__forceinline static GSVector4i xf800() { return xffffffff().sll16(11); }
__forceinline static GSVector4i xfc00() { return xffffffff().sll16(10); }
__forceinline static GSVector4i xfe00() { return xffffffff().sll16( 9); }
__forceinline static GSVector4i xff00() { return xffffffff().sll16( 8); }
__forceinline static GSVector4i xff80() { return xffffffff().sll16( 7); }
__forceinline static GSVector4i xffc0() { return xffffffff().sll16( 6); }
__forceinline static GSVector4i xffe0() { return xffffffff().sll16( 5); }
__forceinline static GSVector4i xfff0() { return xffffffff().sll16( 4); }
__forceinline static GSVector4i xfff8() { return xffffffff().sll16( 3); }
__forceinline static GSVector4i xfffc() { return xffffffff().sll16( 2); }
__forceinline static GSVector4i xfffe() { return xffffffff().sll16( 1); }
__forceinline static GSVector4i x8000() { return xffffffff().sll16<15>(); }
__forceinline static GSVector4i xc000() { return xffffffff().sll16<14>(); }
__forceinline static GSVector4i xe000() { return xffffffff().sll16<13>(); }
__forceinline static GSVector4i xf000() { return xffffffff().sll16<12>(); }
__forceinline static GSVector4i xf800() { return xffffffff().sll16<11>(); }
__forceinline static GSVector4i xfc00() { return xffffffff().sll16<10>(); }
__forceinline static GSVector4i xfe00() { return xffffffff().sll16< 9>(); }
__forceinline static GSVector4i xff00() { return xffffffff().sll16< 8>(); }
__forceinline static GSVector4i xff80() { return xffffffff().sll16< 7>(); }
__forceinline static GSVector4i xffc0() { return xffffffff().sll16< 6>(); }
__forceinline static GSVector4i xffe0() { return xffffffff().sll16< 5>(); }
__forceinline static GSVector4i xfff0() { return xffffffff().sll16< 4>(); }
__forceinline static GSVector4i xfff8() { return xffffffff().sll16< 3>(); }
__forceinline static GSVector4i xfffc() { return xffffffff().sll16< 2>(); }
__forceinline static GSVector4i xfffe() { return xffffffff().sll16< 1>(); }
__forceinline static GSVector4i xffffffff(const GSVector4i& v) { return v == v; }
__forceinline static GSVector4i x00000001(const GSVector4i& v) { return xffffffff(v).srl32(31); }
__forceinline static GSVector4i x00000003(const GSVector4i& v) { return xffffffff(v).srl32(30); }
__forceinline static GSVector4i x00000007(const GSVector4i& v) { return xffffffff(v).srl32(29); }
__forceinline static GSVector4i x0000000f(const GSVector4i& v) { return xffffffff(v).srl32(28); }
__forceinline static GSVector4i x0000001f(const GSVector4i& v) { return xffffffff(v).srl32(27); }
__forceinline static GSVector4i x0000003f(const GSVector4i& v) { return xffffffff(v).srl32(26); }
__forceinline static GSVector4i x0000007f(const GSVector4i& v) { return xffffffff(v).srl32(25); }
__forceinline static GSVector4i x000000ff(const GSVector4i& v) { return xffffffff(v).srl32(24); }
__forceinline static GSVector4i x000001ff(const GSVector4i& v) { return xffffffff(v).srl32(23); }
__forceinline static GSVector4i x000003ff(const GSVector4i& v) { return xffffffff(v).srl32(22); }
__forceinline static GSVector4i x000007ff(const GSVector4i& v) { return xffffffff(v).srl32(21); }
__forceinline static GSVector4i x00000fff(const GSVector4i& v) { return xffffffff(v).srl32(20); }
__forceinline static GSVector4i x00001fff(const GSVector4i& v) { return xffffffff(v).srl32(19); }
__forceinline static GSVector4i x00003fff(const GSVector4i& v) { return xffffffff(v).srl32(18); }
__forceinline static GSVector4i x00007fff(const GSVector4i& v) { return xffffffff(v).srl32(17); }
__forceinline static GSVector4i x0000ffff(const GSVector4i& v) { return xffffffff(v).srl32(16); }
__forceinline static GSVector4i x0001ffff(const GSVector4i& v) { return xffffffff(v).srl32(15); }
__forceinline static GSVector4i x0003ffff(const GSVector4i& v) { return xffffffff(v).srl32(14); }
__forceinline static GSVector4i x0007ffff(const GSVector4i& v) { return xffffffff(v).srl32(13); }
__forceinline static GSVector4i x000fffff(const GSVector4i& v) { return xffffffff(v).srl32(12); }
__forceinline static GSVector4i x001fffff(const GSVector4i& v) { return xffffffff(v).srl32(11); }
__forceinline static GSVector4i x003fffff(const GSVector4i& v) { return xffffffff(v).srl32(10); }
__forceinline static GSVector4i x007fffff(const GSVector4i& v) { return xffffffff(v).srl32( 9); }
__forceinline static GSVector4i x00ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 8); }
__forceinline static GSVector4i x01ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 7); }
__forceinline static GSVector4i x03ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 6); }
__forceinline static GSVector4i x07ffffff(const GSVector4i& v) { return xffffffff(v).srl32( 5); }
__forceinline static GSVector4i x0fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 4); }
__forceinline static GSVector4i x1fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 3); }
__forceinline static GSVector4i x3fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 2); }
__forceinline static GSVector4i x7fffffff(const GSVector4i& v) { return xffffffff(v).srl32( 1); }
__forceinline static GSVector4i x00000001(const GSVector4i& v) { return xffffffff(v).srl32<31>(); }
__forceinline static GSVector4i x00000003(const GSVector4i& v) { return xffffffff(v).srl32<30>(); }
__forceinline static GSVector4i x00000007(const GSVector4i& v) { return xffffffff(v).srl32<29>(); }
__forceinline static GSVector4i x0000000f(const GSVector4i& v) { return xffffffff(v).srl32<28>(); }
__forceinline static GSVector4i x0000001f(const GSVector4i& v) { return xffffffff(v).srl32<27>(); }
__forceinline static GSVector4i x0000003f(const GSVector4i& v) { return xffffffff(v).srl32<26>(); }
__forceinline static GSVector4i x0000007f(const GSVector4i& v) { return xffffffff(v).srl32<25>(); }
__forceinline static GSVector4i x000000ff(const GSVector4i& v) { return xffffffff(v).srl32<24>(); }
__forceinline static GSVector4i x000001ff(const GSVector4i& v) { return xffffffff(v).srl32<23>(); }
__forceinline static GSVector4i x000003ff(const GSVector4i& v) { return xffffffff(v).srl32<22>(); }
__forceinline static GSVector4i x000007ff(const GSVector4i& v) { return xffffffff(v).srl32<21>(); }
__forceinline static GSVector4i x00000fff(const GSVector4i& v) { return xffffffff(v).srl32<20>(); }
__forceinline static GSVector4i x00001fff(const GSVector4i& v) { return xffffffff(v).srl32<19>(); }
__forceinline static GSVector4i x00003fff(const GSVector4i& v) { return xffffffff(v).srl32<18>(); }
__forceinline static GSVector4i x00007fff(const GSVector4i& v) { return xffffffff(v).srl32<17>(); }
__forceinline static GSVector4i x0000ffff(const GSVector4i& v) { return xffffffff(v).srl32<16>(); }
__forceinline static GSVector4i x0001ffff(const GSVector4i& v) { return xffffffff(v).srl32<15>(); }
__forceinline static GSVector4i x0003ffff(const GSVector4i& v) { return xffffffff(v).srl32<14>(); }
__forceinline static GSVector4i x0007ffff(const GSVector4i& v) { return xffffffff(v).srl32<13>(); }
__forceinline static GSVector4i x000fffff(const GSVector4i& v) { return xffffffff(v).srl32<12>(); }
__forceinline static GSVector4i x001fffff(const GSVector4i& v) { return xffffffff(v).srl32<11>(); }
__forceinline static GSVector4i x003fffff(const GSVector4i& v) { return xffffffff(v).srl32<10>(); }
__forceinline static GSVector4i x007fffff(const GSVector4i& v) { return xffffffff(v).srl32< 9>(); }
__forceinline static GSVector4i x00ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 8>(); }
__forceinline static GSVector4i x01ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 7>(); }
__forceinline static GSVector4i x03ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 6>(); }
__forceinline static GSVector4i x07ffffff(const GSVector4i& v) { return xffffffff(v).srl32< 5>(); }
__forceinline static GSVector4i x0fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 4>(); }
__forceinline static GSVector4i x1fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 3>(); }
__forceinline static GSVector4i x3fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 2>(); }
__forceinline static GSVector4i x7fffffff(const GSVector4i& v) { return xffffffff(v).srl32< 1>(); }
__forceinline static GSVector4i x80000000(const GSVector4i& v) { return xffffffff(v).sll32(31); }
__forceinline static GSVector4i xc0000000(const GSVector4i& v) { return xffffffff(v).sll32(30); }
__forceinline static GSVector4i xe0000000(const GSVector4i& v) { return xffffffff(v).sll32(29); }
__forceinline static GSVector4i xf0000000(const GSVector4i& v) { return xffffffff(v).sll32(28); }
__forceinline static GSVector4i xf8000000(const GSVector4i& v) { return xffffffff(v).sll32(27); }
__forceinline static GSVector4i xfc000000(const GSVector4i& v) { return xffffffff(v).sll32(26); }
__forceinline static GSVector4i xfe000000(const GSVector4i& v) { return xffffffff(v).sll32(25); }
__forceinline static GSVector4i xff000000(const GSVector4i& v) { return xffffffff(v).sll32(24); }
__forceinline static GSVector4i xff800000(const GSVector4i& v) { return xffffffff(v).sll32(23); }
__forceinline static GSVector4i xffc00000(const GSVector4i& v) { return xffffffff(v).sll32(22); }
__forceinline static GSVector4i xffe00000(const GSVector4i& v) { return xffffffff(v).sll32(21); }
__forceinline static GSVector4i xfff00000(const GSVector4i& v) { return xffffffff(v).sll32(20); }
__forceinline static GSVector4i xfff80000(const GSVector4i& v) { return xffffffff(v).sll32(19); }
__forceinline static GSVector4i xfffc0000(const GSVector4i& v) { return xffffffff(v).sll32(18); }
__forceinline static GSVector4i xfffe0000(const GSVector4i& v) { return xffffffff(v).sll32(17); }
__forceinline static GSVector4i xffff0000(const GSVector4i& v) { return xffffffff(v).sll32(16); }
__forceinline static GSVector4i xffff8000(const GSVector4i& v) { return xffffffff(v).sll32(15); }
__forceinline static GSVector4i xffffc000(const GSVector4i& v) { return xffffffff(v).sll32(14); }
__forceinline static GSVector4i xffffe000(const GSVector4i& v) { return xffffffff(v).sll32(13); }
__forceinline static GSVector4i xfffff000(const GSVector4i& v) { return xffffffff(v).sll32(12); }
__forceinline static GSVector4i xfffff800(const GSVector4i& v) { return xffffffff(v).sll32(11); }
__forceinline static GSVector4i xfffffc00(const GSVector4i& v) { return xffffffff(v).sll32(10); }
__forceinline static GSVector4i xfffffe00(const GSVector4i& v) { return xffffffff(v).sll32( 9); }
__forceinline static GSVector4i xffffff00(const GSVector4i& v) { return xffffffff(v).sll32( 8); }
__forceinline static GSVector4i xffffff80(const GSVector4i& v) { return xffffffff(v).sll32( 7); }
__forceinline static GSVector4i xffffffc0(const GSVector4i& v) { return xffffffff(v).sll32( 6); }
__forceinline static GSVector4i xffffffe0(const GSVector4i& v) { return xffffffff(v).sll32( 5); }
__forceinline static GSVector4i xfffffff0(const GSVector4i& v) { return xffffffff(v).sll32( 4); }
__forceinline static GSVector4i xfffffff8(const GSVector4i& v) { return xffffffff(v).sll32( 3); }
__forceinline static GSVector4i xfffffffc(const GSVector4i& v) { return xffffffff(v).sll32( 2); }
__forceinline static GSVector4i xfffffffe(const GSVector4i& v) { return xffffffff(v).sll32( 1); }
// Mask helpers in template-immediate form: each returns xffffffff(v) passed
// through sll32<N>(), with N fixed at compile time (31 down to 1).
// presumably xffffffff(v) yields all-ones per lane so that e.g. xffff0000 is
// ~0u << 16 - its definition and sll32's are outside this view; confirm.
__forceinline static GSVector4i x80000000(const GSVector4i& v) { return xffffffff(v).sll32<31>(); }
__forceinline static GSVector4i xc0000000(const GSVector4i& v) { return xffffffff(v).sll32<30>(); }
__forceinline static GSVector4i xe0000000(const GSVector4i& v) { return xffffffff(v).sll32<29>(); }
__forceinline static GSVector4i xf0000000(const GSVector4i& v) { return xffffffff(v).sll32<28>(); }
__forceinline static GSVector4i xf8000000(const GSVector4i& v) { return xffffffff(v).sll32<27>(); }
__forceinline static GSVector4i xfc000000(const GSVector4i& v) { return xffffffff(v).sll32<26>(); }
__forceinline static GSVector4i xfe000000(const GSVector4i& v) { return xffffffff(v).sll32<25>(); }
__forceinline static GSVector4i xff000000(const GSVector4i& v) { return xffffffff(v).sll32<24>(); }
__forceinline static GSVector4i xff800000(const GSVector4i& v) { return xffffffff(v).sll32<23>(); }
__forceinline static GSVector4i xffc00000(const GSVector4i& v) { return xffffffff(v).sll32<22>(); }
__forceinline static GSVector4i xffe00000(const GSVector4i& v) { return xffffffff(v).sll32<21>(); }
__forceinline static GSVector4i xfff00000(const GSVector4i& v) { return xffffffff(v).sll32<20>(); }
__forceinline static GSVector4i xfff80000(const GSVector4i& v) { return xffffffff(v).sll32<19>(); }
__forceinline static GSVector4i xfffc0000(const GSVector4i& v) { return xffffffff(v).sll32<18>(); }
__forceinline static GSVector4i xfffe0000(const GSVector4i& v) { return xffffffff(v).sll32<17>(); }
__forceinline static GSVector4i xffff0000(const GSVector4i& v) { return xffffffff(v).sll32<16>(); }
__forceinline static GSVector4i xffff8000(const GSVector4i& v) { return xffffffff(v).sll32<15>(); }
__forceinline static GSVector4i xffffc000(const GSVector4i& v) { return xffffffff(v).sll32<14>(); }
__forceinline static GSVector4i xffffe000(const GSVector4i& v) { return xffffffff(v).sll32<13>(); }
__forceinline static GSVector4i xfffff000(const GSVector4i& v) { return xffffffff(v).sll32<12>(); }
__forceinline static GSVector4i xfffff800(const GSVector4i& v) { return xffffffff(v).sll32<11>(); }
__forceinline static GSVector4i xfffffc00(const GSVector4i& v) { return xffffffff(v).sll32<10>(); }
__forceinline static GSVector4i xfffffe00(const GSVector4i& v) { return xffffffff(v).sll32< 9>(); }
__forceinline static GSVector4i xffffff00(const GSVector4i& v) { return xffffffff(v).sll32< 8>(); }
__forceinline static GSVector4i xffffff80(const GSVector4i& v) { return xffffffff(v).sll32< 7>(); }
__forceinline static GSVector4i xffffffc0(const GSVector4i& v) { return xffffffff(v).sll32< 6>(); }
__forceinline static GSVector4i xffffffe0(const GSVector4i& v) { return xffffffff(v).sll32< 5>(); }
__forceinline static GSVector4i xfffffff0(const GSVector4i& v) { return xffffffff(v).sll32< 4>(); }
__forceinline static GSVector4i xfffffff8(const GSVector4i& v) { return xffffffff(v).sll32< 3>(); }
__forceinline static GSVector4i xfffffffc(const GSVector4i& v) { return xffffffff(v).sll32< 2>(); }
__forceinline static GSVector4i xfffffffe(const GSVector4i& v) { return xffffffff(v).sll32< 1>(); }
// 16-bit mask helpers in call-argument form: each returns xffffffff(v) passed
// through srl16() with the count given as a function argument (15 down to 1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with srl16<N>() - confirm.
__forceinline static GSVector4i x0001(const GSVector4i& v) { return xffffffff(v).srl16(15); }
__forceinline static GSVector4i x0003(const GSVector4i& v) { return xffffffff(v).srl16(14); }
__forceinline static GSVector4i x0007(const GSVector4i& v) { return xffffffff(v).srl16(13); }
__forceinline static GSVector4i x000f(const GSVector4i& v) { return xffffffff(v).srl16(12); }
__forceinline static GSVector4i x001f(const GSVector4i& v) { return xffffffff(v).srl16(11); }
__forceinline static GSVector4i x003f(const GSVector4i& v) { return xffffffff(v).srl16(10); }
__forceinline static GSVector4i x007f(const GSVector4i& v) { return xffffffff(v).srl16( 9); }
__forceinline static GSVector4i x00ff(const GSVector4i& v) { return xffffffff(v).srl16( 8); }
__forceinline static GSVector4i x01ff(const GSVector4i& v) { return xffffffff(v).srl16( 7); }
__forceinline static GSVector4i x03ff(const GSVector4i& v) { return xffffffff(v).srl16( 6); }
__forceinline static GSVector4i x07ff(const GSVector4i& v) { return xffffffff(v).srl16( 5); }
__forceinline static GSVector4i x0fff(const GSVector4i& v) { return xffffffff(v).srl16( 4); }
__forceinline static GSVector4i x1fff(const GSVector4i& v) { return xffffffff(v).srl16( 3); }
__forceinline static GSVector4i x3fff(const GSVector4i& v) { return xffffffff(v).srl16( 2); }
__forceinline static GSVector4i x7fff(const GSVector4i& v) { return xffffffff(v).srl16( 1); }
// 16-bit mask helpers in template-immediate form: each returns xffffffff(v)
// passed through srl16<N>(), N fixed at compile time (15 down to 1).
// presumably the result is the low-bit mask named by the function in every
// 16-bit lane - xffffffff(v) and srl16 are defined outside this view; confirm.
__forceinline static GSVector4i x0001(const GSVector4i& v) { return xffffffff(v).srl16<15>(); }
__forceinline static GSVector4i x0003(const GSVector4i& v) { return xffffffff(v).srl16<14>(); }
__forceinline static GSVector4i x0007(const GSVector4i& v) { return xffffffff(v).srl16<13>(); }
__forceinline static GSVector4i x000f(const GSVector4i& v) { return xffffffff(v).srl16<12>(); }
__forceinline static GSVector4i x001f(const GSVector4i& v) { return xffffffff(v).srl16<11>(); }
__forceinline static GSVector4i x003f(const GSVector4i& v) { return xffffffff(v).srl16<10>(); }
__forceinline static GSVector4i x007f(const GSVector4i& v) { return xffffffff(v).srl16< 9>(); }
__forceinline static GSVector4i x00ff(const GSVector4i& v) { return xffffffff(v).srl16< 8>(); }
__forceinline static GSVector4i x01ff(const GSVector4i& v) { return xffffffff(v).srl16< 7>(); }
__forceinline static GSVector4i x03ff(const GSVector4i& v) { return xffffffff(v).srl16< 6>(); }
__forceinline static GSVector4i x07ff(const GSVector4i& v) { return xffffffff(v).srl16< 5>(); }
__forceinline static GSVector4i x0fff(const GSVector4i& v) { return xffffffff(v).srl16< 4>(); }
__forceinline static GSVector4i x1fff(const GSVector4i& v) { return xffffffff(v).srl16< 3>(); }
__forceinline static GSVector4i x3fff(const GSVector4i& v) { return xffffffff(v).srl16< 2>(); }
__forceinline static GSVector4i x7fff(const GSVector4i& v) { return xffffffff(v).srl16< 1>(); }
// 16-bit mask helpers in call-argument form: each returns xffffffff(v) passed
// through sll16() with the count given as a function argument (15 down to 1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with sll16<N>() - confirm.
__forceinline static GSVector4i x8000(const GSVector4i& v) { return xffffffff(v).sll16(15); }
__forceinline static GSVector4i xc000(const GSVector4i& v) { return xffffffff(v).sll16(14); }
__forceinline static GSVector4i xe000(const GSVector4i& v) { return xffffffff(v).sll16(13); }
__forceinline static GSVector4i xf000(const GSVector4i& v) { return xffffffff(v).sll16(12); }
__forceinline static GSVector4i xf800(const GSVector4i& v) { return xffffffff(v).sll16(11); }
__forceinline static GSVector4i xfc00(const GSVector4i& v) { return xffffffff(v).sll16(10); }
__forceinline static GSVector4i xfe00(const GSVector4i& v) { return xffffffff(v).sll16( 9); }
__forceinline static GSVector4i xff00(const GSVector4i& v) { return xffffffff(v).sll16( 8); }
__forceinline static GSVector4i xff80(const GSVector4i& v) { return xffffffff(v).sll16( 7); }
__forceinline static GSVector4i xffc0(const GSVector4i& v) { return xffffffff(v).sll16( 6); }
__forceinline static GSVector4i xffe0(const GSVector4i& v) { return xffffffff(v).sll16( 5); }
__forceinline static GSVector4i xfff0(const GSVector4i& v) { return xffffffff(v).sll16( 4); }
__forceinline static GSVector4i xfff8(const GSVector4i& v) { return xffffffff(v).sll16( 3); }
__forceinline static GSVector4i xfffc(const GSVector4i& v) { return xffffffff(v).sll16( 2); }
__forceinline static GSVector4i xfffe(const GSVector4i& v) { return xffffffff(v).sll16( 1); }
// 16-bit mask helpers in template-immediate form: each returns xffffffff(v)
// passed through sll16<N>(), N fixed at compile time (15 down to 1).
// presumably the result is the high-bit mask named by the function in every
// 16-bit lane - xffffffff(v) and sll16 are defined outside this view; confirm.
__forceinline static GSVector4i x8000(const GSVector4i& v) { return xffffffff(v).sll16<15>(); }
__forceinline static GSVector4i xc000(const GSVector4i& v) { return xffffffff(v).sll16<14>(); }
__forceinline static GSVector4i xe000(const GSVector4i& v) { return xffffffff(v).sll16<13>(); }
__forceinline static GSVector4i xf000(const GSVector4i& v) { return xffffffff(v).sll16<12>(); }
__forceinline static GSVector4i xf800(const GSVector4i& v) { return xffffffff(v).sll16<11>(); }
__forceinline static GSVector4i xfc00(const GSVector4i& v) { return xffffffff(v).sll16<10>(); }
__forceinline static GSVector4i xfe00(const GSVector4i& v) { return xffffffff(v).sll16< 9>(); }
__forceinline static GSVector4i xff00(const GSVector4i& v) { return xffffffff(v).sll16< 8>(); }
__forceinline static GSVector4i xff80(const GSVector4i& v) { return xffffffff(v).sll16< 7>(); }
__forceinline static GSVector4i xffc0(const GSVector4i& v) { return xffffffff(v).sll16< 6>(); }
__forceinline static GSVector4i xffe0(const GSVector4i& v) { return xffffffff(v).sll16< 5>(); }
__forceinline static GSVector4i xfff0(const GSVector4i& v) { return xffffffff(v).sll16< 4>(); }
__forceinline static GSVector4i xfff8(const GSVector4i& v) { return xffffffff(v).sll16< 3>(); }
__forceinline static GSVector4i xfffc(const GSVector4i& v) { return xffffffff(v).sll16< 2>(); }
__forceinline static GSVector4i xfffe(const GSVector4i& v) { return xffffffff(v).sll16< 1>(); }
// Table lookups: return entry n of m_xff / m_x0f by value.
// No range check is made on n here; the table sizes are not visible in this
// view, so valid indices must be confirmed at the declaration.
__forceinline static GSVector4i xff(int n) { return m_xff[n]; }
__forceinline static GSVector4i x0f(int n) { return m_x0f[n]; }

View File

@ -581,151 +581,84 @@ public:
//return GSVector8i(_mm256_slli_si128(m, i));
}
// Arithmetic right shift of each 16-bit lane by i, via _mm256_srai_epi16.
// Two signatures are listed back to back over one body: the (int i) parameter
// form and the template <int i> form, where the count is a compile-time value.
// NOTE(review): no +/- markers survive in this listing; presumably the (int i)
// line is the replaced one - confirm against the repository.
__forceinline GSVector8i sra16(int i) const
template <int i>
__forceinline GSVector8i sra16() const
{
return GSVector8i(_mm256_srai_epi16(m, i));
}
// Two signature/return pairs are interleaved here:
//   sra16(__m128i)             -> _mm256_sra_epi16(m, i)
//   srav16(const GSVector8i&)  -> _mm256_srav_epi16(m, i.m)
// The commit description says the __m128 overloads were dropped because a
// vector argument is easy to mistake for a per-lane (variable) shift, whereas
// that form takes one shift amount from the low bits of the operand.
// NOTE(review): the Intel Intrinsics Guide lists _mm256_srav_epi16 under
// AVX512BW + AVX512VL, not AVX2 - confirm the targets this is built and
// called for before relying on srav16.
__forceinline GSVector8i sra16(__m128i i) const
__forceinline GSVector8i srav16(const GSVector8i& i) const
{
return GSVector8i(_mm256_sra_epi16(m, i));
return GSVector8i(_mm256_srav_epi16(m, i.m));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_sra_epi16.
// NOTE(review): presumably on the removed side of this change (no +/- markers
// in this listing) - confirm.
__forceinline GSVector8i sra16(__m256i i) const
{
return GSVector8i(_mm256_sra_epi16(m, _mm256_castsi256_si128(i)));
}
// Arithmetic right shift of each 32-bit lane by i, via _mm256_srai_epi32.
// Lists both the (int i) parameter signature and the template <int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i sra32(int i) const
template <int i>
__forceinline GSVector8i sra32() const
{
return GSVector8i(_mm256_srai_epi32(m, i));
}
// Two signature/return pairs are interleaved here:
//   sra32(__m128i)             -> _mm256_sra_epi32(m, i)
//   srav32(const GSVector8i&)  -> _mm256_srav_epi32(m, i.m)
// The "v" variant takes its counts from a GSVector8i wrapper (i.m) rather
// than a raw __m256i.
// NOTE(review): presumably the sra32(__m128i) pair is the removed side, per
// the commit description - confirm.
__forceinline GSVector8i sra32(__m128i i) const
__forceinline GSVector8i srav32(const GSVector8i& i) const
{
return GSVector8i(_mm256_sra_epi32(m, i));
return GSVector8i(_mm256_srav_epi32(m, i.m));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_sra_epi32.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i sra32(__m256i i) const
{
return GSVector8i(_mm256_sra_epi32(m, _mm256_castsi256_si128(i)));
}
// Raw-__m256i form of srav32: forwards i directly to _mm256_srav_epi32.
// NOTE(review): presumably superseded by the srav32(const GSVector8i&) form
// shown in the same listing - confirm which side of the change this is.
__forceinline GSVector8i srav32(__m256i i) const
{
return GSVector8i(_mm256_srav_epi32(m, i));
}
// Left shift of each 16-bit lane by i, via _mm256_slli_epi16.
// Lists both the (int i) parameter signature and the template <int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i sll16(int i) const
template <int i>
__forceinline GSVector8i sll16() const
{
return GSVector8i(_mm256_slli_epi16(m, i));
}
// Two signature/return pairs are interleaved here:
//   sll16(__m128i)             -> _mm256_sll_epi16(m, i)
//   sllv16(const GSVector8i&)  -> _mm256_sllv_epi16(m, i.m)
// NOTE(review): the Intel Intrinsics Guide lists _mm256_sllv_epi16 under
// AVX512BW + AVX512VL, not AVX2 - confirm the targets this is built and
// called for before relying on sllv16.
// NOTE(review): presumably the sll16(__m128i) pair is the removed side, per
// the commit description - confirm.
__forceinline GSVector8i sll16(__m128i i) const
__forceinline GSVector8i sllv16(const GSVector8i& i) const
{
return GSVector8i(_mm256_sll_epi16(m, i));
return GSVector8i(_mm256_sllv_epi16(m, i.m));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_sll_epi16.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i sll16(__m256i i) const
{
return GSVector8i(_mm256_sll_epi16(m, _mm256_castsi256_si128(i)));
}
// Left shift of each 32-bit lane by i, via _mm256_slli_epi32.
// Lists both the (int i) parameter signature and the template <int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i sll32(int i) const
template <int i>
__forceinline GSVector8i sll32() const
{
return GSVector8i(_mm256_slli_epi32(m, i));
}
// Two signature/return pairs are interleaved here:
//   sll32(__m128i)             -> _mm256_sll_epi32(m, i)
//   sllv32(const GSVector8i&)  -> _mm256_sllv_epi32(m, i.m)
// NOTE(review): presumably the sll32(__m128i) pair is the removed side, per
// the commit description - confirm.
__forceinline GSVector8i sll32(__m128i i) const
__forceinline GSVector8i sllv32(const GSVector8i& i) const
{
return GSVector8i(_mm256_sll_epi32(m, i));
return GSVector8i(_mm256_sllv_epi32(m, i.m));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_sll_epi32.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i sll32(__m256i i) const
{
return GSVector8i(_mm256_sll_epi32(m, _mm256_castsi256_si128(i)));
}
// Raw-__m256i form of sllv32: forwards i directly to _mm256_sllv_epi32.
// NOTE(review): presumably superseded by the sllv32(const GSVector8i&) form
// shown in the same listing - confirm which side of the change this is.
__forceinline GSVector8i sllv32(__m256i i) const
{
return GSVector8i(_mm256_sllv_epi32(m, i));
}
// Left shift of each 64-bit lane by i, via _mm256_slli_epi64.
// Lists both the (int i) parameter signature and the template <int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i sll64(int i) const
template <int i>
__forceinline GSVector8i sll64() const
{
return GSVector8i(_mm256_slli_epi64(m, i));
}
// Two signature/return pairs are interleaved here:
//   sll64(__m128i)             -> _mm256_sll_epi64(m, i)
//   sllv64(const GSVector8i&)  -> _mm256_sllv_epi64(m, i.m)
// NOTE(review): presumably the sll64(__m128i) pair is the removed side, per
// the commit description - confirm.
__forceinline GSVector8i sll64(__m128i i) const
__forceinline GSVector8i sllv64(const GSVector8i& i) const
{
return GSVector8i(_mm256_sll_epi64(m, i));
return GSVector8i(_mm256_sllv_epi64(m, i.m));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_sll_epi64.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i sll64(__m256i i) const
{
return GSVector8i(_mm256_sll_epi64(m, _mm256_castsi256_si128(i)));
}
// Raw-__m256i form of sllv64: forwards i directly to _mm256_sllv_epi64.
// NOTE(review): presumably superseded by the sllv64(const GSVector8i&) form
// shown in the same listing - confirm which side of the change this is.
__forceinline GSVector8i sllv64(__m256i i) const
{
return GSVector8i(_mm256_sllv_epi64(m, i));
}
// Logical right shift of each 16-bit lane by i, via _mm256_srli_epi16.
// Lists both the (int i) parameter signature and the template<int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i srl16(int i) const
template<int i>
__forceinline GSVector8i srl16() const
{
return GSVector8i(_mm256_srli_epi16(m, i));
}
// __m128i-count form: passes i as the count operand of _mm256_srl_epi16.
// NOTE(review): the commit description says the __m128 overloads are removed;
// no replacement for this one appears in the visible listing - confirm.
__forceinline GSVector8i srl16(__m128i i) const
{
return GSVector8i(_mm256_srl_epi16(m, i));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_srl_epi16.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i srl16(__m256i i) const
{
return GSVector8i(_mm256_srl_epi16(m, _mm256_castsi256_si128(i)));
}
// Logical right shift of each 32-bit lane by i, via _mm256_srli_epi32.
// Lists both the (int i) parameter signature and the template <int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i srl32(int i) const
template <int i>
__forceinline GSVector8i srl32() const
{
return GSVector8i(_mm256_srli_epi32(m, i));
}
// Two signature/return pairs are interleaved here:
//   srl32(__m128i)             -> _mm256_srl_epi32(m, i)
//   srlv32(const GSVector8i&)  -> _mm256_srlv_epi32(m, i.m)
// NOTE(review): presumably the srl32(__m128i) pair is the removed side, per
// the commit description - confirm.
__forceinline GSVector8i srl32(__m128i i) const
__forceinline GSVector8i srlv32(const GSVector8i& i) const
{
return GSVector8i(_mm256_srl_epi32(m, i));
return GSVector8i(_mm256_srlv_epi32(m, i.m));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_srl_epi32.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i srl32(__m256i i) const
{
return GSVector8i(_mm256_srl_epi32(m, _mm256_castsi256_si128(i)));
}
// Raw-__m256i form of srlv32: forwards i directly to _mm256_srlv_epi32.
// NOTE(review): presumably superseded by the srlv32(const GSVector8i&) form
// shown in the same listing - confirm which side of the change this is.
__forceinline GSVector8i srlv32(__m256i i) const
{
return GSVector8i(_mm256_srlv_epi32(m, i));
}
// Logical right shift of each 64-bit lane by i, via _mm256_srli_epi64.
// Lists both the (int i) parameter signature and the template <int i>
// signature over a single body.
// NOTE(review): presumably the (int i) line is the replaced one - confirm.
__forceinline GSVector8i srl64(int i) const
template <int i>
__forceinline GSVector8i srl64() const
{
return GSVector8i(_mm256_srli_epi64(m, i));
}
// __m128i-count form: passes i as the count operand of _mm256_srl_epi64.
// NOTE(review): the commit description says the __m128 overloads are removed;
// no replacement for this one appears in the visible listing - confirm.
__forceinline GSVector8i srl64(__m128i i) const
{
return GSVector8i(_mm256_srl_epi64(m, i));
}
// Overload taking a full __m256i: the argument is narrowed with
// _mm256_castsi256_si128 and used as the count operand of _mm256_srl_epi64.
// NOTE(review): presumably on the removed side of this change - confirm.
__forceinline GSVector8i srl64(__m256i i) const
{
return GSVector8i(_mm256_srl_epi64(m, _mm256_castsi256_si128(i)));
}
// Raw-__m256i form of srlv64: forwards i directly to _mm256_srlv_epi64.
// NOTE(review): unlike srlv32/sllv64, no const GSVector8i& counterpart is
// visible in this listing, so it is unclear whether this line is kept or
// removed - confirm against the repository.
__forceinline GSVector8i srlv64(__m256i i) const
{
return GSVector8i(_mm256_srlv_epi64(m, i));
}
__forceinline GSVector8i add8(const GSVector8i& v) const
{
return GSVector8i(_mm256_add_epi8(m, v.m));
@ -864,7 +797,7 @@ public:
{
// (a - this) * f >> 4 + this (a, this: 8-bit, f: 4-bit)
return add16(a.sub16(*this).mul16l(f).sra16(4));
return add16(a.sub16(*this).mul16l(f).sra16<4>());
}
template <int shift>
@ -877,7 +810,7 @@ public:
return mul16hrs(f);
}
return sll16(shift + 1).mul16hs(f);
return sll16<shift + 1>().mul16hs(f);
}
__forceinline bool eq(const GSVector8i& v) const
@ -1676,199 +1609,199 @@ public:
// Seed value for the mask helpers below: compares zero() with itself.
// presumably operator== yields all bits set in lanes that compare equal, so
// this is an all-ones vector - operator== is defined outside this view.
__forceinline static GSVector8i xffffffff() { return zero() == zero(); }
// Low-bit mask helpers in call-argument form: each returns xffffffff() passed
// through srl32() with the count given as a function argument (31 down to 1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with srl32<N>() - confirm.
__forceinline static GSVector8i x00000001() { return xffffffff().srl32(31); }
__forceinline static GSVector8i x00000003() { return xffffffff().srl32(30); }
__forceinline static GSVector8i x00000007() { return xffffffff().srl32(29); }
__forceinline static GSVector8i x0000000f() { return xffffffff().srl32(28); }
__forceinline static GSVector8i x0000001f() { return xffffffff().srl32(27); }
__forceinline static GSVector8i x0000003f() { return xffffffff().srl32(26); }
__forceinline static GSVector8i x0000007f() { return xffffffff().srl32(25); }
__forceinline static GSVector8i x000000ff() { return xffffffff().srl32(24); }
__forceinline static GSVector8i x000001ff() { return xffffffff().srl32(23); }
__forceinline static GSVector8i x000003ff() { return xffffffff().srl32(22); }
__forceinline static GSVector8i x000007ff() { return xffffffff().srl32(21); }
__forceinline static GSVector8i x00000fff() { return xffffffff().srl32(20); }
__forceinline static GSVector8i x00001fff() { return xffffffff().srl32(19); }
__forceinline static GSVector8i x00003fff() { return xffffffff().srl32(18); }
__forceinline static GSVector8i x00007fff() { return xffffffff().srl32(17); }
__forceinline static GSVector8i x0000ffff() { return xffffffff().srl32(16); }
__forceinline static GSVector8i x0001ffff() { return xffffffff().srl32(15); }
__forceinline static GSVector8i x0003ffff() { return xffffffff().srl32(14); }
__forceinline static GSVector8i x0007ffff() { return xffffffff().srl32(13); }
__forceinline static GSVector8i x000fffff() { return xffffffff().srl32(12); }
__forceinline static GSVector8i x001fffff() { return xffffffff().srl32(11); }
__forceinline static GSVector8i x003fffff() { return xffffffff().srl32(10); }
__forceinline static GSVector8i x007fffff() { return xffffffff().srl32( 9); }
__forceinline static GSVector8i x00ffffff() { return xffffffff().srl32( 8); }
__forceinline static GSVector8i x01ffffff() { return xffffffff().srl32( 7); }
__forceinline static GSVector8i x03ffffff() { return xffffffff().srl32( 6); }
__forceinline static GSVector8i x07ffffff() { return xffffffff().srl32( 5); }
__forceinline static GSVector8i x0fffffff() { return xffffffff().srl32( 4); }
__forceinline static GSVector8i x1fffffff() { return xffffffff().srl32( 3); }
__forceinline static GSVector8i x3fffffff() { return xffffffff().srl32( 2); }
__forceinline static GSVector8i x7fffffff() { return xffffffff().srl32( 1); }
// Low-bit mask helpers in template-immediate form: each returns xffffffff()
// passed through srl32<N>(), a logical right shift of every 32-bit lane with
// N fixed at compile time (31 down to 1). With an all-ones input this leaves
// the low (32 - N) bits set, matching the hex value in each function name.
__forceinline static GSVector8i x00000001() { return xffffffff().srl32<31>(); }
__forceinline static GSVector8i x00000003() { return xffffffff().srl32<30>(); }
__forceinline static GSVector8i x00000007() { return xffffffff().srl32<29>(); }
__forceinline static GSVector8i x0000000f() { return xffffffff().srl32<28>(); }
__forceinline static GSVector8i x0000001f() { return xffffffff().srl32<27>(); }
__forceinline static GSVector8i x0000003f() { return xffffffff().srl32<26>(); }
__forceinline static GSVector8i x0000007f() { return xffffffff().srl32<25>(); }
__forceinline static GSVector8i x000000ff() { return xffffffff().srl32<24>(); }
__forceinline static GSVector8i x000001ff() { return xffffffff().srl32<23>(); }
__forceinline static GSVector8i x000003ff() { return xffffffff().srl32<22>(); }
__forceinline static GSVector8i x000007ff() { return xffffffff().srl32<21>(); }
__forceinline static GSVector8i x00000fff() { return xffffffff().srl32<20>(); }
__forceinline static GSVector8i x00001fff() { return xffffffff().srl32<19>(); }
__forceinline static GSVector8i x00003fff() { return xffffffff().srl32<18>(); }
__forceinline static GSVector8i x00007fff() { return xffffffff().srl32<17>(); }
__forceinline static GSVector8i x0000ffff() { return xffffffff().srl32<16>(); }
__forceinline static GSVector8i x0001ffff() { return xffffffff().srl32<15>(); }
__forceinline static GSVector8i x0003ffff() { return xffffffff().srl32<14>(); }
__forceinline static GSVector8i x0007ffff() { return xffffffff().srl32<13>(); }
__forceinline static GSVector8i x000fffff() { return xffffffff().srl32<12>(); }
__forceinline static GSVector8i x001fffff() { return xffffffff().srl32<11>(); }
__forceinline static GSVector8i x003fffff() { return xffffffff().srl32<10>(); }
__forceinline static GSVector8i x007fffff() { return xffffffff().srl32< 9>(); }
__forceinline static GSVector8i x00ffffff() { return xffffffff().srl32< 8>(); }
__forceinline static GSVector8i x01ffffff() { return xffffffff().srl32< 7>(); }
__forceinline static GSVector8i x03ffffff() { return xffffffff().srl32< 6>(); }
__forceinline static GSVector8i x07ffffff() { return xffffffff().srl32< 5>(); }
__forceinline static GSVector8i x0fffffff() { return xffffffff().srl32< 4>(); }
__forceinline static GSVector8i x1fffffff() { return xffffffff().srl32< 3>(); }
__forceinline static GSVector8i x3fffffff() { return xffffffff().srl32< 2>(); }
__forceinline static GSVector8i x7fffffff() { return xffffffff().srl32< 1>(); }
// High-bit mask helpers in call-argument form: each returns xffffffff() passed
// through sll32() with the count given as a function argument (31 down to 1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with sll32<N>() - confirm.
__forceinline static GSVector8i x80000000() { return xffffffff().sll32(31); }
__forceinline static GSVector8i xc0000000() { return xffffffff().sll32(30); }
__forceinline static GSVector8i xe0000000() { return xffffffff().sll32(29); }
__forceinline static GSVector8i xf0000000() { return xffffffff().sll32(28); }
__forceinline static GSVector8i xf8000000() { return xffffffff().sll32(27); }
__forceinline static GSVector8i xfc000000() { return xffffffff().sll32(26); }
__forceinline static GSVector8i xfe000000() { return xffffffff().sll32(25); }
__forceinline static GSVector8i xff000000() { return xffffffff().sll32(24); }
__forceinline static GSVector8i xff800000() { return xffffffff().sll32(23); }
__forceinline static GSVector8i xffc00000() { return xffffffff().sll32(22); }
__forceinline static GSVector8i xffe00000() { return xffffffff().sll32(21); }
__forceinline static GSVector8i xfff00000() { return xffffffff().sll32(20); }
__forceinline static GSVector8i xfff80000() { return xffffffff().sll32(19); }
__forceinline static GSVector8i xfffc0000() { return xffffffff().sll32(18); }
__forceinline static GSVector8i xfffe0000() { return xffffffff().sll32(17); }
__forceinline static GSVector8i xffff0000() { return xffffffff().sll32(16); }
__forceinline static GSVector8i xffff8000() { return xffffffff().sll32(15); }
__forceinline static GSVector8i xffffc000() { return xffffffff().sll32(14); }
__forceinline static GSVector8i xffffe000() { return xffffffff().sll32(13); }
__forceinline static GSVector8i xfffff000() { return xffffffff().sll32(12); }
__forceinline static GSVector8i xfffff800() { return xffffffff().sll32(11); }
__forceinline static GSVector8i xfffffc00() { return xffffffff().sll32(10); }
__forceinline static GSVector8i xfffffe00() { return xffffffff().sll32( 9); }
__forceinline static GSVector8i xffffff00() { return xffffffff().sll32( 8); }
__forceinline static GSVector8i xffffff80() { return xffffffff().sll32( 7); }
__forceinline static GSVector8i xffffffc0() { return xffffffff().sll32( 6); }
__forceinline static GSVector8i xffffffe0() { return xffffffff().sll32( 5); }
__forceinline static GSVector8i xfffffff0() { return xffffffff().sll32( 4); }
__forceinline static GSVector8i xfffffff8() { return xffffffff().sll32( 3); }
__forceinline static GSVector8i xfffffffc() { return xffffffff().sll32( 2); }
__forceinline static GSVector8i xfffffffe() { return xffffffff().sll32( 1); }
// High-bit mask helpers in template-immediate form: each returns xffffffff()
// passed through sll32<N>(), a left shift of every 32-bit lane with N fixed at
// compile time (31 down to 1). With an all-ones input this leaves the high
// (32 - N) bits set, matching the hex value in each function name.
__forceinline static GSVector8i x80000000() { return xffffffff().sll32<31>(); }
__forceinline static GSVector8i xc0000000() { return xffffffff().sll32<30>(); }
__forceinline static GSVector8i xe0000000() { return xffffffff().sll32<29>(); }
__forceinline static GSVector8i xf0000000() { return xffffffff().sll32<28>(); }
__forceinline static GSVector8i xf8000000() { return xffffffff().sll32<27>(); }
__forceinline static GSVector8i xfc000000() { return xffffffff().sll32<26>(); }
__forceinline static GSVector8i xfe000000() { return xffffffff().sll32<25>(); }
__forceinline static GSVector8i xff000000() { return xffffffff().sll32<24>(); }
__forceinline static GSVector8i xff800000() { return xffffffff().sll32<23>(); }
__forceinline static GSVector8i xffc00000() { return xffffffff().sll32<22>(); }
__forceinline static GSVector8i xffe00000() { return xffffffff().sll32<21>(); }
__forceinline static GSVector8i xfff00000() { return xffffffff().sll32<20>(); }
__forceinline static GSVector8i xfff80000() { return xffffffff().sll32<19>(); }
__forceinline static GSVector8i xfffc0000() { return xffffffff().sll32<18>(); }
__forceinline static GSVector8i xfffe0000() { return xffffffff().sll32<17>(); }
__forceinline static GSVector8i xffff0000() { return xffffffff().sll32<16>(); }
__forceinline static GSVector8i xffff8000() { return xffffffff().sll32<15>(); }
__forceinline static GSVector8i xffffc000() { return xffffffff().sll32<14>(); }
__forceinline static GSVector8i xffffe000() { return xffffffff().sll32<13>(); }
__forceinline static GSVector8i xfffff000() { return xffffffff().sll32<12>(); }
__forceinline static GSVector8i xfffff800() { return xffffffff().sll32<11>(); }
__forceinline static GSVector8i xfffffc00() { return xffffffff().sll32<10>(); }
__forceinline static GSVector8i xfffffe00() { return xffffffff().sll32< 9>(); }
__forceinline static GSVector8i xffffff00() { return xffffffff().sll32< 8>(); }
__forceinline static GSVector8i xffffff80() { return xffffffff().sll32< 7>(); }
__forceinline static GSVector8i xffffffc0() { return xffffffff().sll32< 6>(); }
__forceinline static GSVector8i xffffffe0() { return xffffffff().sll32< 5>(); }
__forceinline static GSVector8i xfffffff0() { return xffffffff().sll32< 4>(); }
__forceinline static GSVector8i xfffffff8() { return xffffffff().sll32< 3>(); }
__forceinline static GSVector8i xfffffffc() { return xffffffff().sll32< 2>(); }
__forceinline static GSVector8i xfffffffe() { return xffffffff().sll32< 1>(); }
// 16-bit low-bit mask helpers in call-argument form: each returns xffffffff()
// passed through srl16() with the count given as an argument (15 down to 1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with srl16<N>() - confirm.
__forceinline static GSVector8i x0001() { return xffffffff().srl16(15); }
__forceinline static GSVector8i x0003() { return xffffffff().srl16(14); }
__forceinline static GSVector8i x0007() { return xffffffff().srl16(13); }
__forceinline static GSVector8i x000f() { return xffffffff().srl16(12); }
__forceinline static GSVector8i x001f() { return xffffffff().srl16(11); }
__forceinline static GSVector8i x003f() { return xffffffff().srl16(10); }
__forceinline static GSVector8i x007f() { return xffffffff().srl16( 9); }
__forceinline static GSVector8i x00ff() { return xffffffff().srl16( 8); }
__forceinline static GSVector8i x01ff() { return xffffffff().srl16( 7); }
__forceinline static GSVector8i x03ff() { return xffffffff().srl16( 6); }
__forceinline static GSVector8i x07ff() { return xffffffff().srl16( 5); }
__forceinline static GSVector8i x0fff() { return xffffffff().srl16( 4); }
__forceinline static GSVector8i x1fff() { return xffffffff().srl16( 3); }
__forceinline static GSVector8i x3fff() { return xffffffff().srl16( 2); }
__forceinline static GSVector8i x7fff() { return xffffffff().srl16( 1); }
// 16-bit low-bit mask helpers in template-immediate form: each returns
// xffffffff() passed through srl16<N>(), a logical right shift of every 16-bit
// lane with N fixed at compile time (15 down to 1). With an all-ones input
// this leaves the low (16 - N) bits of each lane set, as the names indicate.
__forceinline static GSVector8i x0001() { return xffffffff().srl16<15>(); }
__forceinline static GSVector8i x0003() { return xffffffff().srl16<14>(); }
__forceinline static GSVector8i x0007() { return xffffffff().srl16<13>(); }
__forceinline static GSVector8i x000f() { return xffffffff().srl16<12>(); }
__forceinline static GSVector8i x001f() { return xffffffff().srl16<11>(); }
__forceinline static GSVector8i x003f() { return xffffffff().srl16<10>(); }
__forceinline static GSVector8i x007f() { return xffffffff().srl16< 9>(); }
__forceinline static GSVector8i x00ff() { return xffffffff().srl16< 8>(); }
__forceinline static GSVector8i x01ff() { return xffffffff().srl16< 7>(); }
__forceinline static GSVector8i x03ff() { return xffffffff().srl16< 6>(); }
__forceinline static GSVector8i x07ff() { return xffffffff().srl16< 5>(); }
__forceinline static GSVector8i x0fff() { return xffffffff().srl16< 4>(); }
__forceinline static GSVector8i x1fff() { return xffffffff().srl16< 3>(); }
__forceinline static GSVector8i x3fff() { return xffffffff().srl16< 2>(); }
__forceinline static GSVector8i x7fff() { return xffffffff().srl16< 1>(); }
// 16-bit high-bit mask helpers in call-argument form: each returns xffffffff()
// passed through sll16() with the count given as an argument (15 down to 1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with sll16<N>() - confirm.
__forceinline static GSVector8i x8000() { return xffffffff().sll16(15); }
__forceinline static GSVector8i xc000() { return xffffffff().sll16(14); }
__forceinline static GSVector8i xe000() { return xffffffff().sll16(13); }
__forceinline static GSVector8i xf000() { return xffffffff().sll16(12); }
__forceinline static GSVector8i xf800() { return xffffffff().sll16(11); }
__forceinline static GSVector8i xfc00() { return xffffffff().sll16(10); }
__forceinline static GSVector8i xfe00() { return xffffffff().sll16( 9); }
__forceinline static GSVector8i xff00() { return xffffffff().sll16( 8); }
__forceinline static GSVector8i xff80() { return xffffffff().sll16( 7); }
__forceinline static GSVector8i xffc0() { return xffffffff().sll16( 6); }
__forceinline static GSVector8i xffe0() { return xffffffff().sll16( 5); }
__forceinline static GSVector8i xfff0() { return xffffffff().sll16( 4); }
__forceinline static GSVector8i xfff8() { return xffffffff().sll16( 3); }
__forceinline static GSVector8i xfffc() { return xffffffff().sll16( 2); }
__forceinline static GSVector8i xfffe() { return xffffffff().sll16( 1); }
// 16-bit high-bit mask helpers in template-immediate form: each returns
// xffffffff() passed through sll16<N>(), a left shift of every 16-bit lane
// with N fixed at compile time (15 down to 1). With an all-ones input this
// leaves the high (16 - N) bits of each lane set, as the names indicate.
__forceinline static GSVector8i x8000() { return xffffffff().sll16<15>(); }
__forceinline static GSVector8i xc000() { return xffffffff().sll16<14>(); }
__forceinline static GSVector8i xe000() { return xffffffff().sll16<13>(); }
__forceinline static GSVector8i xf000() { return xffffffff().sll16<12>(); }
__forceinline static GSVector8i xf800() { return xffffffff().sll16<11>(); }
__forceinline static GSVector8i xfc00() { return xffffffff().sll16<10>(); }
__forceinline static GSVector8i xfe00() { return xffffffff().sll16< 9>(); }
__forceinline static GSVector8i xff00() { return xffffffff().sll16< 8>(); }
__forceinline static GSVector8i xff80() { return xffffffff().sll16< 7>(); }
__forceinline static GSVector8i xffc0() { return xffffffff().sll16< 6>(); }
__forceinline static GSVector8i xffe0() { return xffffffff().sll16< 5>(); }
__forceinline static GSVector8i xfff0() { return xffffffff().sll16< 4>(); }
__forceinline static GSVector8i xfff8() { return xffffffff().sll16< 3>(); }
__forceinline static GSVector8i xfffc() { return xffffffff().sll16< 2>(); }
__forceinline static GSVector8i xfffe() { return xffffffff().sll16< 1>(); }
// Seed value derived from an existing vector: compares v with itself.
// presumably operator== yields all bits set where lanes compare equal, making
// this an all-ones vector regardless of v's contents - operator== is defined
// outside this view; confirm.
__forceinline static GSVector8i xffffffff(const GSVector8i& v) { return v == v; }
// Low-bit mask helpers (v-seeded) in call-argument form: each returns
// xffffffff(v) passed through srl32() with the count as an argument (31..1).
// NOTE(review): no +/- markers in this listing; presumably the replaced side
// of the change, since the same names follow with srl32<N>() - confirm.
__forceinline static GSVector8i x00000001(const GSVector8i& v) { return xffffffff(v).srl32(31); }
__forceinline static GSVector8i x00000003(const GSVector8i& v) { return xffffffff(v).srl32(30); }
__forceinline static GSVector8i x00000007(const GSVector8i& v) { return xffffffff(v).srl32(29); }
__forceinline static GSVector8i x0000000f(const GSVector8i& v) { return xffffffff(v).srl32(28); }
__forceinline static GSVector8i x0000001f(const GSVector8i& v) { return xffffffff(v).srl32(27); }
__forceinline static GSVector8i x0000003f(const GSVector8i& v) { return xffffffff(v).srl32(26); }
__forceinline static GSVector8i x0000007f(const GSVector8i& v) { return xffffffff(v).srl32(25); }
__forceinline static GSVector8i x000000ff(const GSVector8i& v) { return xffffffff(v).srl32(24); }
__forceinline static GSVector8i x000001ff(const GSVector8i& v) { return xffffffff(v).srl32(23); }
__forceinline static GSVector8i x000003ff(const GSVector8i& v) { return xffffffff(v).srl32(22); }
__forceinline static GSVector8i x000007ff(const GSVector8i& v) { return xffffffff(v).srl32(21); }
__forceinline static GSVector8i x00000fff(const GSVector8i& v) { return xffffffff(v).srl32(20); }
__forceinline static GSVector8i x00001fff(const GSVector8i& v) { return xffffffff(v).srl32(19); }
__forceinline static GSVector8i x00003fff(const GSVector8i& v) { return xffffffff(v).srl32(18); }
__forceinline static GSVector8i x00007fff(const GSVector8i& v) { return xffffffff(v).srl32(17); }
__forceinline static GSVector8i x0000ffff(const GSVector8i& v) { return xffffffff(v).srl32(16); }
__forceinline static GSVector8i x0001ffff(const GSVector8i& v) { return xffffffff(v).srl32(15); }
__forceinline static GSVector8i x0003ffff(const GSVector8i& v) { return xffffffff(v).srl32(14); }
__forceinline static GSVector8i x0007ffff(const GSVector8i& v) { return xffffffff(v).srl32(13); }
__forceinline static GSVector8i x000fffff(const GSVector8i& v) { return xffffffff(v).srl32(12); }
__forceinline static GSVector8i x001fffff(const GSVector8i& v) { return xffffffff(v).srl32(11); }
__forceinline static GSVector8i x003fffff(const GSVector8i& v) { return xffffffff(v).srl32(10); }
__forceinline static GSVector8i x007fffff(const GSVector8i& v) { return xffffffff(v).srl32( 9); }
__forceinline static GSVector8i x00ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 8); }
__forceinline static GSVector8i x01ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 7); }
__forceinline static GSVector8i x03ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 6); }
__forceinline static GSVector8i x07ffffff(const GSVector8i& v) { return xffffffff(v).srl32( 5); }
__forceinline static GSVector8i x0fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 4); }
__forceinline static GSVector8i x1fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 3); }
__forceinline static GSVector8i x3fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 2); }
__forceinline static GSVector8i x7fffffff(const GSVector8i& v) { return xffffffff(v).srl32( 1); }
// Low-bit mask helpers (v-seeded) in template-immediate form: each returns
// xffffffff(v) passed through srl32<N>(), a logical right shift of every
// 32-bit lane with N fixed at compile time (31 down to 1).
// presumably v is only used to seed xffffffff(v) (v == v) and its contents do
// not affect the resulting mask - confirm against operator==.
__forceinline static GSVector8i x00000001(const GSVector8i& v) { return xffffffff(v).srl32<31>(); }
__forceinline static GSVector8i x00000003(const GSVector8i& v) { return xffffffff(v).srl32<30>(); }
__forceinline static GSVector8i x00000007(const GSVector8i& v) { return xffffffff(v).srl32<29>(); }
__forceinline static GSVector8i x0000000f(const GSVector8i& v) { return xffffffff(v).srl32<28>(); }
__forceinline static GSVector8i x0000001f(const GSVector8i& v) { return xffffffff(v).srl32<27>(); }
__forceinline static GSVector8i x0000003f(const GSVector8i& v) { return xffffffff(v).srl32<26>(); }
__forceinline static GSVector8i x0000007f(const GSVector8i& v) { return xffffffff(v).srl32<25>(); }
__forceinline static GSVector8i x000000ff(const GSVector8i& v) { return xffffffff(v).srl32<24>(); }
__forceinline static GSVector8i x000001ff(const GSVector8i& v) { return xffffffff(v).srl32<23>(); }
__forceinline static GSVector8i x000003ff(const GSVector8i& v) { return xffffffff(v).srl32<22>(); }
__forceinline static GSVector8i x000007ff(const GSVector8i& v) { return xffffffff(v).srl32<21>(); }
__forceinline static GSVector8i x00000fff(const GSVector8i& v) { return xffffffff(v).srl32<20>(); }
__forceinline static GSVector8i x00001fff(const GSVector8i& v) { return xffffffff(v).srl32<19>(); }
__forceinline static GSVector8i x00003fff(const GSVector8i& v) { return xffffffff(v).srl32<18>(); }
__forceinline static GSVector8i x00007fff(const GSVector8i& v) { return xffffffff(v).srl32<17>(); }
__forceinline static GSVector8i x0000ffff(const GSVector8i& v) { return xffffffff(v).srl32<16>(); }
__forceinline static GSVector8i x0001ffff(const GSVector8i& v) { return xffffffff(v).srl32<15>(); }
__forceinline static GSVector8i x0003ffff(const GSVector8i& v) { return xffffffff(v).srl32<14>(); }
__forceinline static GSVector8i x0007ffff(const GSVector8i& v) { return xffffffff(v).srl32<13>(); }
__forceinline static GSVector8i x000fffff(const GSVector8i& v) { return xffffffff(v).srl32<12>(); }
__forceinline static GSVector8i x001fffff(const GSVector8i& v) { return xffffffff(v).srl32<11>(); }
__forceinline static GSVector8i x003fffff(const GSVector8i& v) { return xffffffff(v).srl32<10>(); }
__forceinline static GSVector8i x007fffff(const GSVector8i& v) { return xffffffff(v).srl32< 9>(); }
__forceinline static GSVector8i x00ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 8>(); }
__forceinline static GSVector8i x01ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 7>(); }
__forceinline static GSVector8i x03ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 6>(); }
__forceinline static GSVector8i x07ffffff(const GSVector8i& v) { return xffffffff(v).srl32< 5>(); }
__forceinline static GSVector8i x0fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 4>(); }
__forceinline static GSVector8i x1fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 3>(); }
__forceinline static GSVector8i x3fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 2>(); }
__forceinline static GSVector8i x7fffffff(const GSVector8i& v) { return xffffffff(v).srl32< 1>(); }
__forceinline static GSVector8i x80000000(const GSVector8i& v) { return xffffffff(v).sll32(31); }
__forceinline static GSVector8i xc0000000(const GSVector8i& v) { return xffffffff(v).sll32(30); }
__forceinline static GSVector8i xe0000000(const GSVector8i& v) { return xffffffff(v).sll32(29); }
__forceinline static GSVector8i xf0000000(const GSVector8i& v) { return xffffffff(v).sll32(28); }
__forceinline static GSVector8i xf8000000(const GSVector8i& v) { return xffffffff(v).sll32(27); }
__forceinline static GSVector8i xfc000000(const GSVector8i& v) { return xffffffff(v).sll32(26); }
__forceinline static GSVector8i xfe000000(const GSVector8i& v) { return xffffffff(v).sll32(25); }
__forceinline static GSVector8i xff000000(const GSVector8i& v) { return xffffffff(v).sll32(24); }
__forceinline static GSVector8i xff800000(const GSVector8i& v) { return xffffffff(v).sll32(23); }
__forceinline static GSVector8i xffc00000(const GSVector8i& v) { return xffffffff(v).sll32(22); }
__forceinline static GSVector8i xffe00000(const GSVector8i& v) { return xffffffff(v).sll32(21); }
__forceinline static GSVector8i xfff00000(const GSVector8i& v) { return xffffffff(v).sll32(20); }
__forceinline static GSVector8i xfff80000(const GSVector8i& v) { return xffffffff(v).sll32(19); }
__forceinline static GSVector8i xfffc0000(const GSVector8i& v) { return xffffffff(v).sll32(18); }
__forceinline static GSVector8i xfffe0000(const GSVector8i& v) { return xffffffff(v).sll32(17); }
__forceinline static GSVector8i xffff0000(const GSVector8i& v) { return xffffffff(v).sll32(16); }
__forceinline static GSVector8i xffff8000(const GSVector8i& v) { return xffffffff(v).sll32(15); }
__forceinline static GSVector8i xffffc000(const GSVector8i& v) { return xffffffff(v).sll32(14); }
__forceinline static GSVector8i xffffe000(const GSVector8i& v) { return xffffffff(v).sll32(13); }
__forceinline static GSVector8i xfffff000(const GSVector8i& v) { return xffffffff(v).sll32(12); }
__forceinline static GSVector8i xfffff800(const GSVector8i& v) { return xffffffff(v).sll32(11); }
__forceinline static GSVector8i xfffffc00(const GSVector8i& v) { return xffffffff(v).sll32(10); }
__forceinline static GSVector8i xfffffe00(const GSVector8i& v) { return xffffffff(v).sll32( 9); }
__forceinline static GSVector8i xffffff00(const GSVector8i& v) { return xffffffff(v).sll32( 8); }
__forceinline static GSVector8i xffffff80(const GSVector8i& v) { return xffffffff(v).sll32( 7); }
__forceinline static GSVector8i xffffffc0(const GSVector8i& v) { return xffffffff(v).sll32( 6); }
__forceinline static GSVector8i xffffffe0(const GSVector8i& v) { return xffffffff(v).sll32( 5); }
__forceinline static GSVector8i xfffffff0(const GSVector8i& v) { return xffffffff(v).sll32( 4); }
__forceinline static GSVector8i xfffffff8(const GSVector8i& v) { return xffffffff(v).sll32( 3); }
__forceinline static GSVector8i xfffffffc(const GSVector8i& v) { return xffffffff(v).sll32( 2); }
__forceinline static GSVector8i xfffffffe(const GSVector8i& v) { return xffffffff(v).sll32( 1); }
__forceinline static GSVector8i x80000000(const GSVector8i& v) { return xffffffff(v).sll32<31>(); }
__forceinline static GSVector8i xc0000000(const GSVector8i& v) { return xffffffff(v).sll32<30>(); }
__forceinline static GSVector8i xe0000000(const GSVector8i& v) { return xffffffff(v).sll32<29>(); }
__forceinline static GSVector8i xf0000000(const GSVector8i& v) { return xffffffff(v).sll32<28>(); }
__forceinline static GSVector8i xf8000000(const GSVector8i& v) { return xffffffff(v).sll32<27>(); }
__forceinline static GSVector8i xfc000000(const GSVector8i& v) { return xffffffff(v).sll32<26>(); }
__forceinline static GSVector8i xfe000000(const GSVector8i& v) { return xffffffff(v).sll32<25>(); }
__forceinline static GSVector8i xff000000(const GSVector8i& v) { return xffffffff(v).sll32<24>(); }
__forceinline static GSVector8i xff800000(const GSVector8i& v) { return xffffffff(v).sll32<23>(); }
__forceinline static GSVector8i xffc00000(const GSVector8i& v) { return xffffffff(v).sll32<22>(); }
__forceinline static GSVector8i xffe00000(const GSVector8i& v) { return xffffffff(v).sll32<21>(); }
__forceinline static GSVector8i xfff00000(const GSVector8i& v) { return xffffffff(v).sll32<20>(); }
__forceinline static GSVector8i xfff80000(const GSVector8i& v) { return xffffffff(v).sll32<19>(); }
__forceinline static GSVector8i xfffc0000(const GSVector8i& v) { return xffffffff(v).sll32<18>(); }
__forceinline static GSVector8i xfffe0000(const GSVector8i& v) { return xffffffff(v).sll32<17>(); }
__forceinline static GSVector8i xffff0000(const GSVector8i& v) { return xffffffff(v).sll32<16>(); }
__forceinline static GSVector8i xffff8000(const GSVector8i& v) { return xffffffff(v).sll32<15>(); }
__forceinline static GSVector8i xffffc000(const GSVector8i& v) { return xffffffff(v).sll32<14>(); }
__forceinline static GSVector8i xffffe000(const GSVector8i& v) { return xffffffff(v).sll32<13>(); }
__forceinline static GSVector8i xfffff000(const GSVector8i& v) { return xffffffff(v).sll32<12>(); }
__forceinline static GSVector8i xfffff800(const GSVector8i& v) { return xffffffff(v).sll32<11>(); }
__forceinline static GSVector8i xfffffc00(const GSVector8i& v) { return xffffffff(v).sll32<10>(); }
__forceinline static GSVector8i xfffffe00(const GSVector8i& v) { return xffffffff(v).sll32< 9>(); }
__forceinline static GSVector8i xffffff00(const GSVector8i& v) { return xffffffff(v).sll32< 8>(); }
__forceinline static GSVector8i xffffff80(const GSVector8i& v) { return xffffffff(v).sll32< 7>(); }
__forceinline static GSVector8i xffffffc0(const GSVector8i& v) { return xffffffff(v).sll32< 6>(); }
__forceinline static GSVector8i xffffffe0(const GSVector8i& v) { return xffffffff(v).sll32< 5>(); }
__forceinline static GSVector8i xfffffff0(const GSVector8i& v) { return xffffffff(v).sll32< 4>(); }
__forceinline static GSVector8i xfffffff8(const GSVector8i& v) { return xffffffff(v).sll32< 3>(); }
__forceinline static GSVector8i xfffffffc(const GSVector8i& v) { return xffffffff(v).sll32< 2>(); }
__forceinline static GSVector8i xfffffffe(const GSVector8i& v) { return xffffffff(v).sll32< 1>(); }
__forceinline static GSVector8i x0001(const GSVector8i& v) { return xffffffff(v).srl16(15); }
__forceinline static GSVector8i x0003(const GSVector8i& v) { return xffffffff(v).srl16(14); }
__forceinline static GSVector8i x0007(const GSVector8i& v) { return xffffffff(v).srl16(13); }
__forceinline static GSVector8i x000f(const GSVector8i& v) { return xffffffff(v).srl16(12); }
__forceinline static GSVector8i x001f(const GSVector8i& v) { return xffffffff(v).srl16(11); }
__forceinline static GSVector8i x003f(const GSVector8i& v) { return xffffffff(v).srl16(10); }
__forceinline static GSVector8i x007f(const GSVector8i& v) { return xffffffff(v).srl16( 9); }
__forceinline static GSVector8i x00ff(const GSVector8i& v) { return xffffffff(v).srl16( 8); }
__forceinline static GSVector8i x01ff(const GSVector8i& v) { return xffffffff(v).srl16( 7); }
__forceinline static GSVector8i x03ff(const GSVector8i& v) { return xffffffff(v).srl16( 6); }
__forceinline static GSVector8i x07ff(const GSVector8i& v) { return xffffffff(v).srl16( 5); }
__forceinline static GSVector8i x0fff(const GSVector8i& v) { return xffffffff(v).srl16( 4); }
__forceinline static GSVector8i x1fff(const GSVector8i& v) { return xffffffff(v).srl16( 3); }
__forceinline static GSVector8i x3fff(const GSVector8i& v) { return xffffffff(v).srl16( 2); }
__forceinline static GSVector8i x7fff(const GSVector8i& v) { return xffffffff(v).srl16( 1); }
__forceinline static GSVector8i x0001(const GSVector8i& v) { return xffffffff(v).srl16<15>(); }
__forceinline static GSVector8i x0003(const GSVector8i& v) { return xffffffff(v).srl16<14>(); }
__forceinline static GSVector8i x0007(const GSVector8i& v) { return xffffffff(v).srl16<13>(); }
__forceinline static GSVector8i x000f(const GSVector8i& v) { return xffffffff(v).srl16<12>(); }
__forceinline static GSVector8i x001f(const GSVector8i& v) { return xffffffff(v).srl16<11>(); }
__forceinline static GSVector8i x003f(const GSVector8i& v) { return xffffffff(v).srl16<10>(); }
__forceinline static GSVector8i x007f(const GSVector8i& v) { return xffffffff(v).srl16< 9>(); }
__forceinline static GSVector8i x00ff(const GSVector8i& v) { return xffffffff(v).srl16< 8>(); }
__forceinline static GSVector8i x01ff(const GSVector8i& v) { return xffffffff(v).srl16< 7>(); }
__forceinline static GSVector8i x03ff(const GSVector8i& v) { return xffffffff(v).srl16< 6>(); }
__forceinline static GSVector8i x07ff(const GSVector8i& v) { return xffffffff(v).srl16< 5>(); }
__forceinline static GSVector8i x0fff(const GSVector8i& v) { return xffffffff(v).srl16< 4>(); }
__forceinline static GSVector8i x1fff(const GSVector8i& v) { return xffffffff(v).srl16< 3>(); }
__forceinline static GSVector8i x3fff(const GSVector8i& v) { return xffffffff(v).srl16< 2>(); }
__forceinline static GSVector8i x7fff(const GSVector8i& v) { return xffffffff(v).srl16< 1>(); }
__forceinline static GSVector8i x8000(const GSVector8i& v) { return xffffffff(v).sll16(15); }
__forceinline static GSVector8i xc000(const GSVector8i& v) { return xffffffff(v).sll16(14); }
__forceinline static GSVector8i xe000(const GSVector8i& v) { return xffffffff(v).sll16(13); }
__forceinline static GSVector8i xf000(const GSVector8i& v) { return xffffffff(v).sll16(12); }
__forceinline static GSVector8i xf800(const GSVector8i& v) { return xffffffff(v).sll16(11); }
__forceinline static GSVector8i xfc00(const GSVector8i& v) { return xffffffff(v).sll16(10); }
__forceinline static GSVector8i xfe00(const GSVector8i& v) { return xffffffff(v).sll16( 9); }
__forceinline static GSVector8i xff00(const GSVector8i& v) { return xffffffff(v).sll16( 8); }
__forceinline static GSVector8i xff80(const GSVector8i& v) { return xffffffff(v).sll16( 7); }
__forceinline static GSVector8i xffc0(const GSVector8i& v) { return xffffffff(v).sll16( 6); }
__forceinline static GSVector8i xffe0(const GSVector8i& v) { return xffffffff(v).sll16( 5); }
__forceinline static GSVector8i xfff0(const GSVector8i& v) { return xffffffff(v).sll16( 4); }
__forceinline static GSVector8i xfff8(const GSVector8i& v) { return xffffffff(v).sll16( 3); }
__forceinline static GSVector8i xfffc(const GSVector8i& v) { return xffffffff(v).sll16( 2); }
__forceinline static GSVector8i xfffe(const GSVector8i& v) { return xffffffff(v).sll16( 1); }
__forceinline static GSVector8i x8000(const GSVector8i& v) { return xffffffff(v).sll16<15>(); }
__forceinline static GSVector8i xc000(const GSVector8i& v) { return xffffffff(v).sll16<14>(); }
__forceinline static GSVector8i xe000(const GSVector8i& v) { return xffffffff(v).sll16<13>(); }
__forceinline static GSVector8i xf000(const GSVector8i& v) { return xffffffff(v).sll16<12>(); }
__forceinline static GSVector8i xf800(const GSVector8i& v) { return xffffffff(v).sll16<11>(); }
__forceinline static GSVector8i xfc00(const GSVector8i& v) { return xffffffff(v).sll16<10>(); }
__forceinline static GSVector8i xfe00(const GSVector8i& v) { return xffffffff(v).sll16< 9>(); }
__forceinline static GSVector8i xff00(const GSVector8i& v) { return xffffffff(v).sll16< 8>(); }
__forceinline static GSVector8i xff80(const GSVector8i& v) { return xffffffff(v).sll16< 7>(); }
__forceinline static GSVector8i xffc0(const GSVector8i& v) { return xffffffff(v).sll16< 6>(); }
__forceinline static GSVector8i xffe0(const GSVector8i& v) { return xffffffff(v).sll16< 5>(); }
__forceinline static GSVector8i xfff0(const GSVector8i& v) { return xffffffff(v).sll16< 4>(); }
__forceinline static GSVector8i xfff8(const GSVector8i& v) { return xffffffff(v).sll16< 3>(); }
__forceinline static GSVector8i xfffc(const GSVector8i& v) { return xffffffff(v).sll16< 2>(); }
__forceinline static GSVector8i xfffe(const GSVector8i& v) { return xffffffff(v).sll16< 1>(); }
__forceinline static GSVector8i xff(int n) { return m_xff[n]; }
__forceinline static GSVector8i x0f(int n) { return m_x0f[n]; }

View File

@ -320,7 +320,7 @@ void GSRendererHW::ExpandLineIndices()
read -= 1;
write -= expansion_factor;
const GSVector4i in = read->sll16(2);
const GSVector4i in = read->sll16<2>();
write[0] = in.shuffle8(mask0) | low0;
write[1] = in.shuffle8(mask1) | low1;
write[2] = in.shuffle8(mask2) | low2;
@ -373,7 +373,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
static_cast<int>(m_vt.m_min.p.x), static_cast<int>(m_vt.m_min.p.y), static_cast<int>(m_vt.m_min.p.z),
static_cast<int>(m_vt.m_min.p.w), r.x, r.y, r.z, r.w);
const GSVector4i fpr = r.sll32(4);
const GSVector4i fpr = r.sll32<4>();
v[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + fpr.x);
v[0].XYZ.Y = static_cast<u16>(m_context->XYOFFSET.OFY + fpr.y);
@ -487,7 +487,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
const GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset);
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
tmp = GSVector4i(tmp - offset).srl32<1>() + offset;
v[i].XYZ.Y = static_cast<u16>(tmp.x);
v[i + 1].XYZ.Y = static_cast<u16>(tmp.z);
@ -525,7 +525,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba, GS
const GSVector4i offset(o.OFY, o.OFY);
GSVector4i tmp(v[i].XYZ.Y, v[i + 1].XYZ.Y);
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
tmp = GSVector4i(tmp - offset).srl32<1>() + offset;
//fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y);
v[i].XYZ.Y = static_cast<u16>(tmp.x);
@ -1472,7 +1472,7 @@ void GSRendererHW::SwSpriteRender()
// Apply TFX
pxAssert(tex0_tfx == 0 || tex0_tfx == 1);
if (tex0_tfx == 0)
sc = sc.mul16l(vc).srl16(7).clamp8(); // clamp((sc * vc) >> 7, 0, 255), srl16 is ok because 16 bit values are unsigned
sc = sc.mul16l(vc).srl16<7>().clamp8(); // clamp((sc * vc) >> 7, 0, 255), srl16 is ok because 16 bit values are unsigned
if (tex0_tcc == 0)
sc = sc.blend(vc, a_mask);
@ -1502,7 +1502,7 @@ void GSRendererHW::SwSpriteRender()
.ps32() // 0x00AA00AA00aa00aa00AA00AA00aa00aa
.xxyy(); // 0x00AA00AA00AA00AA00aa00aa00aa00aa
const GSVector4i D = alpha_d == 0 ? sc : alpha_d == 1 ? dc0 : GSVector4i::zero();
dc = A.sub16(B).mul16l(C).sra16(7).add16(D); // (((A - B) * C) >> 7) + D, must use sra16 due to signed 16 bit values.
dc = A.sub16(B).mul16l(C).sra16<7>().add16(D); // (((A - B) * C) >> 7) + D, must use sra16 due to signed 16 bit values.
// dc alpha channels (dc.u16[3], dc.u16[7]) dirty
}
else
@ -1514,7 +1514,7 @@ void GSRendererHW::SwSpriteRender()
if (m_draw_env->COLCLAMP.CLAMP)
dc = dc.clamp8(); // clamp(dc, 0, 255)
else
dc = dc.sll16(8).srl16(8); // Mask, lower 8 bits enabled per channel
dc = dc.sll16<8>().srl16<8>(); // Mask, lower 8 bits enabled per channel
// No Alpha Correction
pxAssert(m_context->FBA.FBA == 0);
@ -6535,8 +6535,8 @@ bool GSRendererHW::IsReallyDithered() const
void GSRendererHW::ReplaceVerticesWithSprite(const GSVector4i& unscaled_rect, const GSVector4i& unscaled_uv_rect,
const GSVector2i& unscaled_size, const GSVector4i& scissor)
{
const GSVector4i fpr = unscaled_rect.sll32(4);
const GSVector4i fpuv = unscaled_uv_rect.sll32(4);
const GSVector4i fpr = unscaled_rect.sll32<4>();
const GSVector4i fpuv = unscaled_uv_rect.sll32<4>();
GSVertex* v = m_vertex.buff;
v[0].XYZ.X = static_cast<u16>(m_context->XYOFFSET.OFX + fpr.x);
@ -6615,7 +6615,7 @@ GSHWDrawConfig& GSRendererHW::BeginHLEHardwareDraw(
vertices[i].V = v; \
} while (0)
const GSVector4i fp_rect = unscaled_rect.sll32(4);
const GSVector4i fp_rect = unscaled_rect.sll32<4>();
V(0, fp_rect.x, fp_rect.y, fp_rect.x, fp_rect.y); // top-left
V(1, fp_rect.z, fp_rect.y, fp_rect.z, fp_rect.y); // top-right
V(2, fp_rect.x, fp_rect.w, fp_rect.x, fp_rect.w); // bottom-left

View File

@ -323,7 +323,7 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u16* index, cons
c = c.upl16(c.zwxy());
if (sel.tfx == TFX_NONE)
c = c.srl16(7);
c = c.srl16<7>();
local.c.rb = c.xxxx();
local.c.ga = c.zzzz();
@ -516,9 +516,9 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
if (sel.edge)
{
#if _M_SSE >= 0x501
cov = GSVector8i::broadcast16(GSVector4i::cast(scan.p)).srl16(9);
cov = GSVector8i::broadcast16(GSVector4i::cast(scan.p)).srl16<9>();
#else
cov = GSVector4i::cast(scan.p).xxxxl().xxxx().srl16(9);
cov = GSVector4i::cast(scan.p).xxxxl().xxxx().srl16<9>();
#endif
}
@ -537,7 +537,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
else if (sel.ltf)
{
vf = v.xxzzlh().srl16(12);
vf = v.xxzzlh().srl16<12>();
}
s = VectorF::cast(u);
@ -649,8 +649,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
switch (sel.zpsm)
{
case 1: zdo = zdo.sll32( 8).srl32( 8); break;
case 2: zdo = zdo.sll32(16).srl32(16); break;
case 1: zdo = zdo.sll32< 8>().srl32<8>(); break;
case 2: zdo = zdo.sll32<16>().srl32<16>(); break;
default: break;
}
@ -705,7 +705,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
lod += 0x8000;
}
lodi = lod.srl32(16);
lodi = lod.srl32<16>();
if (sel.mmin == 2) // trilinear mode
{
@ -787,11 +787,11 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
u -= 0x8000;
v -= 0x8000;
uf = u.xxzzlh().srl16(12);
vf = v.xxzzlh().srl16(12);
uf = u.xxzzlh().srl16<12>();
vf = v.xxzzlh().srl16<12>();
}
VectorI uv0 = u.sra32(16).ps32(v.sra32(16));
VectorI uv0 = u.sra32<16>().ps32(v.sra32<16>());
VectorI uv1 = uv0;
{
@ -849,18 +849,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
}
VectorI rb00 = c00.sll16(8).srl16(8);
VectorI ga00 = c00.srl16(8);
VectorI rb01 = c01.sll16(8).srl16(8);
VectorI ga01 = c01.srl16(8);
VectorI rb00 = c00.sll16<8>().srl16<8>();
VectorI ga00 = c00.srl16<8>();
VectorI rb01 = c01.sll16<8>().srl16<8>();
VectorI ga01 = c01.srl16<8>();
rb00 = rb00.lerp16_4(rb01, uf);
ga00 = ga00.lerp16_4(ga01, uf);
VectorI rb10 = c10.sll16(8).srl16(8);
VectorI ga10 = c10.srl16(8);
VectorI rb11 = c11.sll16(8).srl16(8);
VectorI ga11 = c11.srl16(8);
VectorI rb10 = c10.sll16<8>().srl16<8>();
VectorI ga10 = c10.srl16<8>();
VectorI rb11 = c11.sll16<8>().srl16<8>();
VectorI ga11 = c11.srl16<8>();
rb10 = rb10.lerp16_4(rb11, uf);
ga10 = ga10.lerp16_4(ga11, uf);
@ -887,8 +887,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
}
rb = c00.sll16(8).srl16(8);
ga = c00.srl16(8);
rb = c00.sll16<8>().srl16<8>();
ga = c00.srl16<8>();
}
if (sel.mmin != 1) // !round-off mode
@ -897,22 +897,22 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
lodi += VectorI::x00000001();
u = uv[0].sra32(1);
v = uv[1].sra32(1);
u = uv[0].sra32<1>();
v = uv[1].sra32<1>();
minuv = minuv.srl16(1);
maxuv = maxuv.srl16(1);
minuv = minuv.srl16<1>();
maxuv = maxuv.srl16<1>();
if (sel.ltf)
{
u -= 0x8000;
v -= 0x8000;
uf = u.xxzzlh().srl16(12);
vf = v.xxzzlh().srl16(12);
uf = u.xxzzlh().srl16<12>();
vf = v.xxzzlh().srl16<12>();
}
VectorI uv0 = u.sra32(16).ps32(v.sra32(16));
VectorI uv0 = u.sra32<16>().ps32(v.sra32<16>());
VectorI uv1 = uv0;
{
@ -970,18 +970,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
}
VectorI rb00 = c00.sll16(8).srl16(8);
VectorI ga00 = c00.srl16(8);
VectorI rb01 = c01.sll16(8).srl16(8);
VectorI ga01 = c01.srl16(8);
VectorI rb00 = c00.sll16<8>().srl16<8>();
VectorI ga00 = c00.srl16<8>();
VectorI rb01 = c01.sll16<8>().srl16<8>();
VectorI ga01 = c01.srl16<8>();
rb00 = rb00.lerp16_4(rb01, uf);
ga00 = ga00.lerp16_4(ga01, uf);
VectorI rb10 = c10.sll16(8).srl16(8);
VectorI ga10 = c10.srl16(8);
VectorI rb11 = c11.sll16(8).srl16(8);
VectorI ga11 = c11.srl16(8);
VectorI rb10 = c10.sll16<8>().srl16<8>();
VectorI ga10 = c10.srl16<8>();
VectorI rb11 = c11.sll16<8>().srl16<8>();
VectorI ga11 = c11.srl16<8>();
rb10 = rb10.lerp16_4(rb11, uf);
ga10 = ga10.lerp16_4(ga11, uf);
@ -1008,14 +1008,14 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
}
rb2 = c00.sll16(8).srl16(8);
ga2 = c00.srl16(8);
rb2 = c00.sll16<8>().srl16<8>();
ga2 = c00.srl16<8>();
}
if (sel.lcm)
lodf = global.lod.f;
lodf = lodf.srl16(1);
lodf = lodf.srl16<1>();
rb = rb.lerp16<0>(rb2, lodf);
ga = ga.lerp16<0>(ga2, lodf);
@ -1042,15 +1042,15 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
if (sel.ltf)
{
uf = u.xxzzlh().srl16(12);
uf = u.xxzzlh().srl16<12>();
if (sel.prim != GS_SPRITE_CLASS)
{
vf = v.xxzzlh().srl16(12);
vf = v.xxzzlh().srl16<12>();
}
}
VectorI uv0 = u.sra32(16).ps32(v.sra32(16));
VectorI uv0 = u.sra32<16>().ps32(v.sra32<16>());
VectorI uv1 = uv0;
VectorI tmin = VectorI::broadcast128(global.t.min);
@ -1105,18 +1105,18 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
c11 = addr11.gather32_32(tex);
}
VectorI rb00 = c00.sll16(8).srl16(8);
VectorI ga00 = c00.srl16(8);
VectorI rb01 = c01.sll16(8).srl16(8);
VectorI ga01 = c01.srl16(8);
VectorI rb00 = c00.sll16<8>().srl16<8>();
VectorI ga00 = c00.srl16<8>();
VectorI rb01 = c01.sll16<8>().srl16<8>();
VectorI ga01 = c01.srl16<8>();
rb00 = rb00.lerp16_4(rb01, uf);
ga00 = ga00.lerp16_4(ga01, uf);
VectorI rb10 = c10.sll16(8).srl16(8);
VectorI ga10 = c10.srl16(8);
VectorI rb11 = c11.sll16(8).srl16(8);
VectorI ga11 = c11.srl16(8);
VectorI rb10 = c10.sll16<8>().srl16<8>();
VectorI ga10 = c10.srl16<8>();
VectorI rb11 = c11.sll16<8>().srl16<8>();
VectorI ga11 = c11.srl16<8>();
rb10 = rb10.lerp16_4(rb11, uf);
ga10 = ga10.lerp16_4(ga11, uf);
@ -1137,8 +1137,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
c00 = addr00.gather32_32((const u32*)global.tex[0]);
}
rb = c00.sll16(8).srl16(8);
ga = c00.srl16(8);
rb = c00.sll16<8>().srl16<8>();
ga = c00.srl16<8>();
}
}
}
@ -1152,21 +1152,21 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
case TFX_MODULATE:
ga = ga.modulate16<1>(gaf).clamp8();
if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
ga = ga.mix16(gaf.srl16<7>());
break;
case TFX_DECAL:
if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
ga = ga.mix16(gaf.srl16<7>());
break;
case TFX_HIGHLIGHT:
ga = ga.mix16(!sel.tcc ? gaf.srl16(7) : ga.addus8(gaf.srl16(7)));
ga = ga.mix16(!sel.tcc ? gaf.srl16<7>() : ga.addus8(gaf.srl16<7>()));
break;
case TFX_HIGHLIGHT2:
if (!sel.tcc)
ga = ga.mix16(gaf.srl16(7));
ga = ga.mix16(gaf.srl16<7>());
break;
case TFX_NONE:
ga = sel.iip ? gaf.srl16(7) : gaf;
ga = sel.iip ? gaf.srl16<7>() : gaf;
break;
}
@ -1182,7 +1182,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
}
else
{
ga = ga.blend8(a, ga.eq16(x00800080).srl32(16).sll32(16));
ga = ga.blend8(a, ga.eq16(x00800080).srl32<16>().sll32<16>());
}
}
}
@ -1219,12 +1219,12 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
break;
case TFX_HIGHLIGHT:
case TFX_HIGHLIGHT2:
af = gaf.yywwlh().srl16(7);
af = gaf.yywwlh().srl16<7>();
rb = rb.modulate16<1>(rbf).add16(af).clamp8();
ga = ga.modulate16<1>(gaf).add16(af).clamp8().mix16(ga);
break;
case TFX_NONE:
rb = sel.iip ? rbf.srl16(7) : rbf;
rb = sel.iip ? rbf.srl16<7>() : rbf;
break;
}
}
@ -1249,12 +1249,12 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
ga = fga.lerp16<0>(ga, fog).mix16(ga);
/*
fog = fog.srl16(7);
fog = fog.srl16<7>();
VectorI ifog = VectorI::x00ff().sub16(fog);
rb = rb.mul16l(fog).add16(frb.mul16l(ifog)).srl16(8);
ga = ga.mul16l(fog).add16(fga.mul16l(ifog)).srl16(8).mix16(ga);
rb = rb.mul16l(fog).add16(frb.mul16l(ifog)).srl16<8>();
ga = ga.mul16l(fog).add16(fga.mul16l(ifog)).srl16<8>().mix16(ga);
*/
}
@ -1285,22 +1285,22 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
if (sel.fpsm == 2)
{
// test |= fd.srl32(15) == VectorI::zero();
test |= fd.sll32(16).sra32(31) == VectorI::zero();
test |= fd.sll32<16>().sra32<31>() == VectorI::zero();
}
else
{
test |= (~fd).sra32(31);
test |= (~fd).sra32<31>();
}
}
else
{
if (sel.fpsm == 2)
{
test |= fd.sll32(16).sra32(31); // == VectorI::xffffffff();
test |= fd.sll32<16>().sra32<31>(); // == VectorI::xffffffff();
}
else
{
test |= fd.sra32(31);
test |= fd.sra32<31>();
}
}
@ -1419,8 +1419,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
{
case 0:
case 1:
rbd = fd.sll16(8).srl16(8);
gad = fd.srl16(8);
rbd = fd.sll16<8>().srl16<8>();
gad = fd.srl16<8>();
break;
case 2:
rbd = ((fd & 0x7c00) << 9) | ((fd & 0x001f) << 3);
@ -1449,8 +1449,8 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
{
switch(sel.abc)
{
case 0: a = gas.yywwlh().sll16(7); break;
case 1: a = gad.yywwlh().sll16(7); break;
case 0: a = gas.yywwlh().sll16<7>(); break;
case 1: a = gad.yywwlh().sll16<7>(); break;
case 2: a = global.afix; break;
}
@ -1476,7 +1476,7 @@ __ri void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSV
if (sel.pabe)
{
mask = (gas << 8).sra32(31);
mask = (gas << 8).sra32<31>();
rb = rbs.blend8(rb, mask);
}