mirror of https://github.com/PCSX2/pcsx2.git
GSdx: Don't use constexpr vector constructors at runtime
The MSVC implementation is slow
This commit is contained in:
parent
f8b8b5db13
commit
fcee1e5496
|
@ -31,15 +31,15 @@ CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3,
|
|||
CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
CONSTINIT const GSVector8i GSBlock::m_xxxa(GSVector8i::cxpr_set1_epi32(0x00008000));
|
||||
CONSTINIT const GSVector8i GSBlock::m_xxbx(GSVector8i::cxpr_set1_epi32(0x00007c00));
|
||||
CONSTINIT const GSVector8i GSBlock::m_xgxx(GSVector8i::cxpr_set1_epi32(0x000003e0));
|
||||
CONSTINIT const GSVector8i GSBlock::m_rxxx(GSVector8i::cxpr_set1_epi32(0x0000001f));
|
||||
// GSBlock bit-field masks for the _M_SSE >= 0x501 build, stored as GSVector8i.
// Each one is produced from a single 32-bit value by GSVector8i::cxpr.
// NOTE(review): GSVector8i::cxpr(int) is not visible here; presumably it replicates
// the value into every 32-bit lane, like the cxpr_set1_epi32 form it replaces — confirm.
// NOTE(review): bit 15 / bits 10-14 / bits 5-9 / bits 0-4 look like the A, B, G, R
// fields of a 16-bit 1:5:5:5 pixel — confirm against the code that uses them.
CONSTINIT const GSVector8i GSBlock::m_xxxa = GSVector8i::cxpr(0x00008000); // bit 15
|
||||
CONSTINIT const GSVector8i GSBlock::m_xxbx = GSVector8i::cxpr(0x00007c00); // bits 10-14
|
||||
CONSTINIT const GSVector8i GSBlock::m_xgxx = GSVector8i::cxpr(0x000003e0); // bits 5-9
|
||||
CONSTINIT const GSVector8i GSBlock::m_rxxx = GSVector8i::cxpr(0x0000001f); // bits 0-4
|
||||
#else
|
||||
CONSTINIT const GSVector4i GSBlock::m_xxxa(GSVector4i::cxpr_set1_epi32(0x00008000));
|
||||
CONSTINIT const GSVector4i GSBlock::m_xxbx(GSVector4i::cxpr_set1_epi32(0x00007c00));
|
||||
CONSTINIT const GSVector4i GSBlock::m_xgxx(GSVector4i::cxpr_set1_epi32(0x000003e0));
|
||||
CONSTINIT const GSVector4i GSBlock::m_rxxx(GSVector4i::cxpr_set1_epi32(0x0000001f));
|
||||
// GSBlock bit-field masks for builds below _M_SSE 0x501, stored as GSVector4i.
// GSVector4i::cxpr(int) passes the same value for all four 32-bit lanes.
// NOTE(review): bit 15 / bits 10-14 / bits 5-9 / bits 0-4 look like the A, B, G, R
// fields of a 16-bit 1:5:5:5 pixel — confirm against the code that uses them.
CONSTINIT const GSVector4i GSBlock::m_xxxa = GSVector4i::cxpr(0x00008000); // bit 15
|
||||
CONSTINIT const GSVector4i GSBlock::m_xxbx = GSVector4i::cxpr(0x00007c00); // bits 10-14
|
||||
CONSTINIT const GSVector4i GSBlock::m_xgxx = GSVector4i::cxpr(0x000003e0); // bits 5-9
|
||||
CONSTINIT const GSVector4i GSBlock::m_rxxx = GSVector4i::cxpr(0x0000001f); // bits 0-4
|
||||
#endif
|
||||
|
||||
CONSTINIT const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
|
||||
|
|
|
@ -731,9 +731,9 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
|
|||
|
||||
// TODO
|
||||
|
||||
CONSTINIT const GSVector4i GSClut::m_bm(GSVector4i::cxpr_set1_epi32(0x00007c00));
|
||||
CONSTINIT const GSVector4i GSClut::m_gm(GSVector4i::cxpr_set1_epi32(0x000003e0));
|
||||
CONSTINIT const GSVector4i GSClut::m_rm(GSVector4i::cxpr_set1_epi32(0x0000001f));
|
||||
// GSClut 5-bit field masks; GSVector4i::cxpr(int) passes the same value for all four lanes.
// NOTE(review): the b/g/r names and bit ranges suggest the colour fields of a
// 16-bit 5:5:5 entry — confirm against Expand16 and the other users.
CONSTINIT const GSVector4i GSClut::m_bm = GSVector4i::cxpr(0x00007c00); // bits 10-14
|
||||
CONSTINIT const GSVector4i GSClut::m_gm = GSVector4i::cxpr(0x000003e0); // bits 5-9
|
||||
CONSTINIT const GSVector4i GSClut::m_rm = GSVector4i::cxpr(0x0000001f); // bits 0-4
|
||||
|
||||
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
|
|
|
@ -24,143 +24,143 @@
|
|||
|
||||
CONSTINIT const GSVector4i GSVector4i::m_xff[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
|
||||
CONSTINIT const GSVector4i GSVector4i::m_x0f[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
|
||||
CONSTINIT const GSVector4 GSVector4::m_ps0123(cxpr_setr_ps(0.0f, 1.0f, 2.0f, 3.0f));
|
||||
CONSTINIT const GSVector4 GSVector4::m_ps4567(cxpr_setr_ps(4.0f, 5.0f, 6.0f, 7.0f));
|
||||
CONSTINIT const GSVector4 GSVector4::m_half(cxpr_set1_ps(0.5f));
|
||||
CONSTINIT const GSVector4 GSVector4::m_one(cxpr_set1_ps(1.0f));
|
||||
CONSTINIT const GSVector4 GSVector4::m_two(cxpr_set1_ps(2.0f));
|
||||
CONSTINIT const GSVector4 GSVector4::m_four(cxpr_set1_ps(4.0f));
|
||||
CONSTINIT const GSVector4 GSVector4::m_x4b000000(cxpr_set1_epi32(0x4b000000));
|
||||
CONSTINIT const GSVector4 GSVector4::m_x4f800000(cxpr_set1_epi32(0x4f800000));
|
||||
CONSTINIT const GSVector4 GSVector4::m_max(cxpr_set1_ps(FLT_MAX));
|
||||
CONSTINIT const GSVector4 GSVector4::m_min(cxpr_set1_ps(FLT_MIN));
|
||||
// Shared GSVector4 constants, built through the static cxpr factories.
// Float arguments select the float overloads (values stored as floats); the hex
// integer literals select the int overloads, which store the raw 32-bit pattern
// in each lane rather than converting the number to float.
CONSTINIT const GSVector4 GSVector4::m_ps0123 = cxpr(0.0f, 1.0f, 2.0f, 3.0f);
|
||||
CONSTINIT const GSVector4 GSVector4::m_ps4567 = cxpr(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
CONSTINIT const GSVector4 GSVector4::m_half = cxpr(0.5f);
|
||||
CONSTINIT const GSVector4 GSVector4::m_one = cxpr(1.0f);
|
||||
CONSTINIT const GSVector4 GSVector4::m_two = cxpr(2.0f);
|
||||
CONSTINIT const GSVector4 GSVector4::m_four = cxpr(4.0f);
|
||||
// 0x4b000000 is the IEEE-754 single-precision encoding of 2^23 (8388608.0f).
CONSTINIT const GSVector4 GSVector4::m_x4b000000 = cxpr(0x4b000000);
|
||||
// 0x4f800000 is the IEEE-754 single-precision encoding of 2^32 (4294967296.0f).
CONSTINIT const GSVector4 GSVector4::m_x4f800000 = cxpr(0x4f800000);
|
||||
CONSTINIT const GSVector4 GSVector4::m_max = cxpr(FLT_MAX);
|
||||
// FLT_MIN is the smallest positive normalized float, not the most negative value.
CONSTINIT const GSVector4 GSVector4::m_min = cxpr(FLT_MIN);
|
||||
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
CONSTINIT const GSVector8 GSVector8::m_half(cxpr_set1_ps(0.5f));
|
||||
CONSTINIT const GSVector8 GSVector8::m_one(cxpr_set1_ps(1.0f));
|
||||
CONSTINIT const GSVector8 GSVector8::m_x7fffffff(cxpr_set1_epi32(0x7fffffff));
|
||||
CONSTINIT const GSVector8 GSVector8::m_x80000000(cxpr_set1_epi32(0x80000000));
|
||||
CONSTINIT const GSVector8 GSVector8::m_x4b000000(cxpr_set1_epi32(0x4b000000));
|
||||
CONSTINIT const GSVector8 GSVector8::m_x4f800000(cxpr_set1_epi32(0x4f800000));
|
||||
CONSTINIT const GSVector8 GSVector8::m_max(cxpr_set1_ps(FLT_MAX));
|
||||
CONSTINIT const GSVector8 GSVector8::m_min(cxpr_set1_ps(FLT_MAX));
|
||||
// Shared GSVector8 constants, built through the static cxpr factories.
// Float arguments select cxpr(float); integer literals select cxpr(int) or
// cxpr(uint32), both of which store the raw 32-bit pattern in every lane.
CONSTINIT const GSVector8 GSVector8::m_half = cxpr(0.5f);
|
||||
CONSTINIT const GSVector8 GSVector8::m_one = cxpr(1.0f);
|
||||
// Every bit except bit 31 set in each lane.
CONSTINIT const GSVector8 GSVector8::m_x7fffffff = cxpr(0x7fffffff);
|
||||
// Only bit 31 set in each lane. The literal does not fit in a 32-bit int, so it is
// unsigned and resolves to the cxpr(uint32) overload (assuming uint32 is unsigned int).
CONSTINIT const GSVector8 GSVector8::m_x80000000 = cxpr(0x80000000);
|
||||
// 0x4b000000 is the IEEE-754 single-precision encoding of 2^23 (8388608.0f).
CONSTINIT const GSVector8 GSVector8::m_x4b000000 = cxpr(0x4b000000);
|
||||
// 0x4f800000 is the IEEE-754 single-precision encoding of 2^32 (4294967296.0f).
CONSTINIT const GSVector8 GSVector8::m_x4f800000 = cxpr(0x4f800000);
|
||||
CONSTINIT const GSVector8 GSVector8::m_max = cxpr(FLT_MAX);
|
||||
// Smallest positive normalized float in every lane, matching GSVector4::m_min.
// This was previously initialised with FLT_MAX, which duplicated m_max.
CONSTINIT const GSVector8 GSVector8::m_min = cxpr(FLT_MIN);
|
||||
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
CONSTINIT const GSVector8i GSVector8i::m_xff[33] =
|
||||
{
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
|
||||
CONSTINIT const GSVector8i GSVector8i::m_x0f[33] =
|
||||
{
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
|
||||
class alignas(16) GSVector4
|
||||
{
|
||||
public:
|
||||
constexpr static __m128 cxpr_setr_ps(float x, float y, float z, float w)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
|
@ -35,29 +34,22 @@ public:
|
|||
return m;
|
||||
#endif
|
||||
}
|
||||
constexpr static __m128 cxpr_set1_ps(float x)
|
||||
{
|
||||
return cxpr_setr_ps(x, x, x, x);
|
||||
}
|
||||
|
||||
constexpr static __m128 cxpr_setr_epi32(uint32 x, uint32 y, uint32 z, uint32 w)
|
||||
constexpr static __m128 cxpr_setr_epi32(int x, int y, int z, int w)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
return (__m128)(__v4su{x, y, z, w});
|
||||
return (__m128)(__v4si{x, y, z, w});
|
||||
#else
|
||||
__m128 m = {};
|
||||
m.m128_u32[0] = x;
|
||||
m.m128_u32[1] = y;
|
||||
m.m128_u32[2] = z;
|
||||
m.m128_u32[3] = w;
|
||||
m.m128_i32[0] = x;
|
||||
m.m128_i32[1] = y;
|
||||
m.m128_i32[2] = z;
|
||||
m.m128_i32[3] = w;
|
||||
return m;
|
||||
#endif
|
||||
}
|
||||
constexpr static __m128 cxpr_set1_epi32(uint32 x)
|
||||
{
|
||||
return cxpr_setr_epi32(x, x, x, x);
|
||||
}
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct {float x, y, z, w;};
|
||||
|
@ -91,9 +83,29 @@ public:
|
|||
|
||||
constexpr GSVector4(const GSVector4&) = default;
|
||||
|
||||
constexpr GSVector4(float x, float y, float z, float w)
|
||||
: m(cxpr_setr_ps(x, y, z, w))
|
||||
// Constant-expression factory: wraps the __m128 produced by cxpr_setr_ps(x, y, z, w)
// in a GSVector4. The runtime GSVector4(float, float, float, float) constructor is
// separate and is not constexpr.
// NOTE(review): lane order is whatever cxpr_setr_ps produces (presumably x in the
// lowest lane, going by the "setr" name) — confirm.
constexpr static GSVector4 cxpr(float x, float y, float z, float w)
|
||||
{
|
||||
return GSVector4(cxpr_setr_ps(x, y, z, w));
|
||||
}
|
||||
|
||||
// Constant-expression factory: all four lanes hold the same float x.
constexpr static GSVector4 cxpr(float x)
|
||||
{
|
||||
return GSVector4(cxpr_setr_ps(x, x, x, x)); // same value passed for every lane
|
||||
}
|
||||
|
||||
// Constant-expression factory taking four integers. cxpr_setr_epi32 writes them
// into the 32-bit lanes of the __m128 as integers, so the result carries their
// raw bit patterns; the numbers are not converted to float.
constexpr static GSVector4 cxpr(int x, int y, int z, int w)
|
||||
{
|
||||
return GSVector4(cxpr_setr_epi32(x, y, z, w));
|
||||
}
|
||||
|
||||
// Constant-expression factory: every lane holds the raw 32-bit pattern of x
// (stored as an integer, not converted to float).
constexpr static GSVector4 cxpr(int x)
|
||||
{
|
||||
return GSVector4(cxpr_setr_epi32(x, x, x, x)); // same pattern passed for every lane
|
||||
}
|
||||
|
||||
// Runtime (non-constexpr) constructor from four floats, using the SSE intrinsic directly.
__forceinline GSVector4(float x, float y, float z, float w)
|
||||
{
|
||||
// _mm_set_ps takes its arguments highest element first, so x ends up in element 0.
m = _mm_set_ps(w, z, y, x);
|
||||
}
|
||||
|
||||
__forceinline GSVector4(float x, float y)
|
||||
|
@ -113,12 +125,6 @@ public:
|
|||
m = _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(y)));
|
||||
}
|
||||
|
||||
//Not currently used, just causes a compiler warning
|
||||
/*__forceinline GSVector4(const GSVector4& v)
|
||||
{
|
||||
m = v.m;
|
||||
}*/
|
||||
|
||||
__forceinline explicit GSVector4(const GSVector2& v)
|
||||
{
|
||||
m = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&v));
|
||||
|
|
|
@ -24,38 +24,34 @@ class alignas(16) GSVector4i
|
|||
static const GSVector4i m_xff[17];
|
||||
static const GSVector4i m_x0f[17];
|
||||
|
||||
public:
|
||||
constexpr static __m128i cxpr_setr_epi32(uint32 x, uint32 y, uint32 z, uint32 w)
|
||||
constexpr static __m128i cxpr_setr_epi32(int x, int y, int z, int w)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
return (__m128i)(__v4su{x, y, z, w});
|
||||
return (__m128i)(__v4si{x, y, z, w});
|
||||
#else
|
||||
__m128i m = {};
|
||||
m.m128i_u32[0] = x;
|
||||
m.m128i_u32[1] = y;
|
||||
m.m128i_u32[2] = z;
|
||||
m.m128i_u32[3] = w;
|
||||
m.m128i_i32[0] = x;
|
||||
m.m128i_i32[1] = y;
|
||||
m.m128i_i32[2] = z;
|
||||
m.m128i_i32[3] = w;
|
||||
return m;
|
||||
#endif
|
||||
}
|
||||
constexpr static __m128i cxpr_set1_epi32(uint32 x)
|
||||
{
|
||||
return cxpr_setr_epi32(x, x, x, x);
|
||||
}
|
||||
constexpr static __m128i cxpr_setr_epi8(uint8 b0, uint8 b1, uint8 b2, uint8 b3, uint8 b4, uint8 b5, uint8 b6, uint8 b7, uint8 b8, uint8 b9, uint8 b10, uint8 b11, uint8 b12, uint8 b13, uint8 b14, uint8 b15)
|
||||
constexpr static __m128i cxpr_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
return (__m128i)__v16qu{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15};
|
||||
return (__m128i)__v16qi{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15};
|
||||
#else
|
||||
__m128i m = {};
|
||||
m.m128i_u8[0] = b0; m.m128i_u8[1] = b1; m.m128i_u8[2] = b2; m.m128i_u8[3] = b3;
|
||||
m.m128i_u8[4] = b4; m.m128i_u8[5] = b5; m.m128i_u8[6] = b6; m.m128i_u8[7] = b7;
|
||||
m.m128i_u8[8] = b8; m.m128i_u8[9] = b9; m.m128i_u8[10] = b10; m.m128i_u8[11] = b11;
|
||||
m.m128i_u8[12] = b12; m.m128i_u8[13] = b13; m.m128i_u8[14] = b14; m.m128i_u8[15] = b15;
|
||||
m.m128i_i8[0] = b0; m.m128i_i8[1] = b1; m.m128i_i8[2] = b2; m.m128i_i8[3] = b3;
|
||||
m.m128i_i8[4] = b4; m.m128i_i8[5] = b5; m.m128i_i8[6] = b6; m.m128i_i8[7] = b7;
|
||||
m.m128i_i8[8] = b8; m.m128i_i8[9] = b9; m.m128i_i8[10] = b10; m.m128i_i8[11] = b11;
|
||||
m.m128i_i8[12] = b12; m.m128i_i8[13] = b13; m.m128i_i8[14] = b14; m.m128i_i8[15] = b15;
|
||||
return m;
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct {int x, y, z, w;};
|
||||
|
@ -74,13 +70,32 @@ public:
|
|||
__m128i m;
|
||||
};
|
||||
|
||||
constexpr GSVector4i(): m(cxpr_set1_epi32(0))
|
||||
// Default constructor: zero-initialises the four int members x, y, z, w directly
// rather than going through a cxpr_* helper.
constexpr GSVector4i(): x(0), y(0), z(0), w(0)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr GSVector4i(int x, int y, int z, int w)
|
||||
: m(cxpr_setr_epi32(x, y, z, w))
|
||||
// Constant-expression factory: wraps the __m128i produced by
// cxpr_setr_epi32(x, y, z, w), which stores x, y, z, w in 32-bit lanes 0..3.
// The runtime GSVector4i(int, int, int, int) constructor is separate and is not constexpr.
constexpr static GSVector4i cxpr(int x, int y, int z, int w)
|
||||
{
|
||||
return GSVector4i(cxpr_setr_epi32(x, y, z, w));
|
||||
}
|
||||
|
||||
// Constant-expression factory: all four 32-bit lanes hold x.
constexpr static GSVector4i cxpr(int x)
|
||||
{
|
||||
return GSVector4i(cxpr_setr_epi32(x, x, x, x)); // same value passed for every lane
|
||||
}
|
||||
|
||||
// Runtime (non-constexpr) constructor from four ints. It assembles the vector
// from per-scalar loads and three upl32 merges; the single _mm_set_epi32 form is
// kept below only as a commented-out alternative.
// NOTE(review): load() and upl32() are defined outside this view; presumably
// upl32 interleaves the low 32-bit lanes, giving xz = {x, z, ..}, yw = {y, w, ..}
// and a final {x, y, z, w} — confirm.
__forceinline GSVector4i(int x, int y, int z, int w)
|
||||
{
|
||||
// 4 gprs
|
||||
|
||||
// m = _mm_set_epi32(w, z, y, x);
|
||||
|
||||
// 2 gprs
|
||||
|
||||
GSVector4i xz = load(x).upl32(load(z));
|
||||
GSVector4i yw = load(y).upl32(load(w));
|
||||
|
||||
*this = xz.upl32(yw);
|
||||
}
|
||||
|
||||
__forceinline GSVector4i(int x, int y)
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
|
||||
class alignas(32) GSVector8
|
||||
{
|
||||
public:
|
||||
constexpr static __m256 cxpr_setr_ps(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
|
@ -41,33 +40,26 @@ public:
|
|||
return m;
|
||||
#endif
|
||||
}
|
||||
constexpr static __m256 cxpr_set1_ps(float x)
|
||||
{
|
||||
return cxpr_setr_ps(x, x, x, x, x, x, x, x);
|
||||
}
|
||||
|
||||
// Constexpr builder for a __m256 whose storage holds eight 32-bit integers:
// x0 goes to element 0, w1 to element 7. The integers are stored as raw bits,
// they are not converted to float.
// Takes signed ints so the cxpr(int, ...) factories can forward their arguments
// without a narrowing conversion inside the GCC vector literal.
constexpr static __m256 cxpr_setr_epi32(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
#ifdef __GNUC__
	// GCC/Clang: build an 8 x int vector literal and reinterpret it as __m256.
	return (__m256)__v8si{x0, y0, z0, w0, x1, y1, z1, w1};
#else
	// Other compilers: write the lanes through a union overlay of __m256.
	union { __m256 m; int i[8]; } t = {};
	t.i[0] = x0;
	t.i[1] = y0;
	t.i[2] = z0;
	t.i[3] = w0;
	t.i[4] = x1;
	t.i[5] = y1;
	t.i[6] = z1;
	t.i[7] = w1;
	return t.m;
#endif
}
|
||||
// Constexpr splat of one 32-bit pattern: x is passed to cxpr_setr_epi32 for all eight lanes.
static constexpr __m256 cxpr_set1_epi32(uint32 x)
{
	const __m256 splat = cxpr_setr_epi32(x, x, x, x, x, x, x, x);

	return splat;
}
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct {float x0, y0, z0, w0, x1, y1, z1, w1;};
|
||||
|
@ -97,9 +89,34 @@ public:
|
|||
|
||||
GSVector8() = default;
|
||||
|
||||
constexpr GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
|
||||
: m(cxpr_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1))
|
||||
// Compile-time factory from eight floats, packed by cxpr_setr_ps in argument order.
// Use this for constant initialisers; the runtime constructor with the same
// parameter list goes through the AVX intrinsic instead.
constexpr static GSVector8 cxpr(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
{
	const __m256 lanes = cxpr_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1);

	return GSVector8(lanes);
}
|
||||
|
||||
// Compile-time splat: all eight float lanes take the value x.
constexpr static GSVector8 cxpr(float x)
{
	// Resolves to the eight-float overload, which wraps cxpr_setr_ps(x, ..., x).
	return cxpr(x, x, x, x, x, x, x, x);
}
|
||||
|
||||
// Compile-time factory from eight 32-bit integers. cxpr_setr_epi32 stores them
// as raw bits in element order x0 .. w1; no int-to-float conversion happens.
constexpr static GSVector8 cxpr(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
	const __m256 lanes = cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1);

	return GSVector8(lanes);
}
|
||||
|
||||
// Compile-time splat of one 32-bit integer pattern into all eight lanes.
constexpr static GSVector8 cxpr(int x)
{
	// Resolves to the eight-int overload, which wraps cxpr_setr_epi32(x, ..., x).
	return cxpr(x, x, x, x, x, x, x, x);
}
|
||||
|
||||
// Unsigned convenience overload: casts x to int and defers to cxpr(int),
// so callers can pass masks written as unsigned values.
constexpr static GSVector8 cxpr(uint32 x)
{
	const int as_signed = static_cast<int>(x);

	return cxpr(as_signed);
}
|
||||
|
||||
// Runtime constructor from eight floats, using the AVX intrinsic directly.
// _mm256_set_ps lists lanes from highest to lowest, hence the reversed arguments:
// x0 ends up in the lowest lane and w1 in the highest.
__forceinline GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
	: m(_mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0))
{
}
|
||||
|
||||
__forceinline GSVector8(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
|
||||
|
|
|
@ -26,54 +26,50 @@ class alignas(32) GSVector8i
|
|||
static const GSVector8i m_xff[33];
|
||||
static const GSVector8i m_x0f[33];
|
||||
|
||||
public:
|
||||
// Constexpr builder for a __m256i from eight 32-bit integers in element order:
// x0 goes to element 0, w1 to element 7.
// Takes signed ints so the cxpr(int, ...) factories can forward their arguments
// without a narrowing conversion inside the GCC vector literal.
constexpr static __m256i cxpr_setr_epi32(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
#ifdef __GNUC__
	// GCC/Clang: build an 8 x int vector literal and reinterpret it as __m256i.
	return (__m256i)__v8si{x0, y0, z0, w0, x1, y1, z1, w1};
#else
	// Other compilers: write the lanes through the m256i_i32 member of __m256i.
	__m256i m = {};
	m.m256i_i32[0] = x0;
	m.m256i_i32[1] = y0;
	m.m256i_i32[2] = z0;
	m.m256i_i32[3] = w0;
	m.m256i_i32[4] = x1;
	m.m256i_i32[5] = y1;
	m.m256i_i32[6] = z1;
	m.m256i_i32[7] = w1;
	return m;
#endif
}
|
||||
// Constexpr splat: x is passed to cxpr_setr_epi32 for all eight 32-bit lanes.
static constexpr __m256i cxpr_set1_epi32(uint32 x)
{
	const __m256i splat = cxpr_setr_epi32(x, x, x, x, x, x, x, x);

	return splat;
}
|
||||
// Constexpr builder for a __m256i from 32 bytes in element order:
// b0 goes to byte 0, b31 to byte 31.
// Parameters are char to match the element type of the signed byte vector
// (__v32qi) and of the m256i_i8 member used below.
constexpr static __m256i cxpr_setr_epi8(
	char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7,
	char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15,
	char b16, char b17, char b18, char b19, char b20, char b21, char b22, char b23,
	char b24, char b25, char b26, char b27, char b28, char b29, char b30, char b31)
{
#ifdef __GNUC__
	// GCC/Clang: build a 32 x char vector literal and reinterpret it as __m256i.
	return (__m256i)__v32qi
	{
		b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15,
		b16, b17, b18, b19, b20, b21, b22, b23, b24, b25, b26, b27, b28, b29, b30, b31,
	};
#else
	// Other compilers: write each byte through the m256i_i8 member of __m256i.
	__m256i m = {};
	m.m256i_i8[0] = b0; m.m256i_i8[1] = b1; m.m256i_i8[2] = b2; m.m256i_i8[3] = b3;
	m.m256i_i8[4] = b4; m.m256i_i8[5] = b5; m.m256i_i8[6] = b6; m.m256i_i8[7] = b7;
	m.m256i_i8[8] = b8; m.m256i_i8[9] = b9; m.m256i_i8[10] = b10; m.m256i_i8[11] = b11;
	m.m256i_i8[12] = b12; m.m256i_i8[13] = b13; m.m256i_i8[14] = b14; m.m256i_i8[15] = b15;
	m.m256i_i8[16] = b16; m.m256i_i8[17] = b17; m.m256i_i8[18] = b18; m.m256i_i8[19] = b19;
	m.m256i_i8[20] = b20; m.m256i_i8[21] = b21; m.m256i_i8[22] = b22; m.m256i_i8[23] = b23;
	m.m256i_i8[24] = b24; m.m256i_i8[25] = b25; m.m256i_i8[26] = b26; m.m256i_i8[27] = b27;
	m.m256i_i8[28] = b28; m.m256i_i8[29] = b29; m.m256i_i8[30] = b30; m.m256i_i8[31] = b31;
	return m;
#endif
}
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct {int x0, y0, z0, w0, x1, y1, z1, w1;};
|
||||
|
@ -94,15 +90,25 @@ public:
|
|||
|
||||
GSVector8i() = default;
|
||||
|
||||
// Compile-time factory from eight 32-bit integers; cxpr_setr_epi32 places x0 in
// element 0 through w1 in element 7. Use this for constant initialisers; the
// runtime constructor with the same parameter list uses the AVX intrinsic.
constexpr static GSVector8i cxpr(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
	const __m256i lanes = cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1);

	return GSVector8i(lanes);
}
|
||||
|
||||
// Compile-time splat: all eight 32-bit lanes take the value x.
constexpr static GSVector8i cxpr(int x)
{
	// Resolves to the eight-int overload, which wraps cxpr_setr_epi32(x, ..., x).
	return cxpr(x, x, x, x, x, x, x, x);
}
|
||||
|
||||
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
|
||||
|
||||
__forceinline static GSVector8i cast(const GSVector8& v);
|
||||
__forceinline static GSVector8i cast(const GSVector4& v);
|
||||
__forceinline static GSVector8i cast(const GSVector4i& v);
|
||||
|
||||
constexpr GSVector8i(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
|
||||
: m(cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1))
|
||||
// Runtime constructor from eight 32-bit integers. x0 becomes lane 0 and w1 lane 7,
// matching the x0..w1 member layout and what cxpr()/cxpr_setr_epi32 produce.
__forceinline GSVector8i(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
	// _mm256_set_epi32 takes its arguments from the highest lane down to the lowest,
	// so they must be passed in reverse; passing x0..w1 in order would mirror the vector.
	m = _mm256_set_epi32(w1, z1, y1, x1, w0, z0, y0, x0);
}
|
||||
|
||||
__forceinline GSVector8i(
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "GSUtil.h"
|
||||
#include "GSState.h"
|
||||
|
||||
// Single definition of s_minmax, constant-initialised through the constexpr
// factory rather than the four-float constructor.
CONSTINIT const GSVector4 GSVertexTrace::s_minmax = GSVector4::cxpr(FLT_MAX, -FLT_MAX, 0.f, 0.f);
|
||||
|
||||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_accurate_stq(false), m_state(state), m_primclass(GS_INVALID_CLASS)
|
||||
|
|
|
@ -26,9 +26,9 @@
|
|||
|
||||
// Debug log file: opened for writing during static initialisation when LOG is
// non-zero, NULL otherwise. The path is hard-coded.
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
||||
|
||||
// Single definition of m_pos_scale, constant-initialised through the constexpr
// factory rather than the four-float constructor.
CONSTINIT const GSVector4 GSRendererSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
#if _M_SSE >= 0x501
|
||||
// Single definition of m_pos_scale2: the four m_pos_scale values repeated in both
// 128-bit halves, constant-initialised through the constexpr factory.
CONSTINIT const GSVector8 GSRendererSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
#endif
|
||||
|
||||
GSRendererSW::GSRendererSW(int threads)
|
||||
|
|
Loading…
Reference in New Issue