diff --git a/plugins/GSdx/GSBlock.cpp b/plugins/GSdx/GSBlock.cpp index d6751bc2fa..d92c5aa172 100644 --- a/plugins/GSdx/GSBlock.cpp +++ b/plugins/GSdx/GSBlock.cpp @@ -31,15 +31,15 @@ CONSTINIT const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, CONSTINIT const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); #if _M_SSE >= 0x501 -CONSTINIT const GSVector8i GSBlock::m_xxxa(GSVector8i::cxpr_set1_epi32(0x00008000)); -CONSTINIT const GSVector8i GSBlock::m_xxbx(GSVector8i::cxpr_set1_epi32(0x00007c00)); -CONSTINIT const GSVector8i GSBlock::m_xgxx(GSVector8i::cxpr_set1_epi32(0x000003e0)); -CONSTINIT const GSVector8i GSBlock::m_rxxx(GSVector8i::cxpr_set1_epi32(0x0000001f)); +CONSTINIT const GSVector8i GSBlock::m_xxxa = GSVector8i::cxpr(0x00008000); +CONSTINIT const GSVector8i GSBlock::m_xxbx = GSVector8i::cxpr(0x00007c00); +CONSTINIT const GSVector8i GSBlock::m_xgxx = GSVector8i::cxpr(0x000003e0); +CONSTINIT const GSVector8i GSBlock::m_rxxx = GSVector8i::cxpr(0x0000001f); #else -CONSTINIT const GSVector4i GSBlock::m_xxxa(GSVector4i::cxpr_set1_epi32(0x00008000)); -CONSTINIT const GSVector4i GSBlock::m_xxbx(GSVector4i::cxpr_set1_epi32(0x00007c00)); -CONSTINIT const GSVector4i GSBlock::m_xgxx(GSVector4i::cxpr_set1_epi32(0x000003e0)); -CONSTINIT const GSVector4i GSBlock::m_rxxx(GSVector4i::cxpr_set1_epi32(0x0000001f)); +CONSTINIT const GSVector4i GSBlock::m_xxxa = GSVector4i::cxpr(0x00008000); +CONSTINIT const GSVector4i GSBlock::m_xxbx = GSVector4i::cxpr(0x00007c00); +CONSTINIT const GSVector4i GSBlock::m_xgxx = GSVector4i::cxpr(0x000003e0); +CONSTINIT const GSVector4i GSBlock::m_rxxx = GSVector4i::cxpr(0x0000001f); #endif CONSTINIT const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9); diff --git a/plugins/GSdx/GSClut.cpp b/plugins/GSdx/GSClut.cpp index 86abe123fb..d40139be73 100644 --- a/plugins/GSdx/GSClut.cpp +++ b/plugins/GSdx/GSClut.cpp @@ -731,9 +731,9 @@ __forceinline void 
GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector // TODO -CONSTINIT const GSVector4i GSClut::m_bm(GSVector4i::cxpr_set1_epi32(0x00007c00)); -CONSTINIT const GSVector4i GSClut::m_gm(GSVector4i::cxpr_set1_epi32(0x000003e0)); -CONSTINIT const GSVector4i GSClut::m_rm(GSVector4i::cxpr_set1_epi32(0x0000001f)); +CONSTINIT const GSVector4i GSClut::m_bm = GSVector4i::cxpr(0x00007c00); +CONSTINIT const GSVector4i GSClut::m_gm = GSVector4i::cxpr(0x000003e0); +CONSTINIT const GSVector4i GSClut::m_rm = GSVector4i::cxpr(0x0000001f); void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA) { diff --git a/plugins/GSdx/GSVector.cpp b/plugins/GSdx/GSVector.cpp index 0139b96200..c238af0ab6 100644 --- a/plugins/GSdx/GSVector.cpp +++ b/plugins/GSdx/GSVector.cpp @@ -24,143 +24,143 @@ CONSTINIT const GSVector4i GSVector4i::m_xff[17] = { - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), + cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000), + 
cxpr(0x000000ff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), }; CONSTINIT const GSVector4i GSVector4i::m_x0f[17] = { - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 
0x0f0f0f0f), + cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0000000f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), }; -CONSTINIT const GSVector4 GSVector4::m_ps0123(cxpr_setr_ps(0.0f, 1.0f, 2.0f, 3.0f)); -CONSTINIT const GSVector4 GSVector4::m_ps4567(cxpr_setr_ps(4.0f, 5.0f, 6.0f, 7.0f)); -CONSTINIT const GSVector4 GSVector4::m_half(cxpr_set1_ps(0.5f)); -CONSTINIT const GSVector4 GSVector4::m_one(cxpr_set1_ps(1.0f)); -CONSTINIT const GSVector4 GSVector4::m_two(cxpr_set1_ps(2.0f)); -CONSTINIT const GSVector4 GSVector4::m_four(cxpr_set1_ps(4.0f)); -CONSTINIT const GSVector4 GSVector4::m_x4b000000(cxpr_set1_epi32(0x4b000000)); -CONSTINIT const GSVector4 GSVector4::m_x4f800000(cxpr_set1_epi32(0x4f800000)); -CONSTINIT const GSVector4 GSVector4::m_max(cxpr_set1_ps(FLT_MAX)); -CONSTINIT const GSVector4 GSVector4::m_min(cxpr_set1_ps(FLT_MIN)); +CONSTINIT const GSVector4 GSVector4::m_ps0123 = cxpr(0.0f, 1.0f, 2.0f, 3.0f); +CONSTINIT const GSVector4 GSVector4::m_ps4567 = cxpr(4.0f, 5.0f, 6.0f, 7.0f); +CONSTINIT const GSVector4 GSVector4::m_half = cxpr(0.5f); +CONSTINIT const GSVector4 GSVector4::m_one = cxpr(1.0f); +CONSTINIT const GSVector4 
GSVector4::m_two = cxpr(2.0f); +CONSTINIT const GSVector4 GSVector4::m_four = cxpr(4.0f); +CONSTINIT const GSVector4 GSVector4::m_x4b000000 = cxpr(0x4b000000); +CONSTINIT const GSVector4 GSVector4::m_x4f800000 = cxpr(0x4f800000); +CONSTINIT const GSVector4 GSVector4::m_max = cxpr(FLT_MAX); +CONSTINIT const GSVector4 GSVector4::m_min = cxpr(FLT_MIN); #if _M_SSE >= 0x500 -CONSTINIT const GSVector8 GSVector8::m_half(cxpr_set1_ps(0.5f)); -CONSTINIT const GSVector8 GSVector8::m_one(cxpr_set1_ps(1.0f)); -CONSTINIT const GSVector8 GSVector8::m_x7fffffff(cxpr_set1_epi32(0x7fffffff)); -CONSTINIT const GSVector8 GSVector8::m_x80000000(cxpr_set1_epi32(0x80000000)); -CONSTINIT const GSVector8 GSVector8::m_x4b000000(cxpr_set1_epi32(0x4b000000)); -CONSTINIT const GSVector8 GSVector8::m_x4f800000(cxpr_set1_epi32(0x4f800000)); -CONSTINIT const GSVector8 GSVector8::m_max(cxpr_set1_ps(FLT_MAX)); -CONSTINIT const GSVector8 GSVector8::m_min(cxpr_set1_ps(FLT_MAX)); +CONSTINIT const GSVector8 GSVector8::m_half = cxpr(0.5f); +CONSTINIT const GSVector8 GSVector8::m_one = cxpr(1.0f); +CONSTINIT const GSVector8 GSVector8::m_x7fffffff = cxpr(0x7fffffff); +CONSTINIT const GSVector8 GSVector8::m_x80000000 = cxpr(0x80000000); +CONSTINIT const GSVector8 GSVector8::m_x4b000000 = cxpr(0x4b000000); +CONSTINIT const GSVector8 GSVector8::m_x4f800000 = cxpr(0x4f800000); +CONSTINIT const GSVector8 GSVector8::m_max = cxpr(FLT_MAX); +CONSTINIT const GSVector8 GSVector8::m_min = cxpr(FLT_MIN); #endif #if _M_SSE >= 0x501 CONSTINIT const GSVector8i GSVector8i::m_xff[33] = { - GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), -
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000), - 
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), + cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 
0x0000ffff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), + cxpr(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), }; CONSTINIT const GSVector8i GSVector8i::m_x0f[33] = { - GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 
0x0000000f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), + cxpr(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 
0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), + cxpr(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), }; #endif diff --git a/plugins/GSdx/GSVector4.h b/plugins/GSdx/GSVector4.h index d009eb999d..eb4a464162 100644 --- a/plugins/GSdx/GSVector4.h +++ b/plugins/GSdx/GSVector4.h @@ -21,7 +21,6 @@ class alignas(16) GSVector4 
{ -public: constexpr static __m128 cxpr_setr_ps(float x, float y, float z, float w) { #ifdef __GNUC__ @@ -35,29 +34,22 @@ public: return m; #endif } - constexpr static __m128 cxpr_set1_ps(float x) - { - return cxpr_setr_ps(x, x, x, x); - } - constexpr static __m128 cxpr_setr_epi32(uint32 x, uint32 y, uint32 z, uint32 w) + constexpr static __m128 cxpr_setr_epi32(int x, int y, int z, int w) { #ifdef __GNUC__ - return (__m128)(__v4su{x, y, z, w}); + return (__m128)(__v4si{x, y, z, w}); #else __m128 m = {}; - m.m128_u32[0] = x; - m.m128_u32[1] = y; - m.m128_u32[2] = z; - m.m128_u32[3] = w; + m.m128_i32[0] = x; + m.m128_i32[1] = y; + m.m128_i32[2] = z; + m.m128_i32[3] = w; return m; #endif } - constexpr static __m128 cxpr_set1_epi32(uint32 x) - { - return cxpr_setr_epi32(x, x, x, x); - } +public: union { struct {float x, y, z, w;}; @@ -91,9 +83,29 @@ public: constexpr GSVector4(const GSVector4&) = default; - constexpr GSVector4(float x, float y, float z, float w) - : m(cxpr_setr_ps(x, y, z, w)) + constexpr static GSVector4 cxpr(float x, float y, float z, float w) { + return GSVector4(cxpr_setr_ps(x, y, z, w)); + } + + constexpr static GSVector4 cxpr(float x) + { + return GSVector4(cxpr_setr_ps(x, x, x, x)); + } + + constexpr static GSVector4 cxpr(int x, int y, int z, int w) + { + return GSVector4(cxpr_setr_epi32(x, y, z, w)); + } + + constexpr static GSVector4 cxpr(int x) + { + return GSVector4(cxpr_setr_epi32(x, x, x, x)); + } + + __forceinline GSVector4(float x, float y, float z, float w) + { + m = _mm_set_ps(w, z, y, x); } __forceinline GSVector4(float x, float y) @@ -113,12 +125,6 @@ public: m = _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(y))); } - //Not currently used, just causes a compiler warning - /*__forceinline GSVector4(const GSVector4& v) - { - m = v.m; - }*/ - __forceinline explicit GSVector4(const GSVector2& v) { m = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&v)); diff --git a/plugins/GSdx/GSVector4i.h 
b/plugins/GSdx/GSVector4i.h index 0de0b1df74..87166038fa 100644 --- a/plugins/GSdx/GSVector4i.h +++ b/plugins/GSdx/GSVector4i.h @@ -24,38 +24,34 @@ class alignas(16) GSVector4i static const GSVector4i m_xff[17]; static const GSVector4i m_x0f[17]; -public: - constexpr static __m128i cxpr_setr_epi32(uint32 x, uint32 y, uint32 z, uint32 w) + constexpr static __m128i cxpr_setr_epi32(int x, int y, int z, int w) { #ifdef __GNUC__ - return (__m128i)(__v4su{x, y, z, w}); + return (__m128i)(__v4si{x, y, z, w}); #else __m128i m = {}; - m.m128i_u32[0] = x; - m.m128i_u32[1] = y; - m.m128i_u32[2] = z; - m.m128i_u32[3] = w; + m.m128i_i32[0] = x; + m.m128i_i32[1] = y; + m.m128i_i32[2] = z; + m.m128i_i32[3] = w; return m; #endif } - constexpr static __m128i cxpr_set1_epi32(uint32 x) - { - return cxpr_setr_epi32(x, x, x, x); - } - constexpr static __m128i cxpr_setr_epi8(uint8 b0, uint8 b1, uint8 b2, uint8 b3, uint8 b4, uint8 b5, uint8 b6, uint8 b7, uint8 b8, uint8 b9, uint8 b10, uint8 b11, uint8 b12, uint8 b13, uint8 b14, uint8 b15) + constexpr static __m128i cxpr_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15) { #ifdef __GNUC__ - return (__m128i)__v16qu{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}; + return (__m128i)__v16qi{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}; #else __m128i m = {}; - m.m128i_u8[0] = b0; m.m128i_u8[1] = b1; m.m128i_u8[2] = b2; m.m128i_u8[3] = b3; - m.m128i_u8[4] = b4; m.m128i_u8[5] = b5; m.m128i_u8[6] = b6; m.m128i_u8[7] = b7; - m.m128i_u8[8] = b8; m.m128i_u8[9] = b9; m.m128i_u8[10] = b10; m.m128i_u8[11] = b11; - m.m128i_u8[12] = b12; m.m128i_u8[13] = b13; m.m128i_u8[14] = b14; m.m128i_u8[15] = b15; + m.m128i_i8[0] = b0; m.m128i_i8[1] = b1; m.m128i_i8[2] = b2; m.m128i_i8[3] = b3; + m.m128i_i8[4] = b4; m.m128i_i8[5] = b5; m.m128i_i8[6] = b6; m.m128i_i8[7] = b7; + m.m128i_i8[8] = b8; m.m128i_i8[9] = 
b9; m.m128i_i8[10] = b10; m.m128i_i8[11] = b11; + m.m128i_i8[12] = b12; m.m128i_i8[13] = b13; m.m128i_i8[14] = b14; m.m128i_i8[15] = b15; return m; #endif } +public: union { struct {int x, y, z, w;}; @@ -74,13 +70,32 @@ public: __m128i m; }; - constexpr GSVector4i(): m(cxpr_set1_epi32(0)) + constexpr GSVector4i(): x(0), y(0), z(0), w(0) { } - constexpr GSVector4i(int x, int y, int z, int w) - : m(cxpr_setr_epi32(x, y, z, w)) + constexpr static GSVector4i cxpr(int x, int y, int z, int w) { + return GSVector4i(cxpr_setr_epi32(x, y, z, w)); + } + + constexpr static GSVector4i cxpr(int x) + { + return GSVector4i(cxpr_setr_epi32(x, x, x, x)); + } + + __forceinline GSVector4i(int x, int y, int z, int w) + { + // 4 gprs + + // m = _mm_set_epi32(w, z, y, x); + + // 2 gprs + + GSVector4i xz = load(x).upl32(load(z)); + GSVector4i yw = load(y).upl32(load(w)); + + *this = xz.upl32(yw); } __forceinline GSVector4i(int x, int y) diff --git a/plugins/GSdx/GSVector8.h b/plugins/GSdx/GSVector8.h index 74fd713f67..2798c09491 100644 --- a/plugins/GSdx/GSVector8.h +++ b/plugins/GSdx/GSVector8.h @@ -23,7 +23,6 @@ class alignas(32) GSVector8 { -public: constexpr static __m256 cxpr_setr_ps(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1) { #ifdef __GNUC__ @@ -41,33 +40,26 @@ public: return m; #endif } - constexpr static __m256 cxpr_set1_ps(float x) - { - return cxpr_setr_ps(x, x, x, x, x, x, x, x); - } - constexpr static __m256 cxpr_setr_epi32(uint32 x0, uint32 y0, uint32 z0, uint32 w0, uint32 x1, uint32 y1, uint32 z1, uint32 w1) + constexpr static __m256 cxpr_setr_epi32(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) { #ifdef __GNUC__ - return (__m256)__v8su{x0, y0, z0, w0, x1, y1, z1, w1}; + return (__m256)__v8si{x0, y0, z0, w0, x1, y1, z1, w1}; #else - union { __m256 m; uint32 u[8]; } t = {}; - t.u[0] = x0; - t.u[1] = y0; - t.u[2] = z0; - t.u[3] = w0; - t.u[4] = x1; - t.u[5] = y1; - t.u[6] = z1; - t.u[7] = w1; + union { __m256 m; int i[8]; 
} t = {}; + t.i[0] = x0; + t.i[1] = y0; + t.i[2] = z0; + t.i[3] = w0; + t.i[4] = x1; + t.i[5] = y1; + t.i[6] = z1; + t.i[7] = w1; return t.m; #endif } - constexpr static __m256 cxpr_set1_epi32(uint32 x) - { - return cxpr_setr_epi32(x, x, x, x, x, x, x, x); - } +public: union { struct {float x0, y0, z0, w0, x1, y1, z1, w1;}; @@ -97,9 +89,34 @@ public: GSVector8() = default; - constexpr GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1) - : m(cxpr_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1)) + static constexpr GSVector8 cxpr(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1) { + return GSVector8(cxpr_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1)); + } + + static constexpr GSVector8 cxpr(float x) + { + return GSVector8(cxpr_setr_ps(x, x, x, x, x, x, x, x)); + } + + static constexpr GSVector8 cxpr(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) + { + return GSVector8(cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1)); + } + + static constexpr GSVector8 cxpr(int x) + { + return GSVector8(cxpr_setr_epi32(x, x, x, x, x, x, x, x)); + } + + static constexpr GSVector8 cxpr(uint32 x) + { + return cxpr(static_cast<int>(x)); + } + + __forceinline GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1) + { + m = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0); } __forceinline GSVector8(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) diff --git a/plugins/GSdx/GSVector8i.h b/plugins/GSdx/GSVector8i.h index 7f74ce9331..baa096bf74 100644 --- a/plugins/GSdx/GSVector8i.h +++ b/plugins/GSdx/GSVector8i.h @@ -26,54 +26,50 @@ class alignas(32) GSVector8i static const GSVector8i m_xff[33]; static const GSVector8i m_x0f[33]; -public: - constexpr static __m256i cxpr_setr_epi32(uint32 x0, uint32 y0, uint32 z0, uint32 w0, uint32 x1, uint32 y1, uint32 z1, uint32 w1) + constexpr static __m256i cxpr_setr_epi32(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) { #ifdef
__GNUC__ - return (__m256i)__v8su{x0, y0, z0, w0, x1, y1, z1, w1}; + return (__m256i)__v8si{x0, y0, z0, w0, x1, y1, z1, w1}; #else __m256i m = {}; - m.m256i_u32[0] = x0; - m.m256i_u32[1] = y0; - m.m256i_u32[2] = z0; - m.m256i_u32[3] = w0; - m.m256i_u32[4] = x1; - m.m256i_u32[5] = y1; - m.m256i_u32[6] = z1; - m.m256i_u32[7] = w1; + m.m256i_i32[0] = x0; + m.m256i_i32[1] = y0; + m.m256i_i32[2] = z0; + m.m256i_i32[3] = w0; + m.m256i_i32[4] = x1; + m.m256i_i32[5] = y1; + m.m256i_i32[6] = z1; + m.m256i_i32[7] = w1; return m; #endif } - constexpr static __m256i cxpr_set1_epi32(uint32 x) - { - return cxpr_setr_epi32(x, x, x, x, x, x, x, x); - } constexpr static __m256i cxpr_setr_epi8( - uint8 b0, uint8 b1, uint8 b2, uint8 b3, uint8 b4, uint8 b5, uint8 b6, uint8 b7, - uint8 b8, uint8 b9, uint8 b10, uint8 b11, uint8 b12, uint8 b13, uint8 b14, uint8 b15, - uint8 b16, uint8 b17, uint8 b18, uint8 b19, uint8 b20, uint8 b21, uint8 b22, uint8 b23, - uint8 b24, uint8 b25, uint8 b26, uint8 b27, uint8 b28, uint8 b29, uint8 b30, uint8 b31) + char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, + char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15, + char b16, char b17, char b18, char b19, char b20, char b21, char b22, char b23, + char b24, char b25, char b26, char b27, char b28, char b29, char b30, char b31) { #ifdef __GNUC__ - return (__m256i)__v32qu + return (__m256i)__v32qi { b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15, b16, b17, b18, b19, b20, b21, b22, b23, b24, b25, b26, b27, b28, b29, b30, b31, }; #else __m256i m = {}; - m.m256i_u8[0] = b0; m.m256i_u8[1] = b1; m.m256i_u8[2] = b2; m.m256i_u8[3] = b3; - m.m256i_u8[4] = b4; m.m256i_u8[5] = b5; m.m256i_u8[6] = b6; m.m256i_u8[7] = b7; - m.m256i_u8[8] = b8; m.m256i_u8[9] = b9; m.m256i_u8[10] = b10; m.m256i_u8[11] = b11; - m.m256i_u8[12] = b12; m.m256i_u8[13] = b13; m.m256i_u8[14] = b14; m.m256i_u8[15] = b15; - m.m256i_u8[16] = b16; m.m256i_u8[17] = b17; 
m.m256i_u8[18] = b18; m.m256i_u8[19] = b19; - m.m256i_u8[20] = b20; m.m256i_u8[21] = b21; m.m256i_u8[22] = b22; m.m256i_u8[23] = b23; - m.m256i_u8[24] = b24; m.m256i_u8[25] = b25; m.m256i_u8[26] = b26; m.m256i_u8[27] = b27; - m.m256i_u8[28] = b28; m.m256i_u8[29] = b29; m.m256i_u8[30] = b30; m.m256i_u8[31] = b31; + m.m256i_i8[0] = b0; m.m256i_i8[1] = b1; m.m256i_i8[2] = b2; m.m256i_i8[3] = b3; + m.m256i_i8[4] = b4; m.m256i_i8[5] = b5; m.m256i_i8[6] = b6; m.m256i_i8[7] = b7; + m.m256i_i8[8] = b8; m.m256i_i8[9] = b9; m.m256i_i8[10] = b10; m.m256i_i8[11] = b11; + m.m256i_i8[12] = b12; m.m256i_i8[13] = b13; m.m256i_i8[14] = b14; m.m256i_i8[15] = b15; + m.m256i_i8[16] = b16; m.m256i_i8[17] = b17; m.m256i_i8[18] = b18; m.m256i_i8[19] = b19; + m.m256i_i8[20] = b20; m.m256i_i8[21] = b21; m.m256i_i8[22] = b22; m.m256i_i8[23] = b23; + m.m256i_i8[24] = b24; m.m256i_i8[25] = b25; m.m256i_i8[26] = b26; m.m256i_i8[27] = b27; + m.m256i_i8[28] = b28; m.m256i_i8[29] = b29; m.m256i_i8[30] = b30; m.m256i_i8[31] = b31; return m; #endif } +public: union { struct {int x0, y0, z0, w0, x1, y1, z1, w1;}; @@ -94,15 +90,25 @@ public: GSVector8i() = default; + static constexpr GSVector8i cxpr(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) + { + return GSVector8i(cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1)); + } + + static constexpr GSVector8i cxpr(int x) + { + return GSVector8i(cxpr_setr_epi32(x, x, x, x, x, x, x, x)); + } + __forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true); __forceinline static GSVector8i cast(const GSVector8& v); __forceinline static GSVector8i cast(const GSVector4& v); __forceinline static GSVector8i cast(const GSVector4i& v); - constexpr GSVector8i(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) - : m(cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1)) + __forceinline GSVector8i(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1) { + m = _mm256_set_epi32(w1, z1, y1, x1, w0, z0, y0, x0); /* _mm256_set_epi32 takes the highest lane first; reversed so x0 lands in lane 0, as the removed cxpr_setr_epi32-based constructor did */ } __forceinline
GSVector8i( diff --git a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp index 0576f43842..4df7c0e32b 100644 --- a/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp +++ b/plugins/GSdx/Renderers/Common/GSVertexTrace.cpp @@ -24,7 +24,7 @@ #include "GSUtil.h" #include "GSState.h" -CONSTINIT const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX, 0.f, 0.f); +CONSTINIT const GSVector4 GSVertexTrace::s_minmax = GSVector4::cxpr(FLT_MAX, -FLT_MAX, 0.f, 0.f); GSVertexTrace::GSVertexTrace(const GSState* state) : m_accurate_stq(false), m_state(state), m_primclass(GS_INVALID_CLASS) diff --git a/plugins/GSdx/Renderers/SW/GSRendererSW.cpp b/plugins/GSdx/Renderers/SW/GSRendererSW.cpp index b2d608bca6..fd424400c1 100644 --- a/plugins/GSdx/Renderers/SW/GSRendererSW.cpp +++ b/plugins/GSdx/Renderers/SW/GSRendererSW.cpp @@ -26,9 +26,9 @@ static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL; -CONSTINIT const GSVector4 GSRendererSW::m_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); +CONSTINIT const GSVector4 GSRendererSW::m_pos_scale = GSVector4::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); #if _M_SSE >= 0x501 -CONSTINIT const GSVector8 GSRendererSW::m_pos_scale2(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); +CONSTINIT const GSVector8 GSRendererSW::m_pos_scale2 = GSVector8::cxpr(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); #endif GSRendererSW::GSRendererSW(int threads)