GS: Better constexpr (cxpr) GSVector initialization

This commit is contained in:
TellowKrinkle 2022-04-27 23:19:55 -05:00 committed by tellowkrinkle
parent 92dd6fb575
commit 82de13d95a
4 changed files with 39 additions and 132 deletions

View File

@ -15,32 +15,17 @@
class alignas(16) GSVector4
{
constexpr static __m128 cxpr_setr_ps(float x, float y, float z, float w)
struct cxpr_init_tag {};
static constexpr cxpr_init_tag cxpr_init{};
constexpr GSVector4(cxpr_init_tag, float x, float y, float z, float w)
: F32{x, y, z, w}
{
#ifdef __GNUC__
return __m128{x, y, z, w};
#else
__m128 m = {};
m.m128_f32[0] = x;
m.m128_f32[1] = y;
m.m128_f32[2] = z;
m.m128_f32[3] = w;
return m;
#endif
}
constexpr static __m128 cxpr_setr_epi32(int x, int y, int z, int w)
constexpr GSVector4(cxpr_init_tag, int x, int y, int z, int w)
: I32{x, y, z, w}
{
#ifdef __GNUC__
return (__m128)(__v4si{x, y, z, w});
#else
__m128 m = {};
m.m128_i32[0] = x;
m.m128_i32[1] = y;
m.m128_i32[2] = z;
m.m128_i32[3] = w;
return m;
#endif
}
public:
@ -79,22 +64,22 @@ public:
constexpr static GSVector4 cxpr(float x, float y, float z, float w)
{
return GSVector4(cxpr_setr_ps(x, y, z, w));
return GSVector4(cxpr_init, x, y, z, w);
}
constexpr static GSVector4 cxpr(float x)
{
return GSVector4(cxpr_setr_ps(x, x, x, x));
return GSVector4(cxpr_init, x, x, x, x);
}
constexpr static GSVector4 cxpr(int x, int y, int z, int w)
{
return GSVector4(cxpr_setr_epi32(x, y, z, w));
return GSVector4(cxpr_init, x, y, z, w);
}
constexpr static GSVector4 cxpr(int x)
{
return GSVector4(cxpr_setr_epi32(x, x, x, x));
return GSVector4(cxpr_init, x, x, x, x);
}
__forceinline GSVector4(float x, float y, float z, float w)

View File

@ -20,31 +20,12 @@ class alignas(16) GSVector4i
static const GSVector4i m_xff[17];
static const GSVector4i m_x0f[17];
constexpr static __m128i cxpr_setr_epi32(int x, int y, int z, int w)
struct cxpr_init_tag {};
static constexpr cxpr_init_tag cxpr_init{};
constexpr GSVector4i(cxpr_init_tag, int x, int y, int z, int w)
: I32{x, y, z, w}
{
#ifdef __GNUC__
return (__m128i)(__v4si{x, y, z, w});
#else
__m128i m = {};
m.m128i_i32[0] = x;
m.m128i_i32[1] = y;
m.m128i_i32[2] = z;
m.m128i_i32[3] = w;
return m;
#endif
}
constexpr static __m128i cxpr_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
{
#ifdef __GNUC__
return (__m128i)__v16qi{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15};
#else
__m128i m = {};
m.m128i_i8[0] = b0; m.m128i_i8[1] = b1; m.m128i_i8[2] = b2; m.m128i_i8[3] = b3;
m.m128i_i8[4] = b4; m.m128i_i8[5] = b5; m.m128i_i8[6] = b6; m.m128i_i8[7] = b7;
m.m128i_i8[8] = b8; m.m128i_i8[9] = b9; m.m128i_i8[10] = b10; m.m128i_i8[11] = b11;
m.m128i_i8[12] = b12; m.m128i_i8[13] = b13; m.m128i_i8[14] = b14; m.m128i_i8[15] = b15;
return m;
#endif
}
public:
@ -73,12 +54,12 @@ public:
constexpr static GSVector4i cxpr(int x, int y, int z, int w)
{
return GSVector4i(cxpr_setr_epi32(x, y, z, w));
return GSVector4i(cxpr_init, x, y, z, w);
}
constexpr static GSVector4i cxpr(int x)
{
return GSVector4i(cxpr_setr_epi32(x, x, x, x));
return GSVector4i(cxpr_init, x, x, x, x);
}
__forceinline GSVector4i(int x, int y, int z, int w)
@ -97,7 +78,7 @@ public:
}
constexpr GSVector4i(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
: m(cxpr_setr_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15))
: I8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}
{
}

View File

@ -19,40 +19,17 @@
class alignas(32) GSVector8
{
constexpr static __m256 cxpr_setr_ps(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
struct cxpr_init_tag {};
static constexpr cxpr_init_tag cxpr_init{};
constexpr GSVector8(cxpr_init_tag, float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
: F32{x0, y0, z0, w0, x1, y1, z1, w1}
{
#ifdef __GNUC__
return __m256{x0, y0, z0, w0, x1, y1, z1, w1};
#else
__m256 m = {};
m.m256_f32[0] = x0;
m.m256_f32[1] = y0;
m.m256_f32[2] = z0;
m.m256_f32[3] = w0;
m.m256_f32[4] = x1;
m.m256_f32[5] = y1;
m.m256_f32[6] = z1;
m.m256_f32[7] = w1;
return m;
#endif
}
constexpr static __m256 cxpr_setr_epi32(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
constexpr GSVector8(cxpr_init_tag, int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
: I32{x0, y0, z0, w0, x1, y1, z1, w1}
{
#ifdef __GNUC__
return (__m256)__v8si{x0, y0, z0, w0, x1, y1, z1, w1};
#else
union { __m256 m; int i[8]; } t = {};
t.i[0] = x0;
t.i[1] = y0;
t.i[2] = z0;
t.i[3] = w0;
t.i[4] = x1;
t.i[5] = y1;
t.i[6] = z1;
t.i[7] = w1;
return t.m;
#endif
}
public:
@ -87,22 +64,22 @@ public:
static constexpr GSVector8 cxpr(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
{
return GSVector8(cxpr_setr_ps(x0, y0, z0, w0, x1, y1, z1, w1));
return GSVector8(cxpr_init, x0, y0, z0, w0, x1, y1, z1, w1);
}
static constexpr GSVector8 cxpr(float x)
{
return GSVector8(cxpr_setr_ps(x, x, x, x, x, x, x, x));
return GSVector8(cxpr_init, x, x, x, x, x, x, x, x);
}
static constexpr GSVector8 cxpr(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
return GSVector8(cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1));
return GSVector8(cxpr_init, x0, y0, z0, w0, x1, y1, z1, w1);
}
static constexpr GSVector8 cxpr(int x)
{
return GSVector8(cxpr_setr_epi32(x, x, x, x, x, x, x, x));
return GSVector8(cxpr_init, x, x, x, x, x, x, x, x);
}
static constexpr GSVector8 cxpr(u32 x)

View File

@ -22,47 +22,12 @@ class alignas(32) GSVector8i
static const GSVector8i m_xff[33];
static const GSVector8i m_x0f[33];
constexpr static __m256i cxpr_setr_epi32(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
struct cxpr_init_tag {};
static constexpr cxpr_init_tag cxpr_init{};
constexpr GSVector8i(cxpr_init_tag, int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
: I32{x0, y0, z0, w0, x1, y1, z1, w1}
{
#ifdef __GNUC__
return (__m256i)__v8si{x0, y0, z0, w0, x1, y1, z1, w1};
#else
__m256i m = {};
m.m256i_i32[0] = x0;
m.m256i_i32[1] = y0;
m.m256i_i32[2] = z0;
m.m256i_i32[3] = w0;
m.m256i_i32[4] = x1;
m.m256i_i32[5] = y1;
m.m256i_i32[6] = z1;
m.m256i_i32[7] = w1;
return m;
#endif
}
constexpr static __m256i cxpr_setr_epi8(
char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7,
char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15,
char b16, char b17, char b18, char b19, char b20, char b21, char b22, char b23,
char b24, char b25, char b26, char b27, char b28, char b29, char b30, char b31)
{
#ifdef __GNUC__
return (__m256i)__v32qi
{
b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15,
b16, b17, b18, b19, b20, b21, b22, b23, b24, b25, b26, b27, b28, b29, b30, b31,
};
#else
__m256i m = {};
m.m256i_i8[0] = b0; m.m256i_i8[1] = b1; m.m256i_i8[2] = b2; m.m256i_i8[3] = b3;
m.m256i_i8[4] = b4; m.m256i_i8[5] = b5; m.m256i_i8[6] = b6; m.m256i_i8[7] = b7;
m.m256i_i8[8] = b8; m.m256i_i8[9] = b9; m.m256i_i8[10] = b10; m.m256i_i8[11] = b11;
m.m256i_i8[12] = b12; m.m256i_i8[13] = b13; m.m256i_i8[14] = b14; m.m256i_i8[15] = b15;
m.m256i_i8[16] = b16; m.m256i_i8[17] = b17; m.m256i_i8[18] = b18; m.m256i_i8[19] = b19;
m.m256i_i8[20] = b20; m.m256i_i8[21] = b21; m.m256i_i8[22] = b22; m.m256i_i8[23] = b23;
m.m256i_i8[24] = b24; m.m256i_i8[25] = b25; m.m256i_i8[26] = b26; m.m256i_i8[27] = b27;
m.m256i_i8[28] = b28; m.m256i_i8[29] = b29; m.m256i_i8[30] = b30; m.m256i_i8[31] = b31;
return m;
#endif
}
public:
@ -88,12 +53,12 @@ public:
static constexpr GSVector8i cxpr(int x0, int y0, int z0, int w0, int x1, int y1, int z1, int w1)
{
return GSVector8i(cxpr_setr_epi32(x0, y0, z0, w0, x1, y1, z1, w1));
return GSVector8i(cxpr_init, x0, y0, z0, w0, x1, y1, z1, w1);
}
static constexpr GSVector8i cxpr(int x)
{
return GSVector8i(cxpr_setr_epi32(x, x, x, x, x, x, x, x));
return GSVector8i(cxpr_init, x, x, x, x, x, x, x, x);
}
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
@ -119,9 +84,8 @@ public:
char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15,
char b16, char b17, char b18, char b19, char b20, char b21, char b22, char b23,
char b24, char b25, char b26, char b27, char b28, char b29, char b30, char b31)
: m(cxpr_setr_epi8(
b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15,
b16, b17, b18, b19, b20, b21, b22, b23, b24, b25, b26, b27, b28, b29, b30, b31))
: I8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15,
b16, b17, b18, b19, b20, b21, b22, b23, b24, b25, b26, b27, b28, b29, b30, b31}
{
}