mirror of https://github.com/PCSX2/pcsx2.git
GS: Add double operations to GSVector/GSNewCodeGenerator
This commit is contained in:
parent
82de13d95a
commit
b6c1b3fb96
|
@ -67,6 +67,7 @@ CONSTINIT const GSVector4 GSVector4::m_two = cxpr(2.0f);
|
||||||
CONSTINIT const GSVector4 GSVector4::m_four = cxpr(4.0f);
|
CONSTINIT const GSVector4 GSVector4::m_four = cxpr(4.0f);
|
||||||
CONSTINIT const GSVector4 GSVector4::m_x4b000000 = cxpr(0x4b000000);
|
CONSTINIT const GSVector4 GSVector4::m_x4b000000 = cxpr(0x4b000000);
|
||||||
CONSTINIT const GSVector4 GSVector4::m_x4f800000 = cxpr(0x4f800000);
|
CONSTINIT const GSVector4 GSVector4::m_x4f800000 = cxpr(0x4f800000);
|
||||||
|
CONSTINIT const GSVector4 GSVector4::m_xc1e00000000fffff = cxpr64(0xc1e00000000fffffull);
|
||||||
CONSTINIT const GSVector4 GSVector4::m_max = cxpr(FLT_MAX);
|
CONSTINIT const GSVector4 GSVector4::m_max = cxpr(FLT_MAX);
|
||||||
CONSTINIT const GSVector4 GSVector4::m_min = cxpr(FLT_MIN);
|
CONSTINIT const GSVector4 GSVector4::m_min = cxpr(FLT_MIN);
|
||||||
|
|
||||||
|
@ -78,6 +79,7 @@ CONSTINIT const GSVector8 GSVector8::m_x7fffffff = cxpr(0x7fffffff);
|
||||||
CONSTINIT const GSVector8 GSVector8::m_x80000000 = cxpr(0x80000000);
|
CONSTINIT const GSVector8 GSVector8::m_x80000000 = cxpr(0x80000000);
|
||||||
CONSTINIT const GSVector8 GSVector8::m_x4b000000 = cxpr(0x4b000000);
|
CONSTINIT const GSVector8 GSVector8::m_x4b000000 = cxpr(0x4b000000);
|
||||||
CONSTINIT const GSVector8 GSVector8::m_x4f800000 = cxpr(0x4f800000);
|
CONSTINIT const GSVector8 GSVector8::m_x4f800000 = cxpr(0x4f800000);
|
||||||
|
CONSTINIT const GSVector8 GSVector8::m_xc1e00000000fffff = cxpr64(0xc1e00000000fffffull);
|
||||||
CONSTINIT const GSVector8 GSVector8::m_max = cxpr(FLT_MAX);
|
CONSTINIT const GSVector8 GSVector8::m_max = cxpr(FLT_MAX);
|
||||||
CONSTINIT const GSVector8 GSVector8::m_min = cxpr(FLT_MAX);
|
CONSTINIT const GSVector8 GSVector8::m_min = cxpr(FLT_MAX);
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,11 @@ class alignas(16) GSVector4
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr GSVector4(cxpr_init_tag, u64 x, u64 y)
|
||||||
|
: U64{x, y}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
|
@ -36,6 +41,7 @@ public:
|
||||||
struct { float left, top, right, bottom; };
|
struct { float left, top, right, bottom; };
|
||||||
float v[4];
|
float v[4];
|
||||||
float F32[4];
|
float F32[4];
|
||||||
|
double F64[2];
|
||||||
s8 I8[16];
|
s8 I8[16];
|
||||||
s16 I16[8];
|
s16 I16[8];
|
||||||
s32 I32[4];
|
s32 I32[4];
|
||||||
|
@ -55,6 +61,7 @@ public:
|
||||||
static const GSVector4 m_four;
|
static const GSVector4 m_four;
|
||||||
static const GSVector4 m_x4b000000;
|
static const GSVector4 m_x4b000000;
|
||||||
static const GSVector4 m_x4f800000;
|
static const GSVector4 m_x4f800000;
|
||||||
|
static const GSVector4 m_xc1e00000000fffff;
|
||||||
static const GSVector4 m_max;
|
static const GSVector4 m_max;
|
||||||
static const GSVector4 m_min;
|
static const GSVector4 m_min;
|
||||||
|
|
||||||
|
@ -82,6 +89,16 @@ public:
|
||||||
return GSVector4(cxpr_init, x, x, x, x);
|
return GSVector4(cxpr_init, x, x, x, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr static GSVector4 cxpr64(u64 x, u64 y)
|
||||||
|
{
|
||||||
|
return GSVector4(cxpr_init, x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr static GSVector4 cxpr64(u64 x)
|
||||||
|
{
|
||||||
|
return GSVector4(cxpr_init, x, x);
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline GSVector4(float x, float y, float z, float w)
|
__forceinline GSVector4(float x, float y, float z, float w)
|
||||||
{
|
{
|
||||||
m = _mm_set_ps(w, z, y, x);
|
m = _mm_set_ps(w, z, y, x);
|
||||||
|
@ -119,6 +136,11 @@ public:
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline explicit GSVector4(__m128d m)
|
||||||
|
: m(_mm_castpd_ps(m))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline explicit GSVector4(float f)
|
__forceinline explicit GSVector4(float f)
|
||||||
{
|
{
|
||||||
*this = f;
|
*this = f;
|
||||||
|
@ -162,6 +184,11 @@ public:
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
__forceinline static GSVector4 f64(double x, double y)
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_castpd_ps(_mm_set_pd(y, x)));
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline void operator=(const GSVector4& v)
|
__forceinline void operator=(const GSVector4& v)
|
||||||
{
|
{
|
||||||
m = v.m;
|
m = v.m;
|
||||||
|
@ -858,6 +885,36 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
|
||||||
return GSVector4(_mm_cmple_ps(v1, v2));
|
return GSVector4(_mm_cmple_ps(v1, v2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4 mul64(const GSVector4& v) const
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_mul_pd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4 add64(const GSVector4& v) const
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_add_pd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4 sub64(const GSVector4& v) const
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_sub_pd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector4 f32to64(const GSVector4& v)
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_cvtps_pd(v.m));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector4 f32to64(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_cvtps_pd(_mm_castpd_ps(_mm_load_sd(static_cast<const double*>(p)))));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4i f64toi32(bool truncate = true) const
|
||||||
|
{
|
||||||
|
return GSVector4i(truncate ? _mm_cvttpd_epi32(_mm_castps_pd(m)) : _mm_cvtpd_epi32(_mm_castps_pd(m)));
|
||||||
|
}
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
|
||||||
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
|
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
|
||||||
|
@ -907,4 +964,9 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
__forceinline static GSVector4 broadcast64(const void* d)
|
||||||
|
{
|
||||||
|
return GSVector4(_mm_loaddup_pd(static_cast<const double*>(d)));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -32,6 +32,11 @@ class alignas(32) GSVector8
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr GSVector8(cxpr_init_tag, u64 x, u64 y, u64 z, u64 w)
|
||||||
|
: U64{x, y, z, w}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
|
@ -39,6 +44,7 @@ public:
|
||||||
struct { float r0, g0, b0, a0, r1, g1, b1, a1; };
|
struct { float r0, g0, b0, a0, r1, g1, b1, a1; };
|
||||||
float v[8];
|
float v[8];
|
||||||
float F32[8];
|
float F32[8];
|
||||||
|
double F64[4];
|
||||||
s8 I8[32];
|
s8 I8[32];
|
||||||
s16 I16[16];
|
s16 I16[16];
|
||||||
s32 I32[8];
|
s32 I32[8];
|
||||||
|
@ -57,6 +63,7 @@ public:
|
||||||
static const GSVector8 m_x80000000;
|
static const GSVector8 m_x80000000;
|
||||||
static const GSVector8 m_x4b000000;
|
static const GSVector8 m_x4b000000;
|
||||||
static const GSVector8 m_x4f800000;
|
static const GSVector8 m_x4f800000;
|
||||||
|
static const GSVector8 m_xc1e00000000fffff;
|
||||||
static const GSVector8 m_max;
|
static const GSVector8 m_max;
|
||||||
static const GSVector8 m_min;
|
static const GSVector8 m_min;
|
||||||
|
|
||||||
|
@ -87,6 +94,16 @@ public:
|
||||||
return cxpr(static_cast<int>(x));
|
return cxpr(static_cast<int>(x));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr static GSVector8 cxpr64(u64 x, u64 y, u64 z, u64 w)
|
||||||
|
{
|
||||||
|
return GSVector8(cxpr_init, x, y, z, w);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr static GSVector8 cxpr64(u64 x)
|
||||||
|
{
|
||||||
|
return GSVector8(cxpr_init, x, x, x, x);
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
|
__forceinline GSVector8(float x0, float y0, float z0, float w0, float x1, float y1, float z1, float w1)
|
||||||
{
|
{
|
||||||
m = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0);
|
m = _mm256_set_ps(w1, z1, y1, x1, w0, z0, y0, x0);
|
||||||
|
@ -142,6 +159,11 @@ public:
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline explicit GSVector8(__m256d m)
|
||||||
|
: m(_mm256_castpd_ps(m))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
__forceinline explicit GSVector8(const GSVector8i& v);
|
__forceinline explicit GSVector8(const GSVector8i& v);
|
||||||
|
@ -773,6 +795,36 @@ public:
|
||||||
return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LE_OQ));
|
return GSVector8(_mm256_cmp_ps(v1, v2, _CMP_LE_OQ));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector8 mul64(const GSVector8& v) const
|
||||||
|
{
|
||||||
|
return GSVector8(_mm256_mul_pd(_mm256_castps_pd(m), _mm256_castps_pd(v.m)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector8 add64(const GSVector8& v) const
|
||||||
|
{
|
||||||
|
return GSVector8(_mm256_add_pd(_mm256_castps_pd(m), _mm256_castps_pd(v.m)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector8 sub64(const GSVector8& v) const
|
||||||
|
{
|
||||||
|
return GSVector8(_mm256_sub_pd(_mm256_castps_pd(m), _mm256_castps_pd(v.m)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector8 f32to64(const GSVector4& v)
|
||||||
|
{
|
||||||
|
return GSVector8(_mm256_cvtps_pd(v.m));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector8 f32to64(const void* p)
|
||||||
|
{
|
||||||
|
return GSVector8(_mm256_cvtps_pd(_mm_load_ps(static_cast<const float*>(p))));
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4i f64toi32(bool truncate = true) const
|
||||||
|
{
|
||||||
|
return GSVector4i(truncate ? _mm256_cvttpd_epi32(_mm256_castps_pd(m)) : _mm256_cvtpd_epi32(_mm256_castps_pd(m)));
|
||||||
|
}
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
|
||||||
// x = v[31:0] / v[159:128]
|
// x = v[31:0] / v[159:128]
|
||||||
|
@ -888,6 +940,11 @@ public:
|
||||||
return GSVector8(_mm256_broadcastss_ps(_mm_load_ss((const float*)f)));
|
return GSVector8(_mm256_broadcastss_ps(_mm_load_ss((const float*)f)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector8 broadcast64(const void* d)
|
||||||
|
{
|
||||||
|
return GSVector8(_mm256_broadcast_sd(static_cast<const double*>(d)));
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: v.(x0|y0|z0|w0|x1|y1|z1|w1) // broadcast element
|
// TODO: v.(x0|y0|z0|w0|x1|y1|z1|w1) // broadcast element
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -312,9 +312,18 @@ public:
|
||||||
FORWARD_JUMP(jmp)
|
FORWARD_JUMP(jmp)
|
||||||
|
|
||||||
AFORWARD(2, addps, ARGS_XO)
|
AFORWARD(2, addps, ARGS_XO)
|
||||||
|
AFORWARD(2, addpd, ARGS_XO)
|
||||||
SFORWARD(2, cvtdq2ps, ARGS_XO)
|
SFORWARD(2, cvtdq2ps, ARGS_XO)
|
||||||
|
SFORWARD(2, cvtpd2dq, ARGS_XO)
|
||||||
|
SFORWARD(2, cvtpd2ps, ARGS_XO)
|
||||||
|
SFORWARD(2, cvttpd2dq, ARGS_XO)
|
||||||
SFORWARD(2, cvtps2dq, ARGS_XO)
|
SFORWARD(2, cvtps2dq, ARGS_XO)
|
||||||
|
SFORWARD(2, cvtps2pd, ARGS_XO)
|
||||||
|
SFORWARD(2, cvtsd2si, const AddressReg&, const Operand&);
|
||||||
|
AFORWARD(2, cvtsd2ss, ARGS_XO)
|
||||||
|
AFORWARD(2, cvtss2sd, ARGS_XO)
|
||||||
SFORWARD(2, cvttps2dq, ARGS_XO)
|
SFORWARD(2, cvttps2dq, ARGS_XO)
|
||||||
|
SFORWARD(2, cvttsd2si, const AddressReg&, const Operand&);
|
||||||
SFORWARD(3, extractps, const Operand&, const Xmm&, u8)
|
SFORWARD(3, extractps, const Operand&, const Xmm&, u8)
|
||||||
AFORWARD(2, maxps, ARGS_XO)
|
AFORWARD(2, maxps, ARGS_XO)
|
||||||
AFORWARD(2, minps, ARGS_XO)
|
AFORWARD(2, minps, ARGS_XO)
|
||||||
|
@ -324,13 +333,21 @@ public:
|
||||||
SFORWARD(2, movd, const Reg32&, const Xmm&)
|
SFORWARD(2, movd, const Reg32&, const Xmm&)
|
||||||
SFORWARD(2, movd, const Xmm&, const Address&)
|
SFORWARD(2, movd, const Xmm&, const Address&)
|
||||||
SFORWARD(2, movd, const Xmm&, const Reg32&)
|
SFORWARD(2, movd, const Xmm&, const Reg32&)
|
||||||
|
SFORWARD(2, movddup, ARGS_XO);
|
||||||
SFORWARD(2, movdqa, ARGS_XO)
|
SFORWARD(2, movdqa, ARGS_XO)
|
||||||
SFORWARD(2, movdqa, const Address&, const Xmm&)
|
SFORWARD(2, movdqa, const Address&, const Xmm&)
|
||||||
SFORWARD(2, movhps, ARGS_XO)
|
SFORWARD(2, movhps, ARGS_XO)
|
||||||
SFORWARD(2, movhps, const Address&, const Xmm&)
|
SFORWARD(2, movhps, const Address&, const Xmm&)
|
||||||
SFORWARD(2, movq, const Address&, const Xmm&)
|
SFORWARD(2, movq, const Address&, const Xmm&)
|
||||||
SFORWARD(2, movq, const Xmm&, const Address&)
|
SFORWARD(2, movq, const Xmm&, const Address&)
|
||||||
|
SFORWARD(2, movsd, const Address&, const Xmm&)
|
||||||
|
SFORWARD(2, movsd, const Xmm&, const Address&)
|
||||||
|
SFORWARD(2, movss, const Address&, const Xmm&)
|
||||||
|
SFORWARD(2, movss, const Xmm&, const Address&)
|
||||||
|
AFORWARD(2, mulpd, ARGS_XO)
|
||||||
AFORWARD(2, mulps, ARGS_XO)
|
AFORWARD(2, mulps, ARGS_XO)
|
||||||
|
AFORWARD(2, mulsd, ARGS_XO)
|
||||||
|
AFORWARD(2, mulss, ARGS_XO)
|
||||||
AFORWARD(2, orps, ARGS_XO)
|
AFORWARD(2, orps, ARGS_XO)
|
||||||
AFORWARD(2, packssdw, ARGS_XO)
|
AFORWARD(2, packssdw, ARGS_XO)
|
||||||
AFORWARD(2, packusdw, ARGS_XO)
|
AFORWARD(2, packusdw, ARGS_XO)
|
||||||
|
@ -382,11 +399,14 @@ public:
|
||||||
SFORWARD(2, rcpps, ARGS_XO)
|
SFORWARD(2, rcpps, ARGS_XO)
|
||||||
AFORWARD(3, shufps, ARGS_XOI)
|
AFORWARD(3, shufps, ARGS_XOI)
|
||||||
AFORWARD(2, subps, ARGS_XO)
|
AFORWARD(2, subps, ARGS_XO)
|
||||||
|
AFORWARD(2, unpcklps, ARGS_XO)
|
||||||
|
AFORWARD(2, unpcklpd, ARGS_XO)
|
||||||
AFORWARD(2, xorps, ARGS_XO)
|
AFORWARD(2, xorps, ARGS_XO)
|
||||||
|
|
||||||
FORWARD_SSE_XMM0(pblendvb)
|
FORWARD_SSE_XMM0(pblendvb)
|
||||||
|
|
||||||
FORWARD(2, AVX, vbroadcastss, ARGS_XO)
|
FORWARD(2, AVX, vbroadcastss, ARGS_XO)
|
||||||
|
FORWARD(2, AVX, vbroadcastsd, const Ymm&, const Address&)
|
||||||
FORWARD(2, AVX2, vbroadcasti128, const Ymm&, const Address&)
|
FORWARD(2, AVX2, vbroadcasti128, const Ymm&, const Address&)
|
||||||
FORWARD(2, AVX, vbroadcastf128, const Ymm&, const Address&)
|
FORWARD(2, AVX, vbroadcastf128, const Ymm&, const Address&)
|
||||||
FORWARD(3, FMA, vfmadd213ps, ARGS_XXO)
|
FORWARD(3, FMA, vfmadd213ps, ARGS_XXO)
|
||||||
|
|
Loading…
Reference in New Issue