Common: Fix ARM64 build on clang-cl 19
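Also fix GSVector4::extract32() so it returns the selected lane as a float (it previously returned an int), and add a matching GSVector4::insert32(float).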
This commit is contained in:
parent
cba9d00019
commit
9928d01ac4
|
@ -2754,9 +2754,15 @@ public:
|
|||
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE int extract32() const
|
||||
ALWAYS_INLINE GSVector4 insert32(float v) const
|
||||
{
|
||||
return vgetq_lane_s32(vreinterpretq_s32_f32(v4s), i);
|
||||
return GSVector4(vsetq_lane_f32(v, v4s, i));
|
||||
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE float extract32() const
|
||||
{
|
||||
return vgetq_lane_f32(v4s, i);
|
||||
}
|
||||
|
||||
template<int dst>
|
||||
|
|
|
@ -1951,9 +1951,17 @@ public:
|
|||
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE int extract32() const
|
||||
ALWAYS_INLINE GSVector4 insert32(float v) const
|
||||
{
|
||||
return I32[i];
|
||||
GSVector4 ret(*this);
|
||||
ret.F32[i] = v;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE float extract32() const
|
||||
{
|
||||
return F32[i];
|
||||
}
|
||||
|
||||
template<int dst>
|
||||
|
|
|
@ -325,7 +325,7 @@ public:
|
|||
#else
|
||||
constexpr s32 bit1 = ((mask & 2) * 3) << 1;
|
||||
constexpr s32 bit0 = (mask & 1) * 3;
|
||||
return blend16<bit1 | bit0>(v);
|
||||
return blend16 < bit1 | bit0 > (v);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1334,7 +1334,7 @@ public:
|
|||
constexpr s32 bit2 = ((mask & 4) * 3) << 2;
|
||||
constexpr s32 bit1 = ((mask & 2) * 3) << 1;
|
||||
constexpr s32 bit0 = (mask & 1) * 3;
|
||||
return blend16<bit3 | bit2 | bit1 | bit0>(v);
|
||||
return blend16 < bit3 | bit2 | bit1 | bit0 > (v);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -2037,17 +2037,17 @@ public:
|
|||
|
||||
ALWAYS_INLINE GSVector4 hsub(const GSVector4& v) const { return GSVector4(_mm_hsub_ps(m, v.m)); }
|
||||
|
||||
NEVER_INLINE float dot(const GSVector4& v) const
|
||||
{
|
||||
#ifdef CPU_ARCH_SSE41
|
||||
return _mm_cvtss_f32(_mm_dp_ps(m, v.m, 0xf1));
|
||||
ALWAYS_INLINE float dot(const GSVector4& v) const { return _mm_cvtss_f32(_mm_dp_ps(m, v.m, 0xf1)); }
|
||||
#else
|
||||
float dot(const GSVector4& v) const
|
||||
{
|
||||
__m128 tmp = _mm_mul_ps(m, v.m);
|
||||
tmp = _mm_add_ps(tmp, _mm_movehl_ps(tmp, tmp)); // (x+z, y+w, ..., ...)
|
||||
tmp = _mm_add_ss(tmp, _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(3, 2, 1, 1)));
|
||||
return _mm_cvtss_f32(tmp);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
ALWAYS_INLINE GSVector4 sat(const GSVector4& min, const GSVector4& max) const
|
||||
{
|
||||
|
@ -2135,10 +2135,28 @@ public:
|
|||
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE int extract32() const
|
||||
ALWAYS_INLINE GSVector4 insert32(float v) const
|
||||
{
|
||||
#ifdef CPU_ARCH_SSE41
|
||||
return _mm_extract_ps(m, i);
|
||||
if constexpr (i == 0)
|
||||
return GSVector4(_mm_move_ss(m, _mm_load_ss(&v)));
|
||||
else
|
||||
return GSVector4(_mm_insert_ps(m, _mm_load_ss(&v), _MM_MK_INSERTPS_NDX(0, i, 0)));
|
||||
#else
|
||||
GSVector4 ret(*this);
|
||||
ret.F32[i] = v;
|
||||
return ret;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE float extract32() const
|
||||
{
|
||||
#ifdef CPU_ARCH_SSE41
|
||||
if constexpr (i == 0)
|
||||
return _mm_cvtss_f32(m);
|
||||
else
|
||||
return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(i, i, i, i)));
|
||||
#else
|
||||
return F32[i];
|
||||
#endif
|
||||
|
|
|
@ -119,7 +119,7 @@ ALWAYS_INLINE static void MultiPause()
|
|||
_mm_pause();
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER)
|
||||
#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER) && !defined(__clang__)
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
|
|
Loading…
Reference in New Issue