Common: Fix ARM64 build on clang-cl 19

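Also fix GSVector4::extract32() so that it returns the lane as a float instead of an int, and add a matching GSVector4::insert32().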
This commit is contained in:
Stenzek 2025-02-16 18:03:21 +10:00
parent cba9d00019
commit 9928d01ac4
No known key found for this signature in database
4 changed files with 45 additions and 13 deletions

View File

@ -2754,9 +2754,15 @@ public:
}
template<int i>
ALWAYS_INLINE int extract32() const
ALWAYS_INLINE GSVector4 insert32(float v) const
{
return vgetq_lane_s32(vreinterpretq_s32_f32(v4s), i);
return GSVector4(vsetq_lane_f32(v, v4s, i));
}
// Returns lane `i` of this vector as a float.
// The lane index is a template argument, so it is fixed at compile time.
template<int i>
ALWAYS_INLINE float extract32() const
{
  const float lane = vgetq_lane_f32(v4s, i);
  return lane;
}
template<int dst>

View File

@ -1951,9 +1951,17 @@ public:
}
template<int i>
ALWAYS_INLINE int extract32() const
ALWAYS_INLINE GSVector4 insert32(float v) const
{
return I32[i];
GSVector4 ret(*this);
ret.F32[i] = v;
return ret;
}
// Returns element `i` of the F32 array as a float.
// The element index is a template argument, so it is fixed at compile time.
template<int i>
ALWAYS_INLINE float extract32() const
{
  const float lane = F32[i];
  return lane;
}
template<int dst>

View File

@ -325,7 +325,7 @@ public:
#else
constexpr s32 bit1 = ((mask & 2) * 3) << 1;
constexpr s32 bit0 = (mask & 1) * 3;
return blend16<bit1 | bit0>(v);
return blend16 < bit1 | bit0 > (v);
#endif
}
@ -1334,7 +1334,7 @@ public:
constexpr s32 bit2 = ((mask & 4) * 3) << 2;
constexpr s32 bit1 = ((mask & 2) * 3) << 1;
constexpr s32 bit0 = (mask & 1) * 3;
return blend16<bit3 | bit2 | bit1 | bit0>(v);
return blend16 < bit3 | bit2 | bit1 | bit0 > (v);
#endif
}
@ -2037,17 +2037,17 @@ public:
ALWAYS_INLINE GSVector4 hsub(const GSVector4& v) const { return GSVector4(_mm_hsub_ps(m, v.m)); }
NEVER_INLINE float dot(const GSVector4& v) const
{
#ifdef CPU_ARCH_SSE41
return _mm_cvtss_f32(_mm_dp_ps(m, v.m, 0xf1));
ALWAYS_INLINE float dot(const GSVector4& v) const { return _mm_cvtss_f32(_mm_dp_ps(m, v.m, 0xf1)); }
#else
float dot(const GSVector4& v) const
{
__m128 tmp = _mm_mul_ps(m, v.m);
tmp = _mm_add_ps(tmp, _mm_movehl_ps(tmp, tmp)); // (x+z, y+w, ..., ...)
tmp = _mm_add_ss(tmp, _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(3, 2, 1, 1)));
return _mm_cvtss_f32(tmp);
#endif
}
#endif
ALWAYS_INLINE GSVector4 sat(const GSVector4& min, const GSVector4& max) const
{
@ -2135,10 +2135,28 @@ public:
}
template<int i>
ALWAYS_INLINE int extract32() const
ALWAYS_INLINE GSVector4 insert32(float v) const
{
#ifdef CPU_ARCH_SSE41
return _mm_extract_ps(m, i);
if constexpr (i == 0)
return GSVector4(_mm_move_ss(m, _mm_load_ss(&v)));
else
return GSVector4(_mm_insert_ps(m, _mm_load_ss(&v), _MM_MK_INSERTPS_NDX(0, i, 0)));
#else
GSVector4 ret(*this);
ret.F32[i] = v;
return ret;
#endif
}
template<int i>
ALWAYS_INLINE float extract32() const
{
#ifdef CPU_ARCH_SSE41
if constexpr (i == 0)
return _mm_cvtss_f32(m);
else
return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(i, i, i, i)));
#else
return F32[i];
#endif

View File

@ -119,7 +119,7 @@ ALWAYS_INLINE static void MultiPause()
_mm_pause();
_mm_pause();
_mm_pause();
#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER)
#elif defined(CPU_ARCH_ARM64) && defined(_MSC_VER) && !defined(__clang__)
__isb(_ARM64_BARRIER_SY);
__isb(_ARM64_BARRIER_SY);
__isb(_ARM64_BARRIER_SY);