From b0f96628114460f1662b04cd790c30c9b593670c Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 21 Feb 2021 04:30:39 -0600 Subject: [PATCH] GS: Add srav and blend backwards compat to GSVector4i --- pcsx2/GS/GSVector4i.h | 52 ++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/pcsx2/GS/GSVector4i.h b/pcsx2/GS/GSVector4i.h index 1c70c52453..82a10fdd96 100644 --- a/pcsx2/GS/GSVector4i.h +++ b/pcsx2/GS/GSVector4i.h @@ -234,13 +234,9 @@ public: return sat_i32(a); } - template - __forceinline GSVector4i ralign(const GSVector2i& a) const + template + GSVector4i _ralign_helper(const GSVector4i& mask) const { - // a must be 1 << n - - GSVector4i mask = GSVector4i(a) - GSVector4i(1, 1); - GSVector4i v; switch (mode) @@ -248,13 +244,28 @@ public: case Align_Inside: v = *this + mask; break; case Align_Outside: v = *this + mask.zwxy(); break; case Align_NegInf: v = *this; break; - case Align_PosInf: v = *this + mask.zwzw(); break; + case Align_PosInf: v = *this + mask.xyxy(); break; default: pxAssert(0); break; } return v.andnot(mask.xyxy()); } + /// Align the rect using mask values that already have one subtracted ((1 << n) - 1 aligns to 1 << n) + template + GSVector4i ralign_presub(const GSVector2i& a) const + { + return _ralign_helper(GSVector4i(a)); + } + + template + GSVector4i ralign(const GSVector2i& a) const + { + // a must be 1 << n + + return _ralign_helper(GSVector4i(a) - GSVector4i(1, 1)); + } + GSVector4i fit(int arx, int ary) const; GSVector4i fit(int preset) const; @@ -426,15 +437,19 @@ public: return GSVector4i(_mm_blend_epi16(m, a, mask)); } -#if _M_SSE >= 0x501 - template __forceinline GSVector4i blend32(const GSVector4i& v) const { +#if _M_SSE >= 0x501 return GSVector4i(_mm_blend_epi32(m, v.m, mask)); - } - +#else + constexpr int bit3 = ((mask & 8) * 3) << 3; + constexpr int bit2 = ((mask & 4) * 3) << 2; + constexpr int bit1 = ((mask & 2) * 3) << 1; + constexpr int bit0 = (mask & 1) * 3; + return 
blend16(v); #endif + } __forceinline GSVector4i blend(const GSVector4i& a, const GSVector4i& mask) const { @@ -698,6 +713,13 @@ public: return GSVector4i(_mm_sra_epi32(m, i)); } +#if _M_SSE >= 0x501 + __forceinline GSVector4i srav32(const GSVector4i& v) const + { + return GSVector4i(_mm_srav_epi32(m, v.m)); + } +#endif + __forceinline GSVector4i sll16(int i) const { return GSVector4i(_mm_slli_epi16(m, i)); @@ -719,9 +741,9 @@ public: } #if _M_SSE >= 0x501 - __forceinline GSVector4i sllv32(__m128i i) const + __forceinline GSVector4i sllv32(const GSVector4i& v) const { - return GSVector4i(_mm_sllv_epi32(m, i)); + return GSVector4i(_mm_sllv_epi32(m, v.m)); } #endif @@ -756,9 +778,9 @@ public: } #if _M_SSE >= 0x501 - __forceinline GSVector4i srlv32(__m128i i) const + __forceinline GSVector4i srlv32(const GSVector4i& v) const { - return GSVector4i(_mm_srlv_epi32(m, i)); + return GSVector4i(_mm_srlv_epi32(m, v.m)); } #endif