From e22d67f4aac4859bce000685645b08273bf8098d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 13 Nov 2024 14:34:01 +1000 Subject: [PATCH] GSVector: Add 4x4 matrix class --- src/common/gsvector.cpp | 161 +++++++++++++++++++++++++++++++++++ src/common/gsvector.h | 30 +++++++ src/common/gsvector_neon.h | 11 +++ src/common/gsvector_nosimd.h | 15 +--- src/common/gsvector_sse.h | 17 +++- 5 files changed, 218 insertions(+), 16 deletions(-) diff --git a/src/common/gsvector.cpp b/src/common/gsvector.cpp index 392b9da44..787fb7d66 100644 --- a/src/common/gsvector.cpp +++ b/src/common/gsvector.cpp @@ -65,3 +65,164 @@ void GSMatrix2x2::store(void* m) { std::memcpy(m, E, sizeof(E)); } + +GSMatrix4x4::GSMatrix4x4(float e00, float e01, float e02, float e03, float e10, float e11, float e12, float e13, + float e20, float e21, float e22, float e23, float e30, float e31, float e32, float e33) +{ + E[0][0] = e00; + E[0][1] = e01; + E[0][2] = e02; + E[0][3] = e03; + E[1][0] = e10; + E[1][1] = e11; + E[1][2] = e12; + E[1][3] = e13; + E[2][0] = e20; + E[2][1] = e21; + E[2][2] = e22; + E[2][3] = e23; + E[3][0] = e30; + E[3][1] = e31; + E[3][2] = e32; + E[3][3] = e33; +} + + GSMatrix4x4::GSMatrix4x4(const GSMatrix2x2& m) +{ + E[0][0] = m.E[0][0]; + E[0][1] = m.E[0][1]; + E[0][2] = 0.0f; + E[0][3] = 0.0f; + E[1][0] = m.E[1][0]; + E[1][1] = m.E[1][1]; + E[1][2] = 0.0f; + E[1][3] = 0.0f; + E[2][0] = 0.0f; + E[2][1] = 0.0f; + E[2][2] = 1.0f; + E[2][3] = 0.0f; + E[3][0] = 0.0f; + E[3][1] = 0.0f; + E[3][2] = 0.0f; + E[3][3] = 1.0f; + } + +GSMatrix4x4 GSMatrix4x4::operator*(const GSMatrix4x4& m) const +{ + // This isn't speedy by any means, but it's not hot code either. 
+  GSMatrix4x4 res;
+
+#define MultRC(rw, cl) E[rw][0] * m.E[0][cl] + E[rw][1] * m.E[1][cl] + E[rw][2] * m.E[2][cl] + E[rw][3] * m.E[3][cl]
+
+  res.E[0][0] = MultRC(0, 0);
+  res.E[0][1] = MultRC(0, 1);
+  res.E[0][2] = MultRC(0, 2);
+  res.E[0][3] = MultRC(0, 3);
+  res.E[1][0] = MultRC(1, 0);
+  res.E[1][1] = MultRC(1, 1);
+  res.E[1][2] = MultRC(1, 2);
+  res.E[1][3] = MultRC(1, 3);
+  res.E[2][0] = MultRC(2, 0);
+  res.E[2][1] = MultRC(2, 1);
+  res.E[2][2] = MultRC(2, 2);
+  res.E[2][3] = MultRC(2, 3);
+  res.E[3][0] = MultRC(3, 0);
+  res.E[3][1] = MultRC(3, 1);
+  res.E[3][2] = MultRC(3, 2);
+  res.E[3][3] = MultRC(3, 3);
+
+#undef MultRC
+
+  return res;
+}
+
+GSVector4 GSMatrix4x4::operator*(const GSVector4& v) const
+{
+  const GSVector4 r0 = row(0);
+  const GSVector4 r1 = row(1);
+  const GSVector4 r2 = row(2);
+  const GSVector4 r3 = row(3); // E has rows 0..3; row(4) would load past the end of the array
+  // Matrix * column vector: component i of the result is dot(row i, v).
+  return GSVector4(r0.dot(v), r1.dot(v), r2.dot(v), r3.dot(v));
+}
+
+GSMatrix4x4 GSMatrix4x4::Identity()
+{
+  GSMatrix4x4 res;
+
+  // Ones on the main diagonal, zeros everywhere else.
+
+  res.E[0][0] = 1.0f;
+  res.E[0][1] = 0.0f;
+  res.E[0][2] = 0.0f;
+  res.E[0][3] = 0.0f;
+  res.E[1][0] = 0.0f;
+  res.E[1][1] = 1.0f;
+  res.E[1][2] = 0.0f;
+  res.E[1][3] = 0.0f;
+  res.E[2][0] = 0.0f;
+  res.E[2][1] = 0.0f;
+  res.E[2][2] = 1.0f;
+  res.E[2][3] = 0.0f;
+  res.E[3][0] = 0.0f;
+  res.E[3][1] = 0.0f;
+  res.E[3][2] = 0.0f;
+  res.E[3][3] = 1.0f;
+
+  return res;
+}
+
+GSMatrix4x4 GSMatrix4x4::RotationX(float angle_in_radians)
+{
+  const float sin_angle = std::sin(angle_in_radians);
+  const float cos_angle = std::cos(angle_in_radians);
+
+  return GSMatrix4x4(1.0f, 0.0f, 0.0f, 0.0f, 0.0f, cos_angle, -sin_angle, 0.0f, 0.0f, sin_angle, cos_angle, 0.0f, 0.0f,
+                     0.0f, 0.0f, 1.0f);
+}
+
+GSMatrix4x4 GSMatrix4x4::RotationY(float angle_in_radians)
+{
+  const float sin_angle = std::sin(angle_in_radians);
+  const float cos_angle = std::cos(angle_in_radians);
+
+  return GSMatrix4x4(cos_angle,
0.0f, sin_angle, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, -sin_angle, 0.0f, cos_angle, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f); +} + +GSMatrix4x4 GSMatrix4x4::RotationZ(float angle_in_radians) +{ + const float sin_angle = std::sin(angle_in_radians); + const float cos_angle = std::cos(angle_in_radians); + + return GSMatrix4x4(cos_angle, -sin_angle, 0.0f, 0.0f, sin_angle, cos_angle, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f); +} + +GSMatrix4x4 GSMatrix4x4::OffCenterOrthographicProjection(float left, float top, float right, float bottom, float zNear, + float zFar) +{ + return GSMatrix4x4(2.0f / (right - left), 0.0f, 0.0f, (left + right) / (left - right), 0.0f, 2.0f / (top - bottom), + 0.0f, (top + bottom) / (bottom - top), 0.0f, 0.0f, 1.0f / (zNear - zFar), zNear / (zNear - zFar), + 0.0f, 0.0f, 0.0f, 1.0f); +} + +GSMatrix4x4 GSMatrix4x4::OffCenterOrthographicProjection(float width, float height, float zNear, float zFar) +{ + return OffCenterOrthographicProjection(0.0f, 0.0f, width, height, zNear, zFar); +} + +GSVector4 GSMatrix4x4::row(size_t i) const +{ + return GSVector4::load(&E[i][0]); +} + +GSVector4 GSMatrix4x4::col(size_t i) const +{ + return GSVector4(E[0][i], E[1][i], E[2][i], E[3][i]); +} + +void GSMatrix4x4::store(void* m) +{ + std::memcpy(m, &E[0][0], sizeof(E)); +} diff --git a/src/common/gsvector.h b/src/common/gsvector.h index 3bccfa3ca..106e29b35 100644 --- a/src/common/gsvector.h +++ b/src/common/gsvector.h @@ -37,3 +37,33 @@ public: alignas(8) float E[2][2]; }; + +class alignas(VECTOR_ALIGNMENT) GSMatrix4x4 +{ +public: + GSMatrix4x4() = default; + GSMatrix4x4(float e00, float e01, float e02, float e03, float e10, float e11, float e12, float e13, float e20, + float e21, float e22, float e23, float e30, float e31, float e32, float e33); + GSMatrix4x4(const GSMatrix2x2& m); + + GSMatrix4x4 operator*(const GSMatrix4x4& m) const; + + GSVector4 operator*(const GSVector4& v) const; + + static GSMatrix4x4 Identity(); + + static GSMatrix4x4 RotationX(float 
angle_in_radians); + static GSMatrix4x4 RotationY(float angle_in_radians); + static GSMatrix4x4 RotationZ(float angle_in_radians); + + static GSMatrix4x4 OffCenterOrthographicProjection(float left, float top, float right, float bottom, float zNear, + float zFar); + static GSMatrix4x4 OffCenterOrthographicProjection(float width, float height, float zNear, float zFar); + + GSVector4 row(size_t i) const; + GSVector4 col(size_t i) const; + + void store(void* m); + + float E[4][4]; +}; diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index c8efec076..c77a1f529 100644 --- a/src/common/gsvector_neon.h +++ b/src/common/gsvector_neon.h @@ -2574,6 +2574,17 @@ public: #endif + ALWAYS_INLINE float dot(const GSVector4& v) const + { +#ifdef CPU_ARCH_ARM64 + return vaddvq_f32(vmulq_f32(v4s, v.v4s)); +#else + const float32x4_t dp = vmulq_f32(v4s, v.v4s); + float32x2_t tmp = vadd_f32(vget_low_f32(dp), vget_high_f32(dp)); // (x+z, y+w) + return vget_lane_f32(vadd_f32(tmp, vdup_lane_f32(tmp, 1)), 0); +#endif + } + ALWAYS_INLINE GSVector4 sat(const GSVector4& a, const GSVector4& b) const { return max(a).min(b); } ALWAYS_INLINE GSVector4 sat(const GSVector4& a) const diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index 6d73592d9..9c47424b3 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -1845,20 +1845,9 @@ public: GSVector4 hsub(const GSVector4& v) const { return GSVector4(x - y, z - w, v.x - v.y, v.z - v.w); } - template - GSVector4 dp(const GSVector4& v) const + ALWAYS_INLINE float dot(const GSVector4& v) const { - float res = 0.0f; - if constexpr (i & 0x10) - res += x * v.x; - if constexpr (i & 0x20) - res += y * v.y; - if constexpr (i & 0x40) - res += z * v.z; - if constexpr (i & 0x80) - res += w * v.w; - return GSVector4((i & 0x01) ? res : 0.0f, (i & 0x02) ? res : 0.0f, (i & 0x04) ? res : 0.0f, - (i & 0x08) ? 
res : 0.0f);
+    return (x * v.x) + (y * v.y) + (z * v.z) + (w * v.w);
   }
 
   GSVector4 sat(const GSVector4& min, const GSVector4& max) const
diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h
index c37d50d2f..5dc49a76d 100644
--- a/src/common/gsvector_sse.h
+++ b/src/common/gsvector_sse.h
@@ -2007,10 +2007,16 @@ public:
 
   ALWAYS_INLINE GSVector4 hsub(const GSVector4& v) const { return GSVector4(_mm_hsub_ps(m, v.m)); }
 
-  template
-  ALWAYS_INLINE GSVector4 dp(const GSVector4& v) const
+  ALWAYS_INLINE float dot(const GSVector4& v) const // 4-component dot product of *this and v, as a scalar
   {
-    return GSVector4(_mm_dp_ps(m, v.m, i));
+#ifdef CPU_ARCH_SSE41
+    return _mm_cvtss_f32(_mm_dp_ps(m, v.m, 0xf1)); // 0xf1: multiply all four lanes, write the sum to lane 0
+#else
+    __m128 tmp = _mm_mul_ps(m, v.m);
+    tmp = _mm_add_ps(tmp, _mm_movehl_ps(tmp, tmp)); // (x+z, y+w, ..., ...); unpackhi would give (x+z, y+z)
+    tmp = _mm_add_ss(tmp, _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(3, 2, 1, 1))); // lane 0 += lane 1
+    return _mm_cvtss_f32(tmp);
+#endif
   }
 
   ALWAYS_INLINE GSVector4 sat(const GSVector4& min, const GSVector4& max) const
@@ -2393,6 +2399,11 @@ public:
 
   ALWAYS_INLINE GSVector2 zw() const { return GSVector2(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 2, 3, 2))); }
 
+  ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h)
+  {
+    return GSVector4(_mm_movelh_ps(l.m, h.m));
+  }
+
 #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
   ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \
   { \