GSVector: Add 4x4 matrix class

This commit is contained in:
Stenzek 2024-11-13 14:34:01 +10:00
parent 9c327af280
commit e22d67f4aa
No known key found for this signature in database
5 changed files with 218 additions and 16 deletions

View File

@ -65,3 +65,164 @@ void GSMatrix2x2::store(void* m)
{ {
std::memcpy(m, E, sizeof(E)); std::memcpy(m, E, sizeof(E));
} }
// Builds the matrix from sixteen scalars listed row by row: argument eRC ends up in E[R][C].
GSMatrix4x4::GSMatrix4x4(float e00, float e01, float e02, float e03, float e10, float e11, float e12, float e13,
                         float e20, float e21, float e22, float e23, float e30, float e31, float e32, float e33)
{
  // Gather the arguments into a table so they can be copied with a simple loop.
  const float elements[4][4] = {
    {e00, e01, e02, e03},
    {e10, e11, e12, e13},
    {e20, e21, e22, e23},
    {e30, e31, e32, e33},
  };

  for (size_t r = 0; r < 4; r++)
  {
    for (size_t c = 0; c < 4; c++)
      E[r][c] = elements[r][c];
  }
}
// Embeds a 2x2 matrix into the upper-left corner of a 4x4 matrix.
// Every element outside that corner is taken from the identity matrix
// (1 on the diagonal, 0 elsewhere).
GSMatrix4x4::GSMatrix4x4(const GSMatrix2x2& m)
{
  for (size_t r = 0; r < 4; r++)
  {
    for (size_t c = 0; c < 4; c++)
    {
      if (r < 2 && c < 2)
        E[r][c] = m.E[r][c];
      else
        E[r][c] = (r == c) ? 1.0f : 0.0f;
    }
  }
}
// Matrix product (*this) * m.
// Element (r, c) of the result is the sum over k of E[r][k] * m.E[k][c].
GSMatrix4x4 GSMatrix4x4::operator*(const GSMatrix4x4& m) const
{
  // Plain scalar code: not fast, but this is not on a hot path.
  GSMatrix4x4 product;
  for (size_t r = 0; r < 4; r++)
  {
    for (size_t c = 0; c < 4; c++)
    {
      // Terms are summed left to right, k = 0..3.
      product.E[r][c] = E[r][0] * m.E[0][c] + E[r][1] * m.E[1][c] + E[r][2] * m.E[2][c] + E[r][3] * m.E[3][c];
    }
  }
  return product;
}
// Multiplies the matrix by a 4-component vector.
// Component i of the result is the dot product of matrix row i with v.
GSVector4 GSMatrix4x4::operator*(const GSVector4& v) const
{
  // E is float[4][4], so valid row indices are 0..3. The last row is row(3);
  // row(4) would load from past the end of the matrix storage.
  const GSVector4 r0 = row(0);
  const GSVector4 r1 = row(1);
  const GSVector4 r2 = row(2);
  const GSVector4 r3 = row(3);
  return GSVector4(r0.dot(v), r1.dot(v), r2.dot(v), r3.dot(v));
}
// Returns the 4x4 identity matrix: 1 on the main diagonal, 0 everywhere else.
GSMatrix4x4 GSMatrix4x4::Identity()
{
  // Arguments are listed row by row.
  return GSMatrix4x4(1.0f, 0.0f, 0.0f, 0.0f, //
                     0.0f, 1.0f, 0.0f, 0.0f, //
                     0.0f, 0.0f, 1.0f, 0.0f, //
                     0.0f, 0.0f, 0.0f, 1.0f);
}
// Builds a rotation matrix about the X axis for the given angle (radians).
// Row/column 0 and row/column 3 are left as identity.
GSMatrix4x4 GSMatrix4x4::RotationX(float angle_in_radians)
{
  const float c = std::cos(angle_in_radians);
  const float s = std::sin(angle_in_radians);

  // Arguments are listed row by row.
  return GSMatrix4x4(1.0f, 0.0f, 0.0f, 0.0f, //
                     0.0f, c, -s, 0.0f,      //
                     0.0f, s, c, 0.0f,       //
                     0.0f, 0.0f, 0.0f, 1.0f);
}
// Builds a rotation matrix about the Y axis for the given angle (radians).
// Row/column 1 and row/column 3 are left as identity.
GSMatrix4x4 GSMatrix4x4::RotationY(float angle_in_radians)
{
  const float c = std::cos(angle_in_radians);
  const float s = std::sin(angle_in_radians);

  // Arguments are listed row by row.
  return GSMatrix4x4(c, 0.0f, s, 0.0f,       //
                     0.0f, 1.0f, 0.0f, 0.0f, //
                     -s, 0.0f, c, 0.0f,      //
                     0.0f, 0.0f, 0.0f, 1.0f);
}
// Builds a rotation matrix about the Z axis for the given angle (radians).
// Row/column 2 and row/column 3 are left as identity.
GSMatrix4x4 GSMatrix4x4::RotationZ(float angle_in_radians)
{
  const float c = std::cos(angle_in_radians);
  const float s = std::sin(angle_in_radians);

  // Arguments are listed row by row.
  return GSMatrix4x4(c, -s, 0.0f, 0.0f,      //
                     s, c, 0.0f, 0.0f,       //
                     0.0f, 0.0f, 1.0f, 0.0f, //
                     0.0f, 0.0f, 0.0f, 1.0f);
}
// Builds an off-centre orthographic projection from the given view-volume bounds.
// The scale terms sit on the diagonal and the translation terms in column 3.
GSMatrix4x4 GSMatrix4x4::OffCenterOrthographicProjection(float left, float top, float right, float bottom, float zNear,
                                                         float zFar)
{
  // X axis: scale and offset.
  const float scale_x = 2.0f / (right - left);
  const float offset_x = (left + right) / (left - right);

  // Y axis: scale and offset.
  const float scale_y = 2.0f / (top - bottom);
  const float offset_y = (top + bottom) / (bottom - top);

  // Z axis: both terms share the (zNear - zFar) denominator.
  const float depth_span = zNear - zFar;
  const float scale_z = 1.0f / depth_span;
  const float offset_z = zNear / depth_span;

  // Arguments are listed row by row.
  return GSMatrix4x4(scale_x, 0.0f, 0.0f, offset_x, //
                     0.0f, scale_y, 0.0f, offset_y, //
                     0.0f, 0.0f, scale_z, offset_z, //
                     0.0f, 0.0f, 0.0f, 1.0f);
}
// Convenience overload: forwards to the six-argument version with
// left = 0, top = 0, right = width and bottom = height.
GSMatrix4x4 GSMatrix4x4::OffCenterOrthographicProjection(float width, float height, float zNear, float zFar)
{
  const float origin = 0.0f;
  return OffCenterOrthographicProjection(origin, origin, width, height, zNear, zFar);
}
// Loads row i (E[i][0..3]) into a GSVector4. The index is not range-checked.
GSVector4 GSMatrix4x4::row(size_t i) const
{
  // NOTE(review): load<true> presumably selects the aligned-load path -- confirm against GSVector4::load.
  const float* const first_in_row = &E[i][0];
  return GSVector4::load<true>(first_in_row);
}
// Gathers column i (E[0..3][i]) into a GSVector4, top row first. The index is not range-checked.
GSVector4 GSMatrix4x4::col(size_t i) const
{
  const float from_row0 = E[0][i];
  const float from_row1 = E[1][i];
  const float from_row2 = E[2][i];
  const float from_row3 = E[3][i];
  return GSVector4(from_row0, from_row1, from_row2, from_row3);
}
// Copies all sixteen floats of E, in memory order, to the destination buffer.
// The destination must have room for sizeof(E) bytes.
void GSMatrix4x4::store(void* m)
{
  std::memcpy(m, E, sizeof(E));
}

View File

@ -37,3 +37,33 @@ public:
alignas(8) float E[2][2]; alignas(8) float E[2][2];
}; };
// 4x4 single-precision matrix. Element (r, c) is stored in E[r][c].
class alignas(VECTOR_ALIGNMENT) GSMatrix4x4
{
public:
  // Defaulted constructor; E has no member initializer.
  GSMatrix4x4() = default;

  // Element-wise constructor; arguments are listed row by row (eRC -> E[R][C]).
  GSMatrix4x4(float e00, float e01, float e02, float e03, //
              float e10, float e11, float e12, float e13, //
              float e20, float e21, float e22, float e23, //
              float e30, float e31, float e32, float e33);

  // Widens a 2x2 matrix to 4x4.
  GSMatrix4x4(const GSMatrix2x2& m);

  // Matrix * matrix and matrix * vector products.
  GSMatrix4x4 operator*(const GSMatrix4x4& m) const;
  GSVector4 operator*(const GSVector4& v) const;

  // Factory helpers.
  static GSMatrix4x4 Identity();

  static GSMatrix4x4 RotationX(float angle_in_radians);
  static GSMatrix4x4 RotationY(float angle_in_radians);
  static GSMatrix4x4 RotationZ(float angle_in_radians);

  static GSMatrix4x4 OffCenterOrthographicProjection(float left, float top, float right, float bottom, float zNear,
                                                     float zFar);
  static GSMatrix4x4 OffCenterOrthographicProjection(float width, float height, float zNear, float zFar);

  // Row / column extraction; i is expected to be in 0..3.
  GSVector4 row(size_t i) const;
  GSVector4 col(size_t i) const;

  // Writes the sixteen floats of E to m.
  void store(void* m);

  float E[4][4];
};

View File

@ -2574,6 +2574,17 @@ public:
#endif #endif
// Four-component dot product of this vector with v, returned as a scalar.
ALWAYS_INLINE float dot(const GSVector4& v) const
{
  // Lane-wise products (x*v.x, y*v.y, z*v.z, w*v.w).
  const float32x4_t products = vmulq_f32(v4s, v.v4s);
#ifdef CPU_ARCH_ARM64
  // Reduce all four lanes with a single across-vector add.
  return vaddvq_f32(products);
#else
  // Fold the high half onto the low half: (x+z, y+w).
  const float32x2_t half_sums = vadd_f32(vget_low_f32(products), vget_high_f32(products));
  // Broadcast lane 1 and add it, so lane 0 holds the total.
  const float32x2_t upper_sum = vdup_lane_f32(half_sums, 1);
  return vget_lane_f32(vadd_f32(half_sums, upper_sum), 0);
#endif
}
ALWAYS_INLINE GSVector4 sat(const GSVector4& a, const GSVector4& b) const { return max(a).min(b); } ALWAYS_INLINE GSVector4 sat(const GSVector4& a, const GSVector4& b) const { return max(a).min(b); }
ALWAYS_INLINE GSVector4 sat(const GSVector4& a) const ALWAYS_INLINE GSVector4 sat(const GSVector4& a) const

View File

@ -1845,20 +1845,9 @@ public:
GSVector4 hsub(const GSVector4& v) const { return GSVector4(x - y, z - w, v.x - v.y, v.z - v.w); } GSVector4 hsub(const GSVector4& v) const { return GSVector4(x - y, z - w, v.x - v.y, v.z - v.w); }
// NOTE(review): this span is a side-by-side diff rendering, so two versions share each line.
// One column appears to be the removed templated dp<i>() (mask-selected partial sums broadcast into a GSVector4);
// the other is the new dot(), which returns the scalar x*v.x + y*v.y + z*v.z + w*v.w.
template<int i> ALWAYS_INLINE float dot(const GSVector4& v) const
GSVector4 dp(const GSVector4& v) const
{ {
float res = 0.0f; return (x * v.x) + (y * v.y) + (z * v.z) + (w * v.w);
if constexpr (i & 0x10)
res += x * v.x;
if constexpr (i & 0x20)
res += y * v.y;
if constexpr (i & 0x40)
res += z * v.z;
if constexpr (i & 0x80)
res += w * v.w;
return GSVector4((i & 0x01) ? res : 0.0f, (i & 0x02) ? res : 0.0f, (i & 0x04) ? res : 0.0f,
(i & 0x08) ? res : 0.0f);
} }
GSVector4 sat(const GSVector4& min, const GSVector4& max) const GSVector4 sat(const GSVector4& min, const GSVector4& max) const

View File

@ -2007,10 +2007,16 @@ public:
ALWAYS_INLINE GSVector4 hsub(const GSVector4& v) const { return GSVector4(_mm_hsub_ps(m, v.m)); } ALWAYS_INLINE GSVector4 hsub(const GSVector4& v) const { return GSVector4(_mm_hsub_ps(m, v.m)); }
// NOTE(review): this span is a side-by-side diff rendering, so two versions share each line.
// One column appears to be the removed templated dp<i>() wrapper around _mm_dp_ps;
// the other is the new dot(), which returns the four-component dot product as a scalar.
template<int i> ALWAYS_INLINE float dot(const GSVector4& v) const
ALWAYS_INLINE GSVector4 dp(const GSVector4& v) const
{ {
return GSVector4(_mm_dp_ps(m, v.m, i)); #ifdef CPU_ARCH_SSE41
// Mask 0xf1: multiply all four lanes, write the sum to lane 0 only.
return _mm_cvtss_f32(_mm_dp_ps(m, v.m, 0xf1));
#else
__m128 tmp = _mm_mul_ps(m, v.m);
// movehl copies the high pair (z, w) onto the low pair, so the add yields (x+z, y+w, ..., ...).
// unpackhi(tmp, tmp) would yield (z, z, w, w): lane 1 becomes y+z and the w term is lost.
tmp = _mm_add_ps(tmp, _mm_movehl_ps(tmp, tmp)); // (x+z, y+w, ..., ...)
// Bring lane 1 (y+w) down to lane 0 and add it to x+z.
tmp = _mm_add_ss(tmp, _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(3, 2, 1, 1)));
return _mm_cvtss_f32(tmp);
#endif
} }
ALWAYS_INLINE GSVector4 sat(const GSVector4& min, const GSVector4& max) const ALWAYS_INLINE GSVector4 sat(const GSVector4& min, const GSVector4& max) const
@ -2393,6 +2399,11 @@ public:
ALWAYS_INLINE GSVector2 zw() const { return GSVector2(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 2, 3, 2))); } ALWAYS_INLINE GSVector2 zw() const { return GSVector2(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 2, 3, 2))); }
// Builds a GSVector4 whose low two lanes come from l and whose high two lanes come from h.
ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l, const GSVector2& h)
{
  // movelh: result = (l[0], l[1], h[0], h[1]).
  const __m128 combined = _mm_movelh_ps(l.m, h.m);
  return GSVector4(combined);
}
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \
{ \ { \