mirror of https://github.com/PCSX2/pcsx2.git
GSdx: finally, some use for hsubps (SSE3).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4504 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e72b095e4c
commit
686b6da8e5
|
@ -80,7 +80,7 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
|
|
||||||
m_stats.Reset();
|
m_stats.Reset();
|
||||||
|
|
||||||
int64 start = __rdtsc();
|
uint64 start = __rdtsc();
|
||||||
|
|
||||||
// NOTE: data->scissor_test with templated Draw* speeds up large point lists (ffxii videos), but does not seem to make any difference for others
|
// NOTE: data->scissor_test with templated Draw* speeds up large point lists (ffxii videos), but does not seem to make any difference for others
|
||||||
|
|
||||||
|
@ -232,16 +232,16 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const int s_abc[8][4] =
|
static const uint8 s_ysort[8][4] =
|
||||||
{
|
{
|
||||||
{0, 1, 2, 0}, // c >= b >= a
|
{0, 1, 2, 0}, // y0 <= y1 <= y2
|
||||||
{1, 0, 2, 0}, // c >= a > b
|
{1, 0, 2, 0}, // y1 < y0 <= y2
|
||||||
{0, 0, 0, 0},
|
{0, 0, 0, 0},
|
||||||
{1, 2, 0, 0}, // a > c >= b
|
{1, 2, 0, 0}, // y1 <= y2 < y0
|
||||||
{0, 2, 1, 0}, // b > c >= a
|
{0, 2, 1, 0}, // y0 <= y2 < y1
|
||||||
{0, 0, 0, 0},
|
{0, 0, 0, 0},
|
||||||
{2, 0, 1, 0}, // b >= a > c
|
{2, 0, 1, 0}, // y2 < y0 <= y1
|
||||||
{2, 1, 0, 0}, // a > b > c
|
{2, 1, 0, 0}, // y2 < y1 < y0
|
||||||
};
|
};
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||||
|
@ -252,23 +252,23 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||||
GSVertexSW dedge;
|
GSVertexSW dedge;
|
||||||
GSVertexSW dscan;
|
GSVertexSW dscan;
|
||||||
|
|
||||||
GSVector4 aabb = vertices[0].p.yyyy(vertices[1].p);
|
GSVector4 y0011 = vertices[0].p.yyyy(vertices[1].p);
|
||||||
GSVector4 bccb = vertices[1].p.yyyy(vertices[2].p).xzzx();
|
GSVector4 y1221 = vertices[1].p.yyyy(vertices[2].p).xzzx();
|
||||||
|
|
||||||
int abc = (aabb > bccb).mask() & 7;
|
int mask = (y0011 > y1221).mask() & 7;
|
||||||
|
|
||||||
v[0] = vertices[s_abc[abc][0]];
|
v[0] = vertices[s_ysort[mask][0]];
|
||||||
v[1] = vertices[s_abc[abc][1]];
|
v[1] = vertices[s_ysort[mask][1]];
|
||||||
v[2] = vertices[s_abc[abc][2]];
|
v[2] = vertices[s_ysort[mask][2]];
|
||||||
|
|
||||||
aabb = v[0].p.yyyy(v[1].p);
|
y0011 = v[0].p.yyyy(v[1].p);
|
||||||
bccb = v[1].p.yyyy(v[2].p).xzzx();
|
y1221 = v[1].p.yyyy(v[2].p).xzzx();
|
||||||
|
|
||||||
int i = (aabb == bccb).mask() & 7;
|
int i = (y0011 == y1221).mask() & 7;
|
||||||
|
|
||||||
if(i == 7) return; // a == b == c
|
if(i == 7) return; // y0 == y1 == y2
|
||||||
|
|
||||||
GSVector4 tbf = aabb.xzxz(bccb).ceil();
|
GSVector4 tbf = y0011.xzxz(y1221).ceil();
|
||||||
GSVector4 tbmax = tbf.max(m_fscissor.ywyw());
|
GSVector4 tbmax = tbf.max(m_fscissor.ywyw());
|
||||||
GSVector4 tbmin = tbf.min(m_fscissor.ywyw());
|
GSVector4 tbmin = tbf.min(m_fscissor.ywyw());
|
||||||
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
|
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
|
||||||
|
@ -291,39 +291,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||||
|
|
||||||
cross = cross.rcpnr();
|
cross = cross.rcpnr();
|
||||||
|
|
||||||
GSVector4 dv01xy = dv[0].p.xyxy(dv[1].p);
|
GSVector4 dxy01 = dv[0].p.xyxy(dv[1].p);
|
||||||
|
|
||||||
GSVector4 _z = dv01xy * dv[1].p.zzzz(dv[0].p);
|
GSVector4 dx = dxy01.xzxy(dv[2].p);
|
||||||
GSVector4 _f = dv01xy * dv[1].p.wwww(dv[0].p);
|
GSVector4 dy = dxy01.ywyx(dv[2].p);
|
||||||
|
|
||||||
GSVector4 _zf = (_z.yzyz(_f) - _z.wxwx(_f)) * cross;
|
|
||||||
|
|
||||||
dscan.p = _zf.xzxz();
|
|
||||||
dedge.p = _zf.ywyw();
|
|
||||||
|
|
||||||
GSVector4 _s = dv01xy * dv[1].t.xxxx(dv[0].t);
|
|
||||||
GSVector4 _t = dv01xy * dv[1].t.yyyy(dv[0].t);
|
|
||||||
GSVector4 _q = dv01xy * dv[1].t.zzzz(dv[0].t);
|
|
||||||
|
|
||||||
GSVector4 _st = (_s.yzyz(_t) - _s.wxwx(_t)) * cross;
|
|
||||||
GSVector4 _q_ = (_q.yzyz() - _q.wxwx()) * cross;
|
|
||||||
|
|
||||||
dscan.t = _st.xzxz(_q_);
|
|
||||||
dedge.t = _st.ywyw(_q_);
|
|
||||||
|
|
||||||
GSVector4 _r = dv01xy * dv[1].c.xxxx(dv[0].c);
|
|
||||||
GSVector4 _g = dv01xy * dv[1].c.yyyy(dv[0].c);
|
|
||||||
GSVector4 _b = dv01xy * dv[1].c.zzzz(dv[0].c);
|
|
||||||
GSVector4 _a = dv01xy * dv[1].c.wwww(dv[0].c);
|
|
||||||
|
|
||||||
GSVector4 _rg = (_r.yzyz(_g) - _r.wxwx(_g)) * cross;
|
|
||||||
GSVector4 _ba = (_b.yzyz(_a) - _b.wxwx(_a)) * cross;
|
|
||||||
|
|
||||||
dscan.c = _rg.xzxz(_ba);
|
|
||||||
dedge.c = _rg.ywyw(_ba);
|
|
||||||
|
|
||||||
GSVector4 dx = dv01xy.xzxy(dv[2].p);
|
|
||||||
GSVector4 dy = dv01xy.ywyx(dv[2].p);
|
|
||||||
|
|
||||||
GSVector4 ddx[3];
|
GSVector4 ddx[3];
|
||||||
|
|
||||||
|
@ -331,12 +302,37 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||||
ddx[1] = ddx[0].yxzw();
|
ddx[1] = ddx[0].yxzw();
|
||||||
ddx[2] = ddx[0].xzyw();
|
ddx[2] = ddx[0].xzyw();
|
||||||
|
|
||||||
|
GSVector4 dxy01c = dxy01 * cross;
|
||||||
|
|
||||||
|
GSVector4 _z = dxy01c * dv[1].p.zzzz(dv[0].p); // dx0 * z1, dy0 * z1, dx1 * z0, dy1 * z0
|
||||||
|
GSVector4 _f = dxy01c * dv[1].p.wwww(dv[0].p); // dx0 * f1, dy0 * f1, dx1 * f0, dy1 * f0
|
||||||
|
|
||||||
|
GSVector4 _zf = _z.ywyw(_f).hsub(_z.zxzx(_f)); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0, dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1
|
||||||
|
|
||||||
|
dscan.p = _zf.zwxy(); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0
|
||||||
|
dedge.p = _zf; // dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1
|
||||||
|
|
||||||
|
GSVector4 _s = dxy01c * dv[1].t.xxxx(dv[0].t); // dx0 * s1, dy0 * s1, dx1 * s0, dy1 * s0
|
||||||
|
GSVector4 _t = dxy01c * dv[1].t.yyyy(dv[0].t); // dx0 * t1, dy0 * t1, dx1 * t0, dy1 * t0
|
||||||
|
GSVector4 _q = dxy01c * dv[1].t.zzzz(dv[0].t); // dx0 * q1, dy0 * q1, dx1 * q0, dy1 * q0
|
||||||
|
|
||||||
|
dscan.t = _s.ywyw(_t).hsub(_q.ywyw()); // dy0 * s1 - dy1 * s0, dy0 * t1 - dy1 * t0, dy0 * q1 - dy1 * q0
|
||||||
|
dedge.t = _s.zxzx(_t).hsub(_q.zxzx()); // dx1 * s0 - dx0 * s1, dx1 * t0 - dx0 * t1, dx1 * q0 - dx0 * q1
|
||||||
|
|
||||||
|
GSVector4 _r = dxy01c * dv[1].c.xxxx(dv[0].c); // dx0 * r1, dy0 * r1, dx1 * r0, dy1 * r0
|
||||||
|
GSVector4 _g = dxy01c * dv[1].c.yyyy(dv[0].c); // dx0 * g1, dy0 * g1, dx1 * g0, dy1 * g0
|
||||||
|
GSVector4 _b = dxy01c * dv[1].c.zzzz(dv[0].c); // dx0 * b1, dy0 * b1, dx1 * b0, dy1 * b0
|
||||||
|
GSVector4 _a = dxy01c * dv[1].c.wwww(dv[0].c); // dx0 * a1, dy0 * a1, dx1 * a0, dy1 * a0
|
||||||
|
|
||||||
|
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
|
||||||
|
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
|
||||||
|
|
||||||
GSVector4 x0;
|
GSVector4 x0;
|
||||||
|
|
||||||
switch(i)
|
switch(i)
|
||||||
{
|
{
|
||||||
case 0: // a < b < c
|
case 0: // y0 < y1 < y2
|
||||||
case 4: // a < b == c
|
case 4: // y0 < y1 == y2
|
||||||
|
|
||||||
x0 = v[0].p.xxxx();
|
x0 = v[0].p.xxxx();
|
||||||
|
|
||||||
|
@ -370,7 +366,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1: // a == b < c
|
case 1: // y0 == y1 < y2
|
||||||
|
|
||||||
if(tb.y < tb.w)
|
if(tb.y < tb.w)
|
||||||
{
|
{
|
||||||
|
|
|
@ -2581,18 +2581,52 @@ public:
|
||||||
__forceinline GSVector4 hadd() const
|
__forceinline GSVector4 hadd() const
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x300
|
#if _M_SSE >= 0x300
|
||||||
|
|
||||||
return GSVector4(_mm_hadd_ps(m, m));
|
return GSVector4(_mm_hadd_ps(m, m));
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
return xzxz() + ywyw();
|
return xzxz() + ywyw();
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector4 hadd(const GSVector4& v) const
|
__forceinline GSVector4 hadd(const GSVector4& v) const
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x300
|
#if _M_SSE >= 0x300
|
||||||
|
|
||||||
return GSVector4(_mm_hadd_ps(m, v.m));
|
return GSVector4(_mm_hadd_ps(m, v.m));
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
return xzxz(v) + ywyw(v);
|
return xzxz(v) + ywyw(v);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4 hsub() const
|
||||||
|
{
|
||||||
|
#if _M_SSE >= 0x300
|
||||||
|
|
||||||
|
return GSVector4(_mm_hsub_ps(m, m));
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
return xzxz() - ywyw();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline GSVector4 hsub(const GSVector4& v) const
|
||||||
|
{
|
||||||
|
#if _M_SSE >= 0x300
|
||||||
|
|
||||||
|
return GSVector4(_mm_hsub_ps(m, v.m));
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
return xzxz(v) - ywyw(v);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue