mirror of https://github.com/PCSX2/pcsx2.git
GS:SW: Use floats for zskip
This commit is contained in:
parent
8ba745030a
commit
0cec99361b
|
@ -131,11 +131,9 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, cons
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
const GSVector8* shift = (GSVector8*)g_const->m_shift_256b;
|
||||
const GSVector4* half_shift = reinterpret_cast<const GSVector4*>(shift);
|
||||
const GSVector4 step_shift = GSVector4::broadcast32(&shift[0]);
|
||||
#else
|
||||
const GSVector4* shift = (GSVector4*)g_const->m_shift_128b;
|
||||
const u64* half_shift = reinterpret_cast<const u64*>(shift);
|
||||
const GSVector4 step_shift = shift[0];
|
||||
#endif
|
||||
|
||||
|
@ -165,16 +163,16 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, cons
|
|||
|
||||
if (has_z)
|
||||
{
|
||||
const VectorF dz = VectorF::broadcast64(&dscan.p.z);
|
||||
const GSVector4 dz = GSVector4::broadcast64(&dscan.p.z);
|
||||
const VectorF dzf(static_cast<float>(dscan.p.F64[1]));
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector4::storel(&local.d8.p.z, dz.extract<0>().mul64(GSVector4::f32to64(shift)));
|
||||
GSVector4::storel(&local.d8.p.z, dz.mul64(GSVector4::f32to64(shift)));
|
||||
#else
|
||||
local.d4.z = dz.mul64(GSVector4::f32to64(shift));
|
||||
#endif
|
||||
for (int i = 0; i < vlen; i++)
|
||||
{
|
||||
local.d[i].z0 = dz.mul64(VectorF::f32to64(&half_shift[2 * i + 2]));
|
||||
local.d[i].z1 = dz.mul64(VectorF::f32to64(&half_shift[2 * i + 3]));
|
||||
local.d[i].z = dzf * shift[i + 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -359,8 +357,8 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
|
|||
if (sel.zb)
|
||||
{
|
||||
VectorF zbase = VectorF::broadcast64(&scan.p.z);
|
||||
z0 = zbase.add64(local.d[skip].z0);
|
||||
z1 = zbase.add64(local.d[skip].z1);
|
||||
z0 = zbase.add64(VectorF::f32to64(&local.d[skip].z.F32[0]));
|
||||
z1 = zbase.add64(VectorF::f32to64(&local.d[skip].z.F32[vlen/2]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -723,19 +723,12 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
if (m_sel.zb)
|
||||
{
|
||||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
broadcastsd(_z, ptr[a3 + offsetof(GSVertexSW, p.z)]); // v.p.z
|
||||
if (hasAVX)
|
||||
{
|
||||
vaddpd(xym7, _z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z0)]);
|
||||
vaddpd(_z, _z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z1)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
movaps(xym7, ptr[a1 + offsetof(GSScanlineLocalData::skip, z0)]);
|
||||
addpd(xym7, _z);
|
||||
addpd(_z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z1)]);
|
||||
}
|
||||
broadcastsd(xym1, ptr[a3 + offsetof(GSVertexSW, p.z)]); // v.p.z
|
||||
cvtps2pd(xym7, ptr[a1 + offsetof(GSScanlineLocalData::skip, z.I8[0])]);
|
||||
addpd(xym7, xym1);
|
||||
movaps(_rip_local(temp.z0), xym7);
|
||||
cvtps2pd(_z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z.I8[vecsize/2])]);
|
||||
addpd(_z, xym1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -170,7 +170,7 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
|||
{
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
struct skip { GSVector8 z0, z1, s, t, q; GSVector8i rb, ga, f; } d[8];
|
||||
struct skip { GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad; } d[8];
|
||||
struct step { GSVector4 stq; struct { u32 rb, ga; } c; struct { u64 z; u32 f; } p; } d8;
|
||||
struct { u32 z, f; } p;
|
||||
struct { GSVector8i rb, ga; } c;
|
||||
|
@ -198,7 +198,7 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
|||
|
||||
#else
|
||||
|
||||
struct skip { GSVector4 z0, z1, s, t, q; GSVector4i rb, ga, f; } d[4];
|
||||
struct skip { GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad; } d[4];
|
||||
struct step { GSVector4 z, stq; GSVector4i c, f; } d4;
|
||||
struct { GSVector4i rb, ga; } c;
|
||||
struct { GSVector4i z, f; } p;
|
||||
|
|
|
@ -200,18 +200,16 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
|||
mulpd(xmm1, xmm0);
|
||||
movaps(_rip_local_d_p(z), xmm1);
|
||||
|
||||
cvtpd2ps(xmm0, xmm0);
|
||||
unpcklpd(xmm0, xmm0);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z0 = dz.mul64(VectorF::f32to64(half_shift[2 * i + 2]));
|
||||
// m_local.d[i].z1 = dz.mul64(VectorF::f32to64(half_shift[2 * i + 3]));
|
||||
|
||||
cvtps2pd(xmm1, XYm(4 + i));
|
||||
pshufd(xmm2, XYm(4 + i), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
cvtps2pd(xmm2, xmm2);
|
||||
mulpd(xmm1, xmm0);
|
||||
mulpd(xmm2, xmm0);
|
||||
movaps(_rip_local(d[i].z0), xmm1);
|
||||
movaps(_rip_local(d[i].z1), xmm2);
|
||||
THREEARG(mulps, xmm1, xmm0, XYm(4 + i));
|
||||
movdqa(_rip_local(d[i].z), xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -282,33 +280,25 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
|||
if (m_en.z)
|
||||
{
|
||||
// const VectorF dz = VectorF::broadcast64(&dscan.p.z);
|
||||
vbroadcastsd(ymm0, ptr[_dscan + offsetof(GSVertexSW, p.z)]);
|
||||
movsd(xmm0, ptr[_dscan + offsetof(GSVertexSW, p.z)]);
|
||||
|
||||
// GSVector4::storel(&local.d8.p.z, dz.extract<0>().mul64(GSVector4::f32to64(shift)));
|
||||
cvtss2sd(xmm1, xmm3);
|
||||
vcvtss2sd(xmm1, xmm3, xmm3);
|
||||
vmulsd(xmm1, xmm0, xmm1);
|
||||
movsd(_rip_local_d_p(z), xmm1);
|
||||
|
||||
cvtsd2ss(xmm0, xmm0);
|
||||
vbroadcastss(ymm0, xmm0);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++)
|
||||
{
|
||||
// m_local.d[i].z0 = dz.mul64(VectorF::f32to64(half_shift[2 * i + 2]));
|
||||
// m_local.d[i].z1 = dz.mul64(VectorF::f32to64(half_shift[2 * i + 3]));
|
||||
// m_local.d[i].z = dzf * shift[i + 1];
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
{
|
||||
cvtps2pd(ymm1, Xmm(4 + i));
|
||||
vextracti128(xmm2, Ymm(4 + i), 1);
|
||||
cvtps2pd(ymm2, xmm2);
|
||||
}
|
||||
vmulps(ymm1, Ymm(4 + i), ymm0);
|
||||
else
|
||||
{
|
||||
cvtps2pd(ymm1, ptr[&g_const->m_shift_256b[i + 1][0]]);
|
||||
cvtps2pd(ymm2, ptr[&g_const->m_shift_256b[i + 1][4]]);
|
||||
}
|
||||
mulpd(ymm1, ymm0);
|
||||
mulpd(ymm2, ymm0);
|
||||
movaps(_rip_local(d[i].z0), ymm1);
|
||||
movaps(_rip_local(d[i].z1), ymm2);
|
||||
vmulps(ymm1, ymm0, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
movaps(_rip_local(d[i].z), ymm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue