mirror of https://github.com/PCSX2/pcsx2.git
GS SW: Handle flat prims without float conversion
This commit is contained in:
parent
125b9ea1e1
commit
14e8d840ff
|
@ -98,6 +98,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
sel.fb = m_global.sel.fb;
|
||||
sel.zb = m_global.sel.zb;
|
||||
sel.zoverflow = m_global.sel.zoverflow;
|
||||
sel.zequal = m_global.sel.zequal;
|
||||
sel.notest = m_global.sel.notest;
|
||||
|
||||
m_sp = m_sp_map[sel];
|
||||
|
@ -143,13 +144,20 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const u32* index, const
|
|||
|
||||
if (has_z)
|
||||
{
|
||||
m_local.d8.p.z = dp8.extract32<2>();
|
||||
|
||||
GSVector8 dz = GSVector8::broadcast32(&dscan.p.z);
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
if (sel.zequal)
|
||||
{
|
||||
m_local.d[i].z = dz * shift[1 + i];
|
||||
m_local.p.z = vertex[index[1]].t.U32[3];
|
||||
}
|
||||
|
||||
{
|
||||
m_local.d8.p.z = dp8.extract32<2>();
|
||||
|
||||
const GSVector8 dz = GSVector8::broadcast32(&dscan.p.z);
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
m_local.d[i].z = dz * shift[1 + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -545,9 +553,14 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if (sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// Need to handle when the float converts incorrectly
|
||||
GSVector8 z = GSVector8::broadcast32(&scan.p.z) + zo;
|
||||
|
||||
if (sel.zoverflow)
|
||||
if (sel.zequal)
|
||||
{
|
||||
zs = GSVector8i::broadcast32(&m_local.p.z);
|
||||
}
|
||||
else if (sel.zoverflow)
|
||||
{
|
||||
zs = (GSVector8i(z * 0.5f) << 1) | (GSVector8i(z) & GSVector8i::x00000001());
|
||||
}
|
||||
|
@ -2787,7 +2800,7 @@ void GSDrawScanline::WritePixel(const T& src, int addr, int i, u32 psm)
|
|||
*(u32*)dst = (src.U32[i] & 0xffffff) | (*(u32*)dst & 0xff000000);
|
||||
break;
|
||||
case 2:
|
||||
*(u16*)dst = src.u16[i * 2];
|
||||
*(u16*)dst = src.U16[i * 2];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -784,19 +784,24 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
|
||||
if (m_sel.zb)
|
||||
{
|
||||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
shufps(z, z, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
if (is64)
|
||||
if (!m_sel.zequal)
|
||||
{
|
||||
addps(z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
shufps(z, z, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
if (is64)
|
||||
{
|
||||
addps(z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
movaps(ptr[&m_local.temp.z], z);
|
||||
movaps(xym2, ptr[a1 + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
movaps(ptr[&m_local.temp.zo], xym2);
|
||||
addps(z, xym2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
movaps(ptr[&m_local.temp.z], z);
|
||||
movaps(xym2, ptr[a1 + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
movaps(ptr[&m_local.temp.zo], xym2);
|
||||
addps(z, xym2);
|
||||
}
|
||||
pbroadcastdLocal(z, _rip_local(p.z));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -995,7 +1000,11 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
|
||||
if (m_sel.zb)
|
||||
{
|
||||
if (is32)
|
||||
if (m_sel.zequal)
|
||||
{
|
||||
pbroadcastdLocal(z, _rip_local(p.z));
|
||||
}
|
||||
else if (is32)
|
||||
{
|
||||
broadcastssLocal(z, _rip_local_d_p(z));
|
||||
addps(z, _rip_local(temp.zo));
|
||||
|
@ -1185,9 +1194,18 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
|||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if (m_sel.zoverflow)
|
||||
if (m_sel.zequal)
|
||||
{
|
||||
ONLY64(movdqa(xym0, _z));
|
||||
}
|
||||
else if (m_sel.zoverflow)
|
||||
{
|
||||
// zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
/*GSVector8 z = GSVector8::broadcast32(&scan.p.z) + zo;
|
||||
z /= 2;
|
||||
zs = GSVector8i(z, true);
|
||||
zs = zs.min_u32(GSVector8i::x7fffffff());
|
||||
zs = zs.sll32(1) | 1;*/
|
||||
|
||||
auto m_half = loadAddress(rax, &GSVector4::m_half);
|
||||
|
||||
|
@ -1195,6 +1213,7 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
|||
vbroadcastss(temp1, ptr[m_half]);
|
||||
else
|
||||
movaps(temp1, ptr[m_half]);
|
||||
|
||||
mulps(temp1, z);
|
||||
cvttps2dq(temp1, temp1);
|
||||
pslld(temp1, 1);
|
||||
|
|
|
@ -407,6 +407,7 @@ public:
|
|||
AFORWARD(3, pinsrd, ARGS_XOI)
|
||||
AFORWARD(2, pmaxsw, ARGS_XO)
|
||||
AFORWARD(2, pminsd, ARGS_XO)
|
||||
AFORWARD(2, pminud, ARGS_XO)
|
||||
AFORWARD(2, pminsw, ARGS_XO)
|
||||
SFORWARD(2, pmovsxbd, ARGS_XO)
|
||||
SFORWARD(2, pmovmskb, const Reg32e&, const Xmm&)
|
||||
|
|
|
@ -313,7 +313,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
|
|||
}
|
||||
}
|
||||
|
||||
if (primclass == GS_SPRITE_CLASS)
|
||||
if (primclass == GS_SPRITE_CLASS || m_vt.m_eq.z)
|
||||
{
|
||||
xyzuvf = xyzuvf.min_u32(z_max);
|
||||
t = t.insert32<1, 3>(GSVector4::cast(xyzuvf));
|
||||
|
@ -1341,6 +1341,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
|
||||
gd.sel.zequal = !!m_vt.m_eq.z;
|
||||
gd.sel.zoverflow = (u32)GSVector4i(m_vt.m_max.p).z == 0x80000000U;
|
||||
gd.sel.zclamp = (u32)GSVector4i(m_vt.m_max.p).z > z_max;
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ union GSScanlineSelector
|
|||
u32 mmin : 2; // 54
|
||||
u32 notest : 1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
|
||||
// TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction
|
||||
|
||||
u32 zequal : 1; // 56
|
||||
u32 breakpoint : 1; // Insert a trap to stop the program, helpful to stop debugger on a program
|
||||
};
|
||||
|
||||
|
|
|
@ -187,21 +187,43 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
|||
|
||||
if (m_en.z)
|
||||
{
|
||||
// GSVector4 dz = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_local.d4.z = dz * 4.0f;
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, xmm3);
|
||||
movdqa(_rip_local_d_p(z), xmm1);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
if (m_sel.zequal)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
u32 offset = 0;
|
||||
if (m_sel.prim != GS_POINT_CLASS)
|
||||
offset = sizeof(u32) * 1;
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, XYm(4 + i));
|
||||
movdqa(_rip_local(d[i].z), xmm1);
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + offset]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
movdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
movdqa(_rip_local(p.z), xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 dz = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_local.d4.z = dz * 4.0f;
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, xmm3);
|
||||
movdqa(_rip_local_d_p(z), xmm1);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, XYm(4 + i));
|
||||
movdqa(_rip_local(d[i].z), xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -257,13 +279,34 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
|||
|
||||
if (m_en.z)
|
||||
{
|
||||
// m_local.d8.p.z = dp8.extract32<2>();
|
||||
if (m_sel.zequal)
|
||||
{
|
||||
u32 offset = 0;
|
||||
if (m_sel.prim != GS_POINT_CLASS)
|
||||
offset = sizeof(u32) * 1;
|
||||
|
||||
extractps(_rip_local_d_p(z), xmm1, 2);
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + offset]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
// GSVector8 dz = GSVector8(dscan.p).zzzz();
|
||||
mov(t1.cvt32(), ptr[rax + offsetof(GSVertexSW, t.w)]);
|
||||
mov(_rip_local(p.z), t1.cvt32());
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.d8.p.z = dp8.extract32<2>();
|
||||
|
||||
vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
extractps(_rip_local_d_p(z), xmm1, 2);
|
||||
|
||||
// GSVector8 dz = GSVector8(dscan.p).zzzz();
|
||||
|
||||
vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
}
|
||||
|
||||
if (m_en.f)
|
||||
|
|
Loading…
Reference in New Issue