mirror of https://github.com/PCSX2/pcsx2.git
GS: Replace 6 SetupPrim code generators with one merged one
This commit is contained in:
parent
ed5a7802f3
commit
fd0351ca8f
|
@ -649,6 +649,7 @@ set(pcsx2GSSources
|
|||
GS/Renderers/SW/GSRasterizer.cpp
|
||||
GS/Renderers/SW/GSRendererSW.cpp
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.cpp
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx.cpp
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.x64.avx2.cpp
|
||||
|
@ -719,6 +720,7 @@ set(pcsx2GSHeaders
|
|||
GS/Renderers/SW/GSRendererSW.h
|
||||
GS/Renderers/SW/GSScanlineEnvironment.h
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.h
|
||||
GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h
|
||||
GS/Renderers/SW/GSTextureCacheSW.h
|
||||
GS/Renderers/SW/GSTextureSW.h
|
||||
GS/Renderers/SW/GSVertexSW.h
|
||||
|
|
|
@ -0,0 +1,566 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2021 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "PrecompiledHeader.h"
|
||||
#include "GS/GS_types.h"
|
||||
#include "GSSetupPrimCodeGenerator.all.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
/// Memory operand that addresses `m_local.field`.
/// When `is32 || m_rip` holds, the operand is `rip` plus the address of the member itself;
/// otherwise it is the `_m_local` base plus the member's offset inside GSScanlineLocalData.
#define _rip_local(field) ((is32 || m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
||||
|
||||
/// Name used for the register that carries the address of `m_local`; it is an alias of the `_64_t0` temporary.
/// Generate() loads `&m_local` into it when `is64 && !m_rip`.
#define _64_m_local _64_t0
|
||||
|
||||
/// Emits a "dst = src1 <operation> args..." step with a separate destination.
/// Without AVX: copies src1 into dst with movdqa, then applies the two-operand `operation` on dst.
/// With AVX: emits the v-prefixed, non-destructive form of `operation` directly.
#define THREEARG(operation, dst, src1, ...) \
	do \
	{ \
		if (!hasAVX) \
		{ \
			movdqa(dst, src1); \
			operation(dst, __VA_ARGS__); \
		} \
		else \
		{ \
			v##operation(dst, src1, __VA_ARGS__); \
		} \
	} while (0)
|
||||
|
||||
// Accessors for the per-step delta block inside m_local.
// Builds below _M_SSE 0x501 use the `d4` block and its members directly;
// builds at or above it use the `d8` block, whose z/f members sit behind `p`.
#if _M_SSE < 0x501
	#define _rip_local_d(x) _rip_local(d4.x)
	#define _rip_local_d_p(x) _rip_local_d(x)
#else
	#define _rip_local_d(x) _rip_local(d8.x)
	#define _rip_local_d_p(x) _rip_local_d(p.x)
#endif
|
||||
|
||||
/// Prepares a setup-prim emitter that writes into `base`.
///
/// @param base  code generator that receives the emitted instructions (forwarded to the parent)
/// @param cpu   CPU feature description (forwarded to the parent)
/// @param param pointer to the GSScanlineLocalData the generated code will fill in
/// @param key   selector key; stored in m_sel and used to derive the m_en feature flags
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key)
	: _parent(base, cpu)
	, m_local(*(GSScanlineLocalData*)param)
	, m_rip(false)
	, many_regs(false)
	// Register assignment: on x64 the argument registers stay live for the whole routine,
	// on x86 they are short-lived, so some of the x86 choices deliberately share a register.
#ifdef _WIN32
	, _64_vertex(is64 ? rcx : r8)
	, _index(is64 ? rdx : rcx)
	, _dscan(is64 ? r8 : rdx)
	, _64_t0(r9)
	, t1(is64 ? r10 : rcx)
#else
	, _64_vertex(is64 ? rdi : r8)
	, _index(is64 ? rsi : rcx)
	, _dscan(rdx)
	, _64_t0(is64 ? rcx : r8)
	, t1(is64 ? r8 : rcx)
#endif
	, _m_local(chooseLocal(&m_local, _64_m_local))
{
	m_sel.key = key;

	const bool has_fb = m_sel.fb != 0;
	const bool has_texture = m_sel.tfx != TFX_NONE;
	// Colour is needed unless the texture function is DECAL with tcc set.
	const bool needs_color = m_sel.tfx != TFX_DECAL || !m_sel.tcc;

	m_en.z = (m_sel.zb != 0) ? 1 : 0;
	m_en.f = (has_fb && m_sel.fge) ? 1 : 0;
	m_en.t = (has_fb && has_texture) ? 1 : 0;
	m_en.c = (has_fb && needs_color) ? 1 : 0;
}
|
||||
|
||||
void GSSetupPrimCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem)
|
||||
{
|
||||
#if SETUP_PRIM_USING_YMM
|
||||
vbroadcastf128(reg, mem);
|
||||
#else
|
||||
movaps(reg, mem);
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator2::Generate()
|
||||
{
|
||||
// Technically we just need the delta < 2GB
|
||||
m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000;
|
||||
|
||||
bool needs_shift = (m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip;
|
||||
many_regs = is64 && isYmm && !m_sel.notest && needs_shift;
|
||||
|
||||
#ifdef _WIN64
|
||||
int needs_saving = many_regs ? 6 : m_sel.notest ? 0 : 2;
|
||||
if (needs_saving)
|
||||
{
|
||||
sub(rsp, 8 + 16 * needs_saving);
|
||||
for (int i = 0; i < needs_saving; i++)
|
||||
{
|
||||
movdqa(ptr[rsp + i * 16], Xmm(i + 6));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (is64 && !m_rip)
|
||||
mov(_64_m_local, (size_t)&m_local);
|
||||
|
||||
if (needs_shift)
|
||||
{
|
||||
if (is32)
|
||||
mov(_dscan, ptr[rsp + _32_dscan]);
|
||||
|
||||
if (isXmm)
|
||||
mov(rax, (size_t)g_const->m_shift_128b);
|
||||
else
|
||||
mov(rax, (size_t)g_const->m_shift_256b);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 2 : many_regs ? 9 : 5); i++)
|
||||
{
|
||||
movaps(XYm(3 + i), ptr[rax + i * vecsize]);
|
||||
}
|
||||
}
|
||||
|
||||
if (isXmm)
|
||||
Depth_XMM();
|
||||
else
|
||||
Depth_YMM();
|
||||
|
||||
Texture();
|
||||
|
||||
Color();
|
||||
|
||||
#ifdef _WIN64
|
||||
if (needs_saving)
|
||||
{
|
||||
for (int i = 0; i < needs_saving; i++)
|
||||
{
|
||||
movdqa(Xmm(i + 6), ptr[rsp + i * 16]);
|
||||
}
|
||||
add(rsp, 8 + 16 * needs_saving);
|
||||
}
|
||||
#endif
|
||||
if (isYmm)
|
||||
vzeroupper();
|
||||
ret();
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator2::Depth_XMM()
|
||||
{
|
||||
if (!m_en.z && !m_en.f)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// GSVector4 p = dscan.p;
|
||||
|
||||
|
||||
movaps(xmm0, ptr[_dscan + offsetof(GSVertexSW, p)]);
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
// GSVector4 df = p.wwww();
|
||||
|
||||
THREEARG(shufps, xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
// m_local.d4.f = GSVector4i(df * 4.0f).xxzzlh();
|
||||
|
||||
THREEARG(mulps, xmm2, xmm1, xmm3);
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(_rip_local_d_p(f), xmm2);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
THREEARG(mulps, xmm2, xmm1, XYm(4 + i));
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(_rip_local(d[i].f), xmm2);
|
||||
}
|
||||
}
|
||||
|
||||
if (m_en.z)
|
||||
{
|
||||
// GSVector4 dz = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
// m_local.d4.z = dz * 4.0f;
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, xmm3);
|
||||
movdqa(_rip_local_d_p(z), xmm1);
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, XYm(4 + i));
|
||||
movdqa(_rip_local(d[i].z), xmm1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 p = vertex[index[1]].p;
|
||||
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + sizeof(uint32) * 1]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
// m_local.p.f = GSVector4i(p).zzzzh().zzzz();
|
||||
movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
cvttps2dq(xmm1, xmm0);
|
||||
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
movdqa(_rip_local(p.f), xmm1);
|
||||
}
|
||||
|
||||
if (m_en.z)
|
||||
{
|
||||
// uint32 z is bypassed in t.w
|
||||
|
||||
movdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
movdqa(_rip_local(p.z), xmm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator2::Depth_YMM()
|
||||
{
|
||||
if (!m_en.z && !m_en.f)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
// GSVector4 dp8 = dscan.p * GSVector4::broadcast32(&shift[0]);
|
||||
|
||||
broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, p)]);
|
||||
|
||||
vmulps(ymm1, ymm0, ymm3);
|
||||
|
||||
if (m_en.z)
|
||||
{
|
||||
// m_local.d8.p.z = dp8.extract32<2>();
|
||||
|
||||
extractps(_rip_local_d_p(z), xmm1, 2);
|
||||
|
||||
// GSVector8 dz = GSVector8(dscan.p).zzzz();
|
||||
|
||||
vshufps(ymm2, ymm0, ymm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
// m_local.d8.p.f = GSVector4i(dp8).extract32<3>();
|
||||
|
||||
cvtps2dq(ymm1, ymm1);
|
||||
pextrd(_rip_local_d_p(f), xmm1, 3);
|
||||
|
||||
// GSVector8 df = GSVector8(dscan.p).wwww();
|
||||
|
||||
vshufps(ymm1, ymm0, ymm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++)
|
||||
{
|
||||
if (m_en.z)
|
||||
{
|
||||
// m_local.d[i].z = dz * shift[1 + i];
|
||||
|
||||
// Save a byte in the encoding for ymm8-11 by swapping with ymm2 (multiplication is communative)
|
||||
if (i < 4 || many_regs)
|
||||
vmulps(ymm0, Ymm(4 + i), ymm2);
|
||||
else
|
||||
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
movaps(_rip_local(d[i].z), ymm0);
|
||||
}
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
// m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
vmulps(ymm0, Ymm(4 + i), ymm1);
|
||||
else
|
||||
vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
cvttps2dq(ymm0, ymm0);
|
||||
pshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(_rip_local(d[i].f), ymm0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4 p = vertex[index[1]].p;
|
||||
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + sizeof(uint32) * 1]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
// m_local.p.f = GSVector4i(vertex[index[1]].p).extract32<3>();
|
||||
|
||||
movaps(xmm0, ptr[rax + offsetof(GSVertexSW, p)]);
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
pextrd(_rip_local(p.f), xmm0, 3);
|
||||
}
|
||||
|
||||
if (m_en.z)
|
||||
{
|
||||
// m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
|
||||
|
||||
mov(t1.cvt32(), ptr[rax + offsetof(GSVertexSW, t.w)]);
|
||||
mov(_rip_local(p.z), t1.cvt32());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator2::Texture()
|
||||
{
|
||||
if (!m_en.t)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// GSVector4 t = dscan.t;
|
||||
|
||||
broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, t)]);
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, xmm3);
|
||||
|
||||
if (m_sel.fst)
|
||||
{
|
||||
// m_local.d4.stq = GSVector4i(t * 4.0f);
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
|
||||
movdqa(_rip_local_d(stq), xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.d4.stq = t * 4.0f;
|
||||
|
||||
movaps(_rip_local_d(stq), xmm1);
|
||||
}
|
||||
|
||||
for (int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
// GSVector4 dq = t.zzzz();
|
||||
|
||||
THREEARG(shufps, xym1, xym0, xym0, _MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
THREEARG(mulps, xym2, XYm(4 + i), xym1);
|
||||
else
|
||||
vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
|
||||
if (m_sel.fst)
|
||||
{
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
cvttps2dq(xym2, xym2);
|
||||
|
||||
switch (j)
|
||||
{
|
||||
case 0: movdqa(_rip_local(d[i].s), xym2); break;
|
||||
case 1: movdqa(_rip_local(d[i].t), xym2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.d[i].s/t/q = v;
|
||||
|
||||
switch (j)
|
||||
{
|
||||
case 0: movaps(_rip_local(d[i].s), xym2); break;
|
||||
case 1: movaps(_rip_local(d[i].t), xym2); break;
|
||||
case 2: movaps(_rip_local(d[i].q), xym2); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSSetupPrimCodeGenerator2::Color()
|
||||
{
|
||||
if (!m_en.c)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_sel.iip)
|
||||
{
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, c)]);
|
||||
|
||||
// m_local.d4.c = GSVector4i(c * 4.0f).xzyw().ps32();
|
||||
|
||||
THREEARG(mulps, xmm1, xmm0, xmm3);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||
packssdw(xmm1, xmm1);
|
||||
if (isXmm)
|
||||
movdqa(_rip_local_d(c), xmm1);
|
||||
else
|
||||
movq(_rip_local_d(c), xmm1);
|
||||
|
||||
// xym3 is not needed anymore
|
||||
|
||||
// GSVector4 dr = c.xxxx();
|
||||
// GSVector4 db = c.zzzz();
|
||||
|
||||
THREEARG(shufps, xym2, xym0, xym0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
THREEARG(shufps, xym3, xym0, xym0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
THREEARG(mulps, xym0, XYm(4 + i), xym2);
|
||||
else
|
||||
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
cvttps2dq(xym0, xym0);
|
||||
packssdw(xym0, xym0);
|
||||
|
||||
// GSVector4i b = GSVector4i(db * m_shift[i]).ps32();
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
THREEARG(mulps, xym1, XYm(4 + i), xym3);
|
||||
else
|
||||
vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
cvttps2dq(xym1, xym1);
|
||||
packssdw(xym1, xym1);
|
||||
|
||||
// m_local.d[i].rb = r.upl16(b);
|
||||
|
||||
punpcklwd(xym0, xym1);
|
||||
movdqa(_rip_local(d[i].rb), xym0);
|
||||
}
|
||||
|
||||
// GSVector4 c = dscan.c;
|
||||
|
||||
broadcastf128(xym0, ptr[_dscan + offsetof(GSVertexSW, c)]); // not enough regs, have to reload it
|
||||
|
||||
// GSVector4 dg = c.yyyy();
|
||||
// GSVector4 da = c.wwww();
|
||||
|
||||
THREEARG(shufps, xym2, xym0, xym0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
THREEARG(shufps, xym3, xym0, xym0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : dsize); i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
THREEARG(mulps, xym0, XYm(4 + i), xym2);
|
||||
else
|
||||
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
cvttps2dq(xym0, xym0);
|
||||
packssdw(xym0, xym0);
|
||||
|
||||
// GSVector4i a = GSVector4i(da * m_shift[i]).ps32();
|
||||
|
||||
if (i < 4 || many_regs)
|
||||
THREEARG(mulps, xym1, XYm(4 + i), xym3);
|
||||
else
|
||||
vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
cvttps2dq(xym1, xym1);
|
||||
packssdw(xym1, xym1);
|
||||
|
||||
// m_local.d[i].ga = g.upl16(a);
|
||||
|
||||
punpcklwd(xym0, xym1);
|
||||
movdqa(_rip_local(d[i].ga), xym0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector4i c = GSVector4i(vertex[index[last].c);
|
||||
|
||||
int last = 0;
|
||||
|
||||
switch (m_sel.prim)
|
||||
{
|
||||
case GS_POINT_CLASS: last = 0; break;
|
||||
case GS_LINE_CLASS: last = 1; break;
|
||||
case GS_TRIANGLE_CLASS: last = 2; break;
|
||||
case GS_SPRITE_CLASS: last = 1; break;
|
||||
}
|
||||
|
||||
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
|
||||
{
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + sizeof(uint32) * last]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
}
|
||||
|
||||
if (isXmm)
|
||||
{
|
||||
cvttps2dq(xmm0, ptr[rax + offsetof(GSVertexSW, c)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
vbroadcasti128(ymm0, ptr[rax + offsetof(GSVertexSW, c)]);
|
||||
cvttps2dq(ymm0, ymm0);
|
||||
}
|
||||
|
||||
// c = c.upl16(c.zwxy());
|
||||
|
||||
pshufd(xym1, xym0, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
punpcklwd(xym0, xym1);
|
||||
|
||||
// if(!tme) c = c.srl16(7);
|
||||
|
||||
if (m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
psrlw(xym0, 7);
|
||||
}
|
||||
|
||||
// m_local.c.rb = c.xxxx();
|
||||
// m_local.c.ga = c.zzzz();
|
||||
|
||||
pshufd(xym1, xym0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xym2, xym0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
movdqa(_rip_local(c.rb), xym1);
|
||||
movdqa(_rip_local(c.ga), xym2);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2021 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "GSNewCodeGenerator.h"
|
||||
|
||||
// Vector register type used by the setup-prim generator, plus two 0/1 flags for use in #if.
// Builds below _M_SSE 0x501 work on Xmm registers, builds at or above it on Ymm registers.
#if _M_SSE < 0x501
	#define SETUP_PRIM_VECTOR_REGISTER Xbyak::Xmm
	#define SETUP_PRIM_USING_XMM 1
	#define SETUP_PRIM_USING_YMM 0
#else
	#define SETUP_PRIM_VECTOR_REGISTER Xbyak::Ymm
	#define SETUP_PRIM_USING_XMM 0
	#define SETUP_PRIM_USING_YMM 1
#endif
|
||||
|
||||
class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
|
||||
{
|
||||
using _parent = GSNewCodeGenerator;
|
||||
using XYm = SETUP_PRIM_VECTOR_REGISTER;
|
||||
|
||||
using Xmm = Xbyak::Xmm;
|
||||
using Ymm = Xbyak::Ymm;
|
||||
|
||||
/// On x86-64 we reserve a bunch of GPRs for holding addresses of locals that would otherwise be hard to reach
|
||||
/// On x86-32 the same values are just raw 32-bit addresses
|
||||
using LocalAddr = Choose3264<size_t, AddressReg>::type;
|
||||
|
||||
constexpr static bool isXmm = std::is_same<XYm, Xbyak::Xmm>::value;
|
||||
constexpr static bool isYmm = std::is_same<XYm, Xbyak::Ymm>::value;
|
||||
constexpr static int vecsize = isXmm ? 16 : 32;
|
||||
|
||||
constexpr static int dsize = isXmm ? 4 : 8;
|
||||
|
||||
constexpr static int _32_args = 0;
|
||||
constexpr static int _invalid = 0xaaaaaaaa;
|
||||
constexpr static int _32_vertex = is64 ? _invalid : _32_args + 4;
|
||||
constexpr static int _32_index = is64 ? _invalid : _32_args + 8;
|
||||
constexpr static int _32_dscan = is64 ? _invalid : _32_args + 12;
|
||||
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
bool m_rip;
|
||||
bool many_regs;
|
||||
|
||||
struct {uint32 z:1, f:1, t:1, c:1;} m_en;
|
||||
|
||||
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
|
||||
const AddressReg _64_vertex, _index, _dscan, _64_t0, t1;
|
||||
const LocalAddr _m_local;
|
||||
/// Returns the first arg on 32-bit, second on 64-bit
|
||||
static LocalAddr chooseLocal(const void* addr32, AddressReg reg64)
|
||||
{
|
||||
return choose3264((size_t)addr32, reg64);
|
||||
}
|
||||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key);
|
||||
void Generate();
|
||||
|
||||
private:
|
||||
/// Broadcast 128 bits of floats from memory to the whole register, whatever size that register might be
|
||||
void broadcastf128(const XYm& reg, const Xbyak::Address& mem);
|
||||
|
||||
void Depth_XMM();
|
||||
void Depth_YMM();
|
||||
void Texture();
|
||||
void Color();
|
||||
};
|
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include "PrecompiledHeader.h"
|
||||
#include "GSSetupPrimCodeGenerator.h"
|
||||
#include "GSSetupPrimCodeGenerator.all.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
|
@ -30,12 +31,5 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void
|
|||
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
|
||||
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
Generate_AVX2();
|
||||
#else
|
||||
if (m_cpu.has(util::Cpu::tAVX))
|
||||
Generate_AVX();
|
||||
else
|
||||
Generate_SSE();
|
||||
#endif
|
||||
GSSetupPrimCodeGenerator2(this, CPUInfo(m_cpu), param, key).Generate();
|
||||
}
|
||||
|
|
|
@ -491,6 +491,7 @@
|
|||
<ClCompile Include="GS\Window\GSSetting.cpp" />
|
||||
<ClCompile Include="GS\Window\GSSettingsDlg.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.all.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.x64.avx.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.x64.avx2.cpp" />
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.x64.cpp" />
|
||||
|
@ -855,6 +856,7 @@
|
|||
<ClInclude Include="GS\Window\GSSetting.h" />
|
||||
<ClInclude Include="GS\Window\GSSettingsDlg.h" />
|
||||
<ClInclude Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.h" />
|
||||
<ClInclude Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.all.h" />
|
||||
<ClInclude Include="GS\Renderers\OpenGL\GSShaderOGL.h" />
|
||||
<ClInclude Include="GS\GSState.h" />
|
||||
<ClInclude Include="GS\GSTables.h" />
|
||||
|
|
|
@ -1544,6 +1544,9 @@
|
|||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Software</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.all.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Software</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.x64.avx.cpp">
|
||||
<Filter>System\Ps2\GS\Renderers\Software</Filter>
|
||||
</ClCompile>
|
||||
|
@ -2646,6 +2649,9 @@
|
|||
<ClInclude Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Software</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GS\Renderers\SW\GSSetupPrimCodeGenerator.all.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Software</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GS\Renderers\SW\GSTextureCacheSW.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Software</Filter>
|
||||
</ClInclude>
|
||||
|
|
Loading…
Reference in New Issue