mirror of https://github.com/PCSX2/pcsx2.git
GS: Remove 32bit code from SW renderer
This commit is contained in:
parent
ccd86a242c
commit
f5afbfd4f5
|
@ -30,22 +30,8 @@ using namespace Xbyak;
|
|||
// If use_lod, m_local.gd->tex, else m_local.gd->tex[0]
|
||||
#define _64_m_local__gd__tex r14
|
||||
|
||||
#define _rip_local(field) ((is32 || m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
||||
#define _rip_global(field) ((is32 || m_rip) ? ptr[rip + (char*)&m_local.gd->field] : ptr[_m_local__gd + OFFSETOF(GSScanlineGlobalData, field)])
|
||||
|
||||
/// Executes the given code only if targeting 32-bit
|
||||
#define ONLY32(code) if (is32) (code)
|
||||
|
||||
/// Executes the given code only if targeting 64-bit
|
||||
#define ONLY64(code) if (is64) (code)
|
||||
|
||||
/// Combines temporary with either dst64 on 64-bit or src32 on 32-bit
|
||||
/// Follow up with an ONLY32 save back to src32
|
||||
#define REG_64_MEM_32(operation, dst64, temporary, src32) \
|
||||
if (is32) \
|
||||
operation(temporary, src32); \
|
||||
else \
|
||||
operation(dst64, temporary)
|
||||
#define _rip_local(field) ((m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
||||
#define _rip_global(field) ((m_rip) ? ptr[rip + (char*)&m_local.gd->field] : ptr[_m_local__gd + OFFSETOF(GSScanlineGlobalData, field)])
|
||||
|
||||
/// On AVX, does a v-prefixed separate destination operation
|
||||
/// On SSE, moves src1 into dst using movdqa, then does the operation
|
||||
|
@ -66,15 +52,8 @@ using namespace Xbyak;
|
|||
/// On x64, does a 3-operand operation (via THREEARG) reading from src64; on x86, uses the two-operand SSE-style form that operates on dst in place
|
||||
#define MOVE_IF_64(operation, dst, src64, ...) \
|
||||
do \
|
||||
{ \
|
||||
if (is64) \
|
||||
{ \
|
||||
THREEARG(operation, dst, src64, __VA_ARGS__); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
operation(dst, __VA_ARGS__); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define USING_XMM DRAW_SCANLINE_USING_XMM
|
||||
|
@ -106,20 +85,20 @@ GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator*
|
|||
, m_rip(false)
|
||||
#ifdef _WIN32
|
||||
, a0(rcx), a1(rdx)
|
||||
, a2(r8) , a3(is64 ? r9 : rbx)
|
||||
, a2(r8) , a3(r9)
|
||||
, t0(rdi), t1(rsi)
|
||||
, t2(is64 ? r8 : rbp), t3(r9)
|
||||
, t2(r8) , t3(r9)
|
||||
#else
|
||||
, a0(is64 ? rdi : rcx), a1(is64 ? rsi : rdx)
|
||||
, a2(is64 ? rdx : r8), a3(is64 ? rcx : rbx)
|
||||
, t0(is64 ? r8 : rdi), t1(is64 ? r9 : rsi)
|
||||
, t2(is64 ? rcx : rbp), t3(is64 ? rsi : r8)
|
||||
, a0(rdi), a1(rsi)
|
||||
, a2(rdx), a3(rcx)
|
||||
, t0(r8) , t1(r9)
|
||||
, t2(rcx), t3(rsi)
|
||||
#endif
|
||||
, _g_const(chooseLocal(&*g_const, _64_g_const))
|
||||
, _m_local(chooseLocal(&m_local, _64_m_local))
|
||||
, _m_local__gd(chooseLocal(m_local.gd, _64_m_local__gd))
|
||||
, _m_local__gd__vm(chooseLocal(m_local.gd->vm, _64_m_local__gd__vm))
|
||||
, _rb(xym5), _ga(xym6), _fm(xym3), _zm(xym4), _fd(xym2), _test(is64 ? xym15 : xym7)
|
||||
, _rb(xym5), _ga(xym6), _fm(xym3), _zm(xym4), _fd(xym2), _test(xym15)
|
||||
, _z(xym8), _f(xym9), _s(xym10), _t(xym11), _q(xym12), _f_rb(xym13), _f_ga(xym14)
|
||||
{
|
||||
m_sel.key = key;
|
||||
|
@ -132,7 +111,6 @@ GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator*
|
|||
|
||||
GSDrawScanlineCodeGenerator2::LocalAddr GSDrawScanlineCodeGenerator2::loadAddress(AddressReg reg, const void* addr)
|
||||
{
|
||||
if (is64)
|
||||
mov(reg, (size_t)addr);
|
||||
return choose3264((size_t)addr, reg);
|
||||
}
|
||||
|
@ -352,15 +330,6 @@ void GSDrawScanlineCodeGenerator2::Generate()
|
|||
m_rip &= (size_t)&m_local < 0x80000000;
|
||||
m_rip &= (size_t)&m_local.gd < 0x80000000;
|
||||
|
||||
if (is32)
|
||||
{
|
||||
push(rbx);
|
||||
push(rsi);
|
||||
push(rdi);
|
||||
push(rbp);
|
||||
}
|
||||
else
|
||||
{
|
||||
push(rbp);
|
||||
mov(rbp, rsp); // Stack traces look much nicer this way
|
||||
#ifdef _WIN32
|
||||
|
@ -396,7 +365,6 @@ void GSDrawScanlineCodeGenerator2::Generate()
|
|||
|
||||
if (need_clut)
|
||||
mov(_64_m_local__gd__clut, _rip_global(clut));
|
||||
}
|
||||
|
||||
Init();
|
||||
|
||||
|
@ -615,19 +583,6 @@ L("step");
|
|||
|
||||
L("exit");
|
||||
|
||||
|
||||
|
||||
if (is32)
|
||||
{
|
||||
pop(ebp);
|
||||
pop(edi);
|
||||
pop(esi);
|
||||
pop(ebx);
|
||||
|
||||
ret(8);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef _WIN32
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
|
@ -656,7 +611,6 @@ L("exit");
|
|||
vzeroupper();
|
||||
ret();
|
||||
}
|
||||
}
|
||||
|
||||
/// Inputs: a0=pixels, a1=left, a2[x64]=top, a3[x64]=v
|
||||
void GSDrawScanlineCodeGenerator2::Init()
|
||||
|
@ -683,7 +637,7 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
and(eax, a0.cvt32());
|
||||
if (isXmm)
|
||||
shl(eax, 4); // * sizeof(m_test[0])
|
||||
ONLY64(cdqe());
|
||||
cdqe();
|
||||
|
||||
if (isXmm)
|
||||
{
|
||||
|
@ -713,8 +667,6 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
// rbx = left
|
||||
// Free: rax, t0, t1
|
||||
|
||||
if (is64)
|
||||
{
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
mov(rax, _rip_global(fzbr));
|
||||
lea(t1, ptr[rax + a2 * 8]);
|
||||
|
@ -722,35 +674,13 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
|
||||
mov(rax, _rip_global(fzbc));
|
||||
lea(t0, ptr[rax + rbx * 2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
mov(t1, ptr[rsp + _top]);
|
||||
lea(t1, ptr[t1 * 8]);
|
||||
add(t1, ptr[&m_local.gd->fzbr]);
|
||||
|
||||
// GSVector2i* fza_offset = &m_local.gd->fzbc[left >> 2];
|
||||
lea(t0, ptr[rbx * 2]);
|
||||
add(t0, ptr[(size_t)&m_local.gd->fzbc]);
|
||||
}
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS && (m_sel.fwrite && m_sel.fge || m_sel.zb) || m_sel.fb && (m_sel.edge || m_sel.tfx != TFX_NONE || m_sel.iip))
|
||||
{
|
||||
// a1 = &m_local.d[skip] // note a1 was (skip << 4)
|
||||
|
||||
if (is64)
|
||||
{
|
||||
lea(rax, _rip_local(d));
|
||||
lea(a1, ptr[rax + a1 * 8]);
|
||||
}
|
||||
else
|
||||
{
|
||||
lea(a1, ptr[(size_t)m_local.d + a1 * 8]);
|
||||
// a3 starts on the stack in x86, we want it in a register
|
||||
mov(a3, ptr[rsp + _v]);
|
||||
}
|
||||
}
|
||||
|
||||
// a0 = steps (rcx | rdi)
|
||||
// a1 = skip (rdx | rsi)
|
||||
|
@ -760,8 +690,8 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
// t1 = fza_base (rsi | r9 )
|
||||
// Free: rax
|
||||
|
||||
const XYm& f = is64 ? _f : xym1;
|
||||
const XYm& z = is64 ? _z : xym0;
|
||||
const XYm& f = _f;
|
||||
const XYm& z = _z;
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
|
@ -777,9 +707,6 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
pshufhw(f, f, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(f, f, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
paddw(f, ptr[a1 + offsetof(GSScanlineLocalData::skip, f)]);
|
||||
|
||||
if (is32) // _f is shared on x86
|
||||
movdqa(ptr[&m_local.temp.f], f);
|
||||
}
|
||||
|
||||
if (m_sel.zb)
|
||||
|
@ -788,18 +715,8 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
{
|
||||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
shufps(z, z, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
if (is64)
|
||||
{
|
||||
addps(z, ptr[a1 + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
movaps(ptr[&m_local.temp.z], z);
|
||||
movaps(xym2, ptr[a1 + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
movaps(ptr[&m_local.temp.zo], xym2);
|
||||
addps(z, xym2);
|
||||
}
|
||||
}
|
||||
else
|
||||
pbroadcastdLocal(z, _rip_local(p.z));
|
||||
}
|
||||
|
@ -812,7 +729,7 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
pbroadcastdLocal(z, _rip_local(p.z));
|
||||
}
|
||||
|
||||
if (m_sel.fwrite && m_sel.fge && is64)
|
||||
if (m_sel.fwrite && m_sel.fge)
|
||||
pbroadcastwLocal(_f, _rip_local(p.f));
|
||||
}
|
||||
|
||||
|
@ -840,8 +757,8 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
{
|
||||
// a1 = &m_local.d[skip]
|
||||
|
||||
const XYm& s = is64 ? _s : xym2;
|
||||
const XYm& t = is64 ? _t : xym3;
|
||||
const XYm& s = _s;
|
||||
const XYm& t = _t;
|
||||
|
||||
if (m_sel.fst)
|
||||
{
|
||||
|
@ -863,19 +780,16 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
}
|
||||
else if (m_sel.ltf)
|
||||
{
|
||||
XYm vf = is64 ? xym7 : xym6;
|
||||
XYm vf = xym7;
|
||||
pshuflw(vf, t, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(vf, vf, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(vf, 12);
|
||||
movdqa(_rip_local(temp.vf), vf);
|
||||
}
|
||||
|
||||
ONLY32(movdqa(_rip_local(temp.s), s));
|
||||
ONLY32(movdqa(_rip_local(temp.t), t));
|
||||
}
|
||||
else
|
||||
{
|
||||
const XYm& q = is64 ? _q : vt;
|
||||
const XYm& q = _q;
|
||||
|
||||
// s = vt.xxxx() + m_local.d[skip].s;
|
||||
// t = vt.yyyy() + m_local.d[skip].t;
|
||||
|
@ -891,7 +805,7 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
{
|
||||
movaps(s, vt);
|
||||
movaps(t, vt);
|
||||
ONLY64(movaps(q, vt));
|
||||
movaps(q, vt);
|
||||
|
||||
shufps(s, s, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(t, t, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
@ -901,20 +815,13 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
addps(s, ptr[a1 + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
addps(t, ptr[a1 + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
addps(q, ptr[a1 + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
|
||||
if (is32)
|
||||
{
|
||||
movaps(ptr[&m_local.temp.s], s);
|
||||
movaps(ptr[&m_local.temp.t], t);
|
||||
movaps(ptr[&m_local.temp.q], q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!(m_sel.tfx == TFX_DECAL && m_sel.tcc))
|
||||
{
|
||||
const XYm& f_rb = is64 ? _f_rb : xym5;
|
||||
const XYm& f_ga = is64 ? _f_ga : xym6;
|
||||
const XYm& f_rb = _f_rb;
|
||||
const XYm& f_ga = _f_ga;
|
||||
if (m_sel.iip)
|
||||
{
|
||||
// GSVector4i vc = GSVector4i(v.c);
|
||||
|
@ -942,23 +849,18 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
|
||||
paddw(f_rb, ptr[a1 + offsetof(GSScanlineLocalData::skip, rb)]);
|
||||
paddw(f_ga, ptr[a1 + offsetof(GSScanlineLocalData::skip, ga)]);
|
||||
|
||||
ONLY32(movdqa(ptr[&m_local.temp.rb], f_rb));
|
||||
ONLY32(movdqa(ptr[&m_local.temp.ga], f_ga));
|
||||
}
|
||||
else if (is64 || m_sel.tfx == TFX_NONE)
|
||||
else
|
||||
{
|
||||
movdqa(f_rb, _rip_local(c.rb));
|
||||
movdqa(f_ga, _rip_local(c.ga));
|
||||
}
|
||||
|
||||
ONLY64(movdqa(_rb, _f_rb));
|
||||
ONLY64(movdqa(_ga, _f_ga));
|
||||
movdqa(_rb, _f_rb);
|
||||
movdqa(_ga, _f_ga);
|
||||
}
|
||||
}
|
||||
|
||||
if (is64)
|
||||
{
|
||||
if (m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe)
|
||||
{
|
||||
// On linux, a2 is edx which will be used for fzm
|
||||
|
@ -975,7 +877,6 @@ void GSDrawScanlineCodeGenerator2::Init()
|
|||
mov(_64_m_local__gd__tex, _rip_global(tex));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Inputs: a0=steps, t0=fza_offset
|
||||
/// Outputs[x86]: xym0=z xym2=s, xym3=t, xym4=q, xym5=rb, xym6=ga, xym7=test
|
||||
|
@ -991,8 +892,8 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
|
||||
add(t0, vecsize / 2);
|
||||
|
||||
const XYm& z = is64 ? _z : xym0;
|
||||
const XYm& f = is64 ? _f : xym1;
|
||||
const XYm& z =_z;
|
||||
const XYm& f =_f;
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
|
@ -1004,13 +905,6 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
{
|
||||
pbroadcastdLocal(z, _rip_local(p.z));
|
||||
}
|
||||
else if (is32)
|
||||
{
|
||||
broadcastssLocal(z, _rip_local_d_p(z));
|
||||
addps(z, _rip_local(temp.zo));
|
||||
movaps(_rip_local(temp.zo), z);
|
||||
addps(z, _rip_local(temp.z));
|
||||
}
|
||||
else
|
||||
{
|
||||
BROADCAST_AND_OP(vbroadcastss, addps, z, xym0, _rip_local_d_p(z));
|
||||
|
@ -1020,26 +914,10 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
// f = f.add16(m_local.d4.f);
|
||||
|
||||
if (m_sel.fwrite && m_sel.fge)
|
||||
{
|
||||
if (is32)
|
||||
{
|
||||
pbroadcastwLocal(f, _rip_local_d_p(f));
|
||||
paddw(f, _rip_local(temp.f));
|
||||
movdqa(_rip_local(temp.f), f);
|
||||
}
|
||||
else
|
||||
{
|
||||
BROADCAST_AND_OP(vpbroadcastw, paddw, f, xym0, _rip_local_d_p(f));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is32 && m_sel.ztest)
|
||||
{
|
||||
pbroadcastdLocal(z, _rip_local(p.z));
|
||||
}
|
||||
}
|
||||
|
||||
if (m_sel.fb)
|
||||
{
|
||||
|
@ -1047,7 +925,7 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
{
|
||||
if (m_sel.fst)
|
||||
{
|
||||
const XYm& stq = is64 ? xym0 : xym4;
|
||||
const XYm& stq = xym0;
|
||||
// GSVector4i stq = m_local.d4.stq;
|
||||
|
||||
// s += stq.xxxx();
|
||||
|
@ -1055,28 +933,22 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
|
||||
broadcasti128(stq, _rip_local_d(stq));
|
||||
|
||||
XYm s = is64 ? xym1 : xym2;
|
||||
XYm s = xym1;
|
||||
pshufd(s, stq, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
REG_64_MEM_32(paddd, _s, s, _rip_local(temp.s));
|
||||
ONLY32(movdqa(_rip_local(temp.s), s));
|
||||
paddd(_s, s);
|
||||
|
||||
XYm t = is64 ? xym1 : xym3;
|
||||
XYm t = xym1;
|
||||
if (m_sel.prim != GS_SPRITE_CLASS || m_sel.mmin)
|
||||
{
|
||||
pshufd(t, stq, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
REG_64_MEM_32(paddd, _t, t, _rip_local(temp.t));
|
||||
ONLY32(movdqa(_rip_local(temp.t), t));
|
||||
}
|
||||
else
|
||||
{
|
||||
ONLY32(movdqa(t, _rip_local(temp.t)));
|
||||
paddd(_t, t);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const XYm& s = xym2;
|
||||
const XYm& t = xym3;
|
||||
const XYm& q = is64 ? xym1 : xym4;
|
||||
const XYm& q = xym1;
|
||||
// GSVector4 stq = m_local.d4.stq;
|
||||
|
||||
// s += stq.xxxx();
|
||||
|
@ -1102,13 +974,9 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
shufps(q, q, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
REG_64_MEM_32(addps, _s, s, _rip_local(temp.s));
|
||||
REG_64_MEM_32(addps, _t, t, _rip_local(temp.t));
|
||||
REG_64_MEM_32(addps, _q, q, _rip_local(temp.q));
|
||||
|
||||
ONLY32(movaps(_rip_local(temp.s), s));
|
||||
ONLY32(movaps(_rip_local(temp.t), t));
|
||||
ONLY32(movaps(_rip_local(temp.q), q));
|
||||
addps(_s, s);
|
||||
addps(_t, t);
|
||||
addps(_q, q);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1116,7 +984,7 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
{
|
||||
if (m_sel.iip)
|
||||
{
|
||||
XYm c = is64 ? xym0 : xym7;
|
||||
XYm c = xym0;
|
||||
// GSVector4i c = m_local.d4.c;
|
||||
|
||||
// rb = rb.add16(c.xxxx());
|
||||
|
@ -1127,29 +995,18 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
pshufd(_rb, c, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(_ga, c, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
REG_64_MEM_32(paddw, _f_rb, _rb, _rip_local(temp.rb));
|
||||
REG_64_MEM_32(paddw, _f_ga, _ga, _rip_local(temp.ga));
|
||||
paddw(_f_rb, _rb);
|
||||
paddw(_f_ga, _ga);
|
||||
|
||||
// FIXME: color may underflow and roll over at the end of the line, if decreasing
|
||||
|
||||
pxor(c, c);
|
||||
pmaxsw(is64 ? _f_rb : _rb, c);
|
||||
pmaxsw(is64 ? _f_ga : _ga, c);
|
||||
|
||||
ONLY32(movdqa(_rip_local(temp.rb), _rb));
|
||||
ONLY32(movdqa(_rip_local(temp.ga), _ga));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
ONLY32(movdqa(_rb, ptr[&m_local.c.rb]));
|
||||
ONLY32(movdqa(_ga, ptr[&m_local.c.ga]));
|
||||
}
|
||||
pmaxsw(_f_rb, c);
|
||||
pmaxsw(_f_ga, c);
|
||||
}
|
||||
|
||||
ONLY64(movdqa(_rb, _f_rb));
|
||||
ONLY64(movdqa(_ga, _f_ga));
|
||||
movdqa(_rb, _f_rb);
|
||||
movdqa(_ga, _f_ga);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1162,7 +1019,7 @@ void GSDrawScanlineCodeGenerator2::Step()
|
|||
and(eax, a0.cvt32());
|
||||
if (isXmm)
|
||||
shl(eax, 4);
|
||||
ONLY64(cdqe());
|
||||
cdqe();
|
||||
|
||||
#if USING_XMM
|
||||
movdqa(_test, ptr[rax + _g_const + offsetof(GSScanlineConstantData, m_test_128b[7])]);
|
||||
|
@ -1182,7 +1039,7 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
|||
return;
|
||||
}
|
||||
|
||||
const XYm& z = is64 ? _z : xym0;
|
||||
const XYm& z = _z;
|
||||
|
||||
// int za = fza_base.y + fza_offset->y;
|
||||
|
||||
|
@ -1196,7 +1053,7 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
|||
{
|
||||
if (m_sel.zequal)
|
||||
{
|
||||
ONLY64(movdqa(xym0, _z));
|
||||
movdqa(xym0, _z);
|
||||
}
|
||||
else if (m_sel.zoverflow)
|
||||
{
|
||||
|
@ -1247,7 +1104,7 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
|||
}
|
||||
else
|
||||
{
|
||||
ONLY64(movdqa(xym0, _z));
|
||||
movdqa(xym0, _z);
|
||||
}
|
||||
|
||||
if (m_sel.ztest)
|
||||
|
@ -1312,22 +1169,11 @@ void GSDrawScanlineCodeGenerator2::SampleTexture()
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
if (is32)
|
||||
{
|
||||
mov(ebx, ptr[&m_local.gd->tex[0]]);
|
||||
|
||||
if (m_sel.tlu)
|
||||
{
|
||||
mov(edx, ptr[&m_local.gd->clut]);
|
||||
}
|
||||
}
|
||||
|
||||
const bool needsMoreRegs = isYmm;
|
||||
|
||||
if (!m_sel.fst)
|
||||
{
|
||||
rcpps(xym0, is64 ? _q : xym4);
|
||||
rcpps(xym0, _q);
|
||||
|
||||
MOVE_IF_64(mulps, xym2, _s, xym0);
|
||||
MOVE_IF_64(mulps, xym3, _t, xym0);
|
||||
|
@ -1349,21 +1195,19 @@ void GSDrawScanlineCodeGenerator2::SampleTexture()
|
|||
}
|
||||
else
|
||||
{
|
||||
ONLY64(movdqa(xym2, _s));
|
||||
ONLY64(movdqa(xym3, _t));
|
||||
movdqa(xym2, _s);
|
||||
movdqa(xym3, _t);
|
||||
}
|
||||
|
||||
if (m_sel.ltf)
|
||||
{
|
||||
const XYm& vf = is64 ? xym7 : xym0;
|
||||
const XYm& vf = xym7;
|
||||
|
||||
// GSVector4i uf = u.xxzzlh().srl16(12);
|
||||
|
||||
pshuflw(xym4, xym2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xym4, xym4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xym4, 12);
|
||||
if (is32 && needsMoreRegs)
|
||||
movdqa(_rip_local(temp.uf), xym4);
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
|
@ -1372,10 +1216,10 @@ void GSDrawScanlineCodeGenerator2::SampleTexture()
|
|||
pshuflw(vf, xym3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(vf, vf, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(vf, 12);
|
||||
if (is32 || needsMoreRegs)
|
||||
if (needsMoreRegs)
|
||||
movdqa(_rip_local(temp.vf), vf);
|
||||
}
|
||||
else if (is64 && !needsMoreRegs)
|
||||
else if (!needsMoreRegs)
|
||||
{
|
||||
movdqa(vf, _rip_local(temp.vf));
|
||||
}
|
||||
|
@ -1484,8 +1328,8 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
|||
// c10 = addr10.gather32_32((const u32/u8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const u32/u8*)tex[, clut]);
|
||||
|
||||
const XYm& tmp1 = is64 ? xym7 : xym4; // OK to destroy if needsMoreRegs
|
||||
const XYm& tmp2 = is64 ? xym4 : xym7;
|
||||
const XYm& tmp1 = xym7; // OK to destroy if needsMoreRegs
|
||||
const XYm& tmp2 = xym4;
|
||||
// d0 d1 d2s0 d3s1 s2 s3
|
||||
ReadTexel4(xym5, xym6, xym0, xym2, xym1, xym3, tmp1, tmp2, mip_offset);
|
||||
|
||||
|
@ -1496,9 +1340,6 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
|||
// xym6 = c00
|
||||
// xym7 = used[x86] vf[x64&&!needsMoreRegs]
|
||||
|
||||
if (is32 && needsMoreRegs)
|
||||
movdqa(xym4, _rip_local(temp.uf));
|
||||
|
||||
// GSVector4i rb00 = c00 & mask;
|
||||
// GSVector4i ga00 = (c00 >> 8) & mask;
|
||||
|
||||
|
@ -1565,8 +1406,8 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
|||
// rb00 = rb00.lerp16_4(rb10, vf);
|
||||
// ga00 = ga00.lerp16_4(ga10, vf);
|
||||
|
||||
XYm vf = is64 ? xym7 : xym2;
|
||||
if (needsMoreRegs || is32)
|
||||
XYm vf = xym7;
|
||||
if (needsMoreRegs)
|
||||
movdqa(vf, _rip_local(temp.vf));
|
||||
|
||||
lerp16_4(xym5, xym0, vf);
|
||||
|
@ -1729,21 +1570,8 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
return;
|
||||
}
|
||||
|
||||
if (is32)
|
||||
{
|
||||
push(t2);
|
||||
|
||||
mov(t2, (size_t)m_local.gd->tex);
|
||||
|
||||
if (m_sel.tlu)
|
||||
{
|
||||
mov(edx, ptr[&m_local.gd->clut]);
|
||||
}
|
||||
}
|
||||
|
||||
const bool needsMoreRegs = isYmm;
|
||||
|
||||
if (is64)
|
||||
movdqa(xym4, _q);
|
||||
|
||||
if (!m_sel.fst)
|
||||
|
@ -1758,8 +1586,8 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
}
|
||||
else
|
||||
{
|
||||
ONLY64(movdqa(xym2, _s));
|
||||
ONLY64(movdqa(xym3, _t));
|
||||
movdqa(xym2, _s);
|
||||
movdqa(xym3, _t);
|
||||
}
|
||||
|
||||
// xym2 = u
|
||||
|
@ -1974,7 +1802,7 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
|
||||
if (m_sel.ltf)
|
||||
{
|
||||
const XYm& vf = is64 ? xym7 : xym0;
|
||||
const XYm& vf = xym7;
|
||||
// u -= 0x8000;
|
||||
// v -= 0x8000;
|
||||
|
||||
|
@ -1989,15 +1817,13 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
pshuflw(xym4, xym2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xym4, xym4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xym4, 12);
|
||||
if (is32 && needsMoreRegs)
|
||||
movdqa(_rip_local(temp.uf), xym4);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(12);
|
||||
|
||||
pshuflw(vf, xym3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(vf, vf, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(vf, 12);
|
||||
if (is32 || needsMoreRegs)
|
||||
if (needsMoreRegs)
|
||||
movdqa(_rip_local(temp.vf), vf);
|
||||
}
|
||||
|
||||
|
@ -2058,7 +1884,7 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
|
||||
if (m_sel.ltf)
|
||||
{
|
||||
const XYm& vf = is64 ? xym7 : xym0;
|
||||
const XYm& vf = xym7;
|
||||
// u -= 0x8000;
|
||||
// v -= 0x8000;
|
||||
|
||||
|
@ -2073,15 +1899,13 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
pshuflw(xym4, xym2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xym4, xym4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xym4, 12);
|
||||
if (is32 && needsMoreRegs)
|
||||
movdqa(_rip_local(temp.uf), xym4);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(12);
|
||||
|
||||
pshuflw(vf, xym3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(vf, vf, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(vf, 12);
|
||||
if (is32 || needsMoreRegs)
|
||||
if (needsMoreRegs)
|
||||
movdqa(_rip_local(temp.vf), vf);
|
||||
}
|
||||
|
||||
|
@ -2131,9 +1955,6 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
|||
lerp16(xym5, xym2, xym0, 0);
|
||||
lerp16(xym6, xym3, xym0, 0);
|
||||
}
|
||||
|
||||
if (is32)
|
||||
pop(t2);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator2::WrapLOD(const XYm& uv)
|
||||
|
@ -2264,9 +2085,9 @@ void GSDrawScanlineCodeGenerator2::AlphaTFX()
|
|||
return;
|
||||
}
|
||||
|
||||
const XYm& f_ga = is64 ? _f_ga : xym4;
|
||||
const XYm& tmpga = is64 ? xym1 : f_ga;
|
||||
const XYm& tmp = is64 ? xym0 : xym3;
|
||||
const XYm& f_ga = _f_ga;
|
||||
const XYm& tmpga = xym1;
|
||||
const XYm& tmp = xym0;
|
||||
Address _32_gaptr = m_sel.iip ? _rip_local(temp.ga) : _rip_local(c.ga);
|
||||
|
||||
switch (m_sel.tfx)
|
||||
|
@ -2274,9 +2095,6 @@ void GSDrawScanlineCodeGenerator2::AlphaTFX()
|
|||
case TFX_MODULATE:
|
||||
|
||||
// GSVector4i ga = iip ? gaf : m_local.c.ga;
|
||||
|
||||
ONLY32(movdqa(f_ga, _32_gaptr));
|
||||
|
||||
// gat = gat.modulate16<1>(ga).clamp8();
|
||||
|
||||
modulate16(_ga, f_ga, 1);
|
||||
|
@ -2301,8 +2119,6 @@ void GSDrawScanlineCodeGenerator2::AlphaTFX()
|
|||
{
|
||||
// GSVector4i ga = iip ? gaf : m_local.c.ga;
|
||||
|
||||
ONLY32(movdqa(f_ga, _32_gaptr));
|
||||
|
||||
MOVE_IF_64(psrlw, tmpga, f_ga, 7);
|
||||
|
||||
mix16(_ga, tmpga, tmp);
|
||||
|
@ -2313,10 +2129,6 @@ void GSDrawScanlineCodeGenerator2::AlphaTFX()
|
|||
case TFX_HIGHLIGHT:
|
||||
|
||||
// GSVector4i ga = iip ? gaf : m_local.c.ga;
|
||||
|
||||
ONLY32(movdqa(f_ga, _32_gaptr));
|
||||
ONLY32(movdqa(xym2, f_ga)); // WHY
|
||||
|
||||
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
|
||||
|
||||
MOVE_IF_64(psrlw, tmpga, f_ga, 7);
|
||||
|
@ -2338,9 +2150,6 @@ void GSDrawScanlineCodeGenerator2::AlphaTFX()
|
|||
{
|
||||
// GSVector4i ga = iip ? gaf : m_local.c.ga;
|
||||
|
||||
ONLY32(movdqa(f_ga, _32_gaptr));
|
||||
ONLY32(movdqa(xym2, f_ga));
|
||||
|
||||
MOVE_IF_64(psrlw, tmpga, f_ga, 7);
|
||||
|
||||
mix16(_ga, tmpga, tmp);
|
||||
|
@ -2505,16 +2314,13 @@ void GSDrawScanlineCodeGenerator2::ColorTFX()
|
|||
return;
|
||||
}
|
||||
|
||||
const XYm& f_ga = is64 ? _f_ga : xym2;
|
||||
const XYm& tmpga = is64 ? xym2 : f_ga;
|
||||
const XYm& f_ga = _f_ga;
|
||||
const XYm& tmpga = xym2;
|
||||
|
||||
auto modulate16_1_rb = [&]
|
||||
{
|
||||
// GSVector4i rb = iip ? rbf : m_local.c.rb;
|
||||
if (is64)
|
||||
modulate16(_rb, _f_rb, 1);
|
||||
else
|
||||
modulate16(_rb, m_sel.iip ? _rip_local(temp.rb) : _rip_local(c.rb), 1);
|
||||
};
|
||||
|
||||
switch (m_sel.tfx)
|
||||
|
@ -2537,14 +2343,6 @@ void GSDrawScanlineCodeGenerator2::ColorTFX()
|
|||
|
||||
case TFX_HIGHLIGHT:
|
||||
case TFX_HIGHLIGHT2:
|
||||
|
||||
if (m_sel.tfx == TFX_HIGHLIGHT2 && m_sel.tcc)
|
||||
{
|
||||
// GSVector4i ga = iip ? gaf : m_local.c.ga;
|
||||
|
||||
ONLY32(movdqa(f_ga, m_sel.iip ? _rip_local(temp.ga) : _rip_local(c.ga)));
|
||||
}
|
||||
|
||||
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
|
||||
|
||||
movdqa(xym1, _ga);
|
||||
|
@ -2593,21 +2391,12 @@ void GSDrawScanlineCodeGenerator2::Fog()
|
|||
return;
|
||||
}
|
||||
|
||||
const XYm& f = is64 ? _f : xym0;
|
||||
const XYm& tmp = is64 ? xym0 : xym2;
|
||||
const XYm& f = _f;
|
||||
const XYm& tmp = xym0;
|
||||
|
||||
// rb = m_local.gd->frb.lerp16<0>(rb, f);
|
||||
// ga = m_local.gd->fga.lerp16<0>(ga, f).mix16(ga);
|
||||
|
||||
if (m_sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
ONLY32(movdqa(f, _rip_local(temp.f)));
|
||||
}
|
||||
else
|
||||
{
|
||||
ONLY32(pbroadcastwLocal(f, _rip_local(p.f)));
|
||||
}
|
||||
|
||||
movdqa(xym1, _ga);
|
||||
|
||||
pbroadcastdLocal(tmp, _rip_global(frb));
|
||||
|
@ -3060,7 +2849,7 @@ void GSDrawScanlineCodeGenerator2::WriteFrame()
|
|||
}
|
||||
|
||||
|
||||
const XYm& tmp = is64 ? xym15 : xym7;
|
||||
const XYm& tmp = xym15;
|
||||
|
||||
if (m_sel.fpsm == 2 && m_sel.dthe)
|
||||
{
|
||||
|
@ -3379,7 +3168,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl(
|
|||
|
||||
void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset)
|
||||
{
|
||||
AddressReg texIn = is64 ? _64_m_local__gd__tex : t2;
|
||||
AddressReg texIn = _64_m_local__gd__tex;
|
||||
Address lod_addr = m_sel.lcm ? _rip_global(lod.i.U32[lod]) : _rip_local(temp.lod.i.U32[lod]);
|
||||
mov(ebx, lod_addr);
|
||||
mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]);
|
||||
|
@ -3397,7 +3186,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplYmm(
|
|||
const Ymm t1[] = { d1, d2s0, d3s1, s2 };
|
||||
const Ymm t2[] = { tmp, tmp, tmp, tmp };
|
||||
|
||||
bool texInRBX = is32;
|
||||
bool texInRBX = false;
|
||||
if (use_lod && m_sel.lcm)
|
||||
{
|
||||
ReadTexelImplLoadTexLOD(0, mip_offset);
|
||||
|
@ -3489,7 +3278,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplSSE4(
|
|||
else
|
||||
{
|
||||
bool preserve = false;
|
||||
bool texInRBX = is32;
|
||||
bool texInRBX = false;
|
||||
|
||||
if (use_lod && m_sel.lcm)
|
||||
{
|
||||
|
@ -3511,7 +3300,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl(const Xmm& dst, const Xmm& addr
|
|||
{
|
||||
ASSERT(i < 4);
|
||||
|
||||
AddressReg clut = is64 ? _64_m_local__gd__clut : rdx;
|
||||
AddressReg clut = _64_m_local__gd__clut;
|
||||
AddressReg tex = texInRBX ? rbx : _64_m_local__gd__tex;
|
||||
Address src = m_sel.tlu ? ptr[clut + rax * 4] : ptr[tex + rax * 4];
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
|||
|
||||
constexpr static bool isXmm = std::is_same<XYm, Xbyak::Xmm>::value;
|
||||
constexpr static bool isYmm = std::is_same<XYm, Xbyak::Ymm>::value;
|
||||
constexpr static int wordsize = is64 ? 8 : 4;
|
||||
constexpr static int wordsize = 8;
|
||||
constexpr static int vecsize = isXmm ? 16 : 32;
|
||||
constexpr static int vecsizelog = isXmm ? 4 : 5;
|
||||
constexpr static int vecints = vecsize / 4;
|
||||
|
@ -67,8 +67,7 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
|||
constexpr static int _64_rz_r15 = -8 * 5;
|
||||
constexpr static int _64_top = -8 * 6;
|
||||
#endif
|
||||
constexpr static int _top = is64 ? _64_top : _32_args + 4;
|
||||
constexpr static int _v = is64 ? _invalid : _32_args + 8;
|
||||
constexpr static int _top = _64_top;
|
||||
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
|
|
|
@ -100,30 +100,6 @@ public:
|
|||
};
|
||||
|
||||
private:
|
||||
/// Make sure the register is okay to use
|
||||
void validateRegister(const Operand& op)
|
||||
{
|
||||
if (is64)
|
||||
return;
|
||||
if (op.isREG() && (op.isExtIdx() || op.isExt8bit()))
|
||||
throw Error(Error::ERR_64_BIT_REG_IN_32);
|
||||
if (op.isMEM())
|
||||
{
|
||||
auto e = static_cast<const Address&>(op).getRegExp();
|
||||
validateRegister(e.getIndex());
|
||||
validateRegister(e.getBase());
|
||||
}
|
||||
}
|
||||
/// For easier macro-ing
|
||||
void validateRegister(int imm)
|
||||
{
|
||||
}
|
||||
|
||||
void require64()
|
||||
{
|
||||
if (!is64)
|
||||
throw Error(Error::ERR_64_INSTR_IN_32);
|
||||
}
|
||||
void requireAVX()
|
||||
{
|
||||
if (!hasAVX)
|
||||
|
@ -133,9 +109,6 @@ private:
|
|||
public:
|
||||
Xbyak::CodeGenerator& actual;
|
||||
|
||||
#if defined(_M_X86_64)
|
||||
constexpr static bool is32 = false;
|
||||
constexpr static bool is64 = true;
|
||||
using AddressReg = Xbyak::Reg64;
|
||||
using RipType = Xbyak::RegRip;
|
||||
|
||||
|
@ -144,18 +117,6 @@ public:
|
|||
|
||||
template <typename T32, typename T64>
|
||||
static T64 choose3264(T32 t32, T64 t64) { return t64; }
|
||||
#else
|
||||
constexpr static bool is32 = true;
|
||||
constexpr static bool is64 = false;
|
||||
using AddressReg = Xbyak::Reg32;
|
||||
using RipType = int;
|
||||
|
||||
template <typename T32, typename T64>
|
||||
struct Choose3264 { using type = T32; };
|
||||
|
||||
template <typename T32, typename T64>
|
||||
static T32 choose3264(T32 t32, T64 t64) { return t32; }
|
||||
#endif
|
||||
|
||||
const bool hasAVX, hasAVX2, hasFMA;
|
||||
|
||||
|
@ -238,34 +199,24 @@ public:
|
|||
#define FORWARD1(category, name, type) \
|
||||
void name(type a) \
|
||||
{ \
|
||||
validateRegister(a); \
|
||||
ACTUAL_FORWARD_##category(name, a) \
|
||||
}
|
||||
|
||||
#define FORWARD2(category, name, type1, type2) \
|
||||
void name(type1 a, type2 b) \
|
||||
{ \
|
||||
validateRegister(a); \
|
||||
validateRegister(b); \
|
||||
ACTUAL_FORWARD_##category(name, a, b) \
|
||||
}
|
||||
|
||||
#define FORWARD3(category, name, type1, type2, type3) \
|
||||
void name(type1 a, type2 b, type3 c) \
|
||||
{ \
|
||||
validateRegister(a); \
|
||||
validateRegister(b); \
|
||||
validateRegister(c); \
|
||||
ACTUAL_FORWARD_##category(name, a, b, c) \
|
||||
}
|
||||
|
||||
#define FORWARD4(category, name, type1, type2, type3, type4) \
|
||||
void name(type1 a, type2 b, type3 c, type4 d) \
|
||||
{ \
|
||||
validateRegister(a); \
|
||||
validateRegister(b); \
|
||||
validateRegister(c); \
|
||||
validateRegister(d); \
|
||||
ACTUAL_FORWARD_##category(name, a, b, c, d) \
|
||||
}
|
||||
|
||||
|
@ -282,8 +233,6 @@ public:
|
|||
#define FORWARD_SSE_XMM0(name) \
|
||||
void name(const Xmm& a, const Operand& b) \
|
||||
{ \
|
||||
validateRegister(a); \
|
||||
validateRegister(b); \
|
||||
if (hasAVX) \
|
||||
actual.v##name(a, b, Xmm(0)); \
|
||||
else \
|
||||
|
@ -326,19 +275,12 @@ public:
|
|||
#define ARGS_XOI const Xmm&, const Operand&, u8
|
||||
#define ARGS_XXO const Xmm&, const Xmm&, const Operand&
|
||||
|
||||
// For instructions that are ifdef'd out without XBYAK64
|
||||
#ifdef XBYAK64
|
||||
#define REQUIRE64(action) require64(); action
|
||||
#else
|
||||
#define REQUIRE64(action) require64()
|
||||
#endif
|
||||
|
||||
const u8 *getCurr() { return actual.getCurr(); }
|
||||
void align(int x = 16) { return actual.align(x); }
|
||||
void db(int code) { actual.db(code); }
|
||||
void L(const std::string& label) { actual.L(label); }
|
||||
|
||||
void cdqe() { REQUIRE64(actual.cdqe()); }
|
||||
void cdqe() { actual.cdqe(); }
|
||||
void ret(int imm = 0) { actual.ret(imm); }
|
||||
void vzeroupper() { requireAVX(); actual.vzeroupper(); }
|
||||
void vzeroall() { requireAVX(); actual.vzeroall(); }
|
||||
|
@ -458,7 +400,6 @@ public:
|
|||
FORWARD(3, AVX2, vpsravd, ARGS_XXO)
|
||||
FORWARD(3, AVX2, vpsrlvd, ARGS_XXO)
|
||||
|
||||
#undef REQUIRE64
|
||||
#undef ARGS_OI
|
||||
#undef ARGS_OO
|
||||
#undef ARGS_XI
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
using namespace Xbyak;
|
||||
|
||||
/// Address operand for m_local.field: RIP-relative to &m_local.field when is32 or m_rip is set,
/// otherwise the _m_local base register plus OFFSETOF(GSScanlineLocalData, field).
#define _rip_local(field) ((is32 || m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
||||
/// Address operand for m_local.field: RIP-relative to &m_local.field when m_rip is set,
/// otherwise the _m_local base register plus OFFSETOF(GSScanlineLocalData, field).
#define _rip_local(field) ((m_rip) ? ptr[rip + (char*)&m_local.field] : ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
||||
|
||||
#define _64_m_local _64_t0
|
||||
|
||||
|
@ -53,15 +53,15 @@ GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base,
|
|||
, m_rip(false), many_regs(false)
|
||||
// On x86 arg registers are very temporary but on x64 they aren't, so on x86 some registers overlap
|
||||
#ifdef _WIN32
|
||||
, _64_vertex(is64 ? rcx : r8)
|
||||
, _index(is64 ? rdx : rcx)
|
||||
, _dscan(is64 ? r8 : rdx)
|
||||
, _64_t0(r9), t1(is64 ? r10 : rcx)
|
||||
, _64_vertex(rcx)
|
||||
, _index(rdx)
|
||||
, _dscan(r8)
|
||||
, _64_t0(r9), t1(r10)
|
||||
#else
|
||||
, _64_vertex(is64 ? rdi : r8)
|
||||
, _index(is64 ? rsi : rcx)
|
||||
, _64_vertex(rdi)
|
||||
, _index(rsi)
|
||||
, _dscan(rdx)
|
||||
, _64_t0(is64 ? rcx : r8), t1(is64 ? r8 : rcx)
|
||||
, _64_t0(rcx), t1(r8)
|
||||
#endif
|
||||
, _m_local(chooseLocal(&m_local, _64_m_local))
|
||||
{
|
||||
|
@ -88,7 +88,7 @@ void GSSetupPrimCodeGenerator2::Generate()
|
|||
m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000;
|
||||
|
||||
bool needs_shift = (m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip;
|
||||
many_regs = is64 && isYmm && !m_sel.notest && needs_shift;
|
||||
many_regs = isYmm && !m_sel.notest && needs_shift;
|
||||
|
||||
#ifdef _WIN64
|
||||
int needs_saving = many_regs ? 6 : m_sel.notest ? 0 : 2;
|
||||
|
@ -102,13 +102,11 @@ void GSSetupPrimCodeGenerator2::Generate()
|
|||
}
|
||||
#endif
|
||||
|
||||
if (is64 && !m_rip)
|
||||
if (!m_rip)
|
||||
mov(_64_m_local, (size_t)&m_local);
|
||||
|
||||
if (needs_shift)
|
||||
{
|
||||
if (is32)
|
||||
mov(_dscan, ptr[rsp + _32_dscan]);
|
||||
|
||||
if (isXmm)
|
||||
mov(rax, (size_t)g_const->m_shift_128b);
|
||||
|
@ -193,14 +191,9 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
|||
if (m_sel.prim != GS_POINT_CLASS)
|
||||
offset = sizeof(u32) * 1;
|
||||
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + offset]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
movdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
@ -231,14 +224,9 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
|||
{
|
||||
// GSVector4 p = vertex[index[1]].p;
|
||||
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
|
@ -285,14 +273,9 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
|||
if (m_sel.prim != GS_POINT_CLASS)
|
||||
offset = sizeof(u32) * 1;
|
||||
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + offset]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
mov(t1.cvt32(), ptr[rax + offsetof(GSVertexSW, t.w)]);
|
||||
mov(_rip_local(p.z), t1.cvt32());
|
||||
|
@ -354,14 +337,9 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
|||
{
|
||||
// GSVector4 p = vertex[index[1]].p;
|
||||
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
|
||||
if (m_en.f)
|
||||
{
|
||||
|
@ -564,14 +542,9 @@ void GSSetupPrimCodeGenerator2::Color()
|
|||
|
||||
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
|
||||
{
|
||||
if (is32)
|
||||
mov(_index, ptr[rsp + _32_index]);
|
||||
mov(eax, ptr[_index + sizeof(u32) * last]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
if (is64)
|
||||
add(rax, _64_vertex);
|
||||
else
|
||||
add(rax, ptr[rsp + _32_vertex]);
|
||||
}
|
||||
|
||||
if (isXmm)
|
||||
|
|
|
@ -46,12 +46,6 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
|
|||
|
||||
constexpr static int dsize = isXmm ? 4 : 8;
|
||||
|
||||
constexpr static int _32_args = 0;
|
||||
constexpr static int _invalid = 0xaaaaaaaa;
|
||||
constexpr static int _32_vertex = is64 ? _invalid : _32_args + 4;
|
||||
constexpr static int _32_index = is64 ? _invalid : _32_args + 8;
|
||||
constexpr static int _32_dscan = is64 ? _invalid : _32_args + 12;
|
||||
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
bool m_rip;
|
||||
|
|
Loading…
Reference in New Issue