mirror of https://github.com/PCSX2/pcsx2.git
gsdx sw x64: use rip addressing on setup prim
byte code: 9017 => 8736; saves a register
This commit is contained in:
parent
923c297dfc
commit
7c06e87d59
|
@ -179,7 +179,7 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
|
|
||||||
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
GSVector8 z, zo;
|
GSVector8 z, zo;
|
||||||
GSVector8i f;
|
GSVector8i f;
|
||||||
|
@ -196,7 +196,7 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
GSVector8i uv_minmax[2];
|
GSVector8i uv_minmax[2];
|
||||||
GSVector8i trb, tga;
|
GSVector8i trb, tga;
|
||||||
GSVector8i test;
|
GSVector8i test;
|
||||||
} temp;
|
} temp;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
@ -207,7 +207,7 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
|
|
||||||
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
GSVector4 z, zo;
|
GSVector4 z, zo;
|
||||||
GSVector4i f;
|
GSVector4i f;
|
||||||
|
@ -224,7 +224,7 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
GSVector4i uv_minmax[2];
|
GSVector4i uv_minmax[2];
|
||||||
GSVector4i trb, tga;
|
GSVector4i trb, tga;
|
||||||
GSVector4i test;
|
GSVector4i test;
|
||||||
} temp;
|
} temp;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,7 @@ void GSSetupPrimCodeGenerator::InitVectors()
|
||||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: GSCodeGenerator(code, maxsize)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
|
, m_rip(false)
|
||||||
{
|
{
|
||||||
m_sel.key = key;
|
m_sel.key = key;
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
||||||
|
|
||||||
GSScanlineSelector m_sel;
|
GSScanlineSelector m_sel;
|
||||||
GSScanlineLocalData& m_local;
|
GSScanlineLocalData& m_local;
|
||||||
|
bool m_rip;
|
||||||
|
|
||||||
struct {uint32 z:1, f:1, t:1, c:1;} m_en;
|
struct {uint32 z:1, f:1, t:1, c:1;} m_en;
|
||||||
|
|
||||||
|
|
|
@ -27,8 +27,14 @@
|
||||||
|
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
|
|
||||||
|
// Builds the memory operand used by the generated code to access m_local.<field>.
// - m_rip == true : rip-relative operand targeting the address of m_local.field.
// - m_rip == false: t0 + offsetof(GSScanlineLocalData, field); this path expects t0 to hold
//   the address of m_local (Generate_AVX only emits that mov when m_rip is false).
// `field` is a member path such as d4.f or p.z; use it only where m_rip, m_local and t0 are in scope.
#define _rip_local(field) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offsetof(GSScanlineLocalData, field)])
|
||||||
|
// Variant of the m_local operand builder for fields selected with a loop index (e.g. d[i].f).
// - m_rip == true : rip-relative operand targeting the address of m_local.field.
// - m_rip == false: t0 + offset, where the caller passes the byte offset of that same field
//   inside GSScanlineLocalData (call sites compute offsetof(d[0].x) + i * sizeof(d[0])).
// NOTE(review): `field` and `offset` must describe the same location; nothing here checks that.
// NOTE(review): the explicit offset presumably exists because offsetof() with a non-constant
// index is not portable - confirm.
#define _rip_local_v(field, offset) (m_rip ? ptr[rip + &m_local.field] : ptr[t0 + offset])
|
||||||
|
|
||||||
void GSSetupPrimCodeGenerator::Generate_AVX()
|
void GSSetupPrimCodeGenerator::Generate_AVX()
|
||||||
{
|
{
|
||||||
|
// Technically we just need the delta < 2GB
|
||||||
|
m_rip = (size_t)&m_local < 0x80000000 && (size_t)getCurr() < 0x80000000;
|
||||||
|
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
sub(rsp, 8 + 2 * 16);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
|
@ -36,7 +42,8 @@ void GSSetupPrimCodeGenerator::Generate_AVX()
|
||||||
vmovdqa(ptr[rsp + 16], xmm7);
|
vmovdqa(ptr[rsp + 16], xmm7);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mov(t0, (size_t)&m_local);
|
if (!m_rip)
|
||||||
|
mov(t0, (size_t)&m_local);
|
||||||
|
|
||||||
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
||||||
{
|
{
|
||||||
|
@ -89,7 +96,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
|
||||||
vcvttps2dq(xmm2, xmm2);
|
vcvttps2dq(xmm2, xmm2);
|
||||||
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
vpshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.f)], xmm2);
|
vmovdqa(_rip_local(d4.f), xmm2);
|
||||||
|
|
||||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||||
{
|
{
|
||||||
|
@ -101,7 +108,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
|
||||||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
|
||||||
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0]));
|
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].f) + (i * sizeof(GSScanlineLocalData::d[0]));
|
||||||
vmovdqa(ptr[t0 + variableOffset], xmm2);
|
vmovdqa(_rip_local_v(d[i].f, variableOffset), xmm2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,7 +121,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
|
||||||
// m_local.d4.z = dz * 4.0f;
|
// m_local.d4.z = dz * 4.0f;
|
||||||
|
|
||||||
vmulps(xmm1, xmm0, xmm3);
|
vmulps(xmm1, xmm0, xmm3);
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.z)], xmm1);
|
vmovdqa(_rip_local(d4.z), xmm1);
|
||||||
|
|
||||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||||
{
|
{
|
||||||
|
@ -123,7 +130,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
|
||||||
vmulps(xmm1, xmm0, Xmm(4 + i));
|
vmulps(xmm1, xmm0, Xmm(4 + i));
|
||||||
|
|
||||||
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0]));
|
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].z) + (i * sizeof(GSScanlineLocalData::d[0]));
|
||||||
vmovdqa(ptr[t0 + variableOffset], xmm1);
|
vmovdqa(_rip_local_v(d[i].z, variableOffset), xmm1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,7 +151,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
|
||||||
vcvttps2dq(xmm1, xmm0);
|
vcvttps2dq(xmm1, xmm0);
|
||||||
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.f)], xmm1);
|
vmovdqa(_rip_local(p.f), xmm1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_en.z)
|
if(m_en.z)
|
||||||
|
@ -153,7 +160,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX()
|
||||||
|
|
||||||
vmovdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
vmovdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
||||||
vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, p.z)], xmm0);
|
vmovdqa(_rip_local(p.z), xmm0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -177,13 +184,13 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
|
||||||
|
|
||||||
vcvttps2dq(xmm1, xmm1);
|
vcvttps2dq(xmm1, xmm1);
|
||||||
|
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
vmovdqa(_rip_local(d4.stq), xmm1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// m_local.d4.stq = t * 4.0f;
|
// m_local.d4.stq = t * 4.0f;
|
||||||
|
|
||||||
vmovaps(ptr[t0 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
vmovaps(_rip_local(d4.stq), xmm1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||||
|
@ -211,8 +218,8 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
|
||||||
|
|
||||||
switch(j)
|
switch(j)
|
||||||
{
|
{
|
||||||
case 0: vmovdqa(ptr[t0 + variableOffsetS], xmm2); break;
|
case 0: vmovdqa(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
|
||||||
case 1: vmovdqa(ptr[t0 + variableOffsetT], xmm2); break;
|
case 1: vmovdqa(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -225,9 +232,9 @@ void GSSetupPrimCodeGenerator::Texture_AVX()
|
||||||
|
|
||||||
switch(j)
|
switch(j)
|
||||||
{
|
{
|
||||||
case 0: vmovaps(ptr[t0 + variableOffsetS], xmm2); break;
|
case 0: vmovaps(_rip_local_v(d[i].s, variableOffsetS), xmm2); break;
|
||||||
case 1: vmovaps(ptr[t0 + variableOffsetT], xmm2); break;
|
case 1: vmovaps(_rip_local_v(d[i].t, variableOffsetT), xmm2); break;
|
||||||
case 2: vmovaps(ptr[t0 + variableOffsetQ], xmm2); break;
|
case 2: vmovaps(_rip_local_v(d[i].q, variableOffsetQ), xmm2); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -253,7 +260,7 @@ void GSSetupPrimCodeGenerator::Color_AVX()
|
||||||
vcvttps2dq(xmm1, xmm1);
|
vcvttps2dq(xmm1, xmm1);
|
||||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
|
vpshufd(xmm1, xmm1, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
vpackssdw(xmm1, xmm1);
|
vpackssdw(xmm1, xmm1);
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, d4.c)], xmm1);
|
vmovdqa(_rip_local(d4.c), xmm1);
|
||||||
|
|
||||||
// xmm3 is not needed anymore
|
// xmm3 is not needed anymore
|
||||||
|
|
||||||
|
@ -282,7 +289,7 @@ void GSSetupPrimCodeGenerator::Color_AVX()
|
||||||
vpunpcklwd(xmm0, xmm1);
|
vpunpcklwd(xmm0, xmm1);
|
||||||
|
|
||||||
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0]));
|
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].rb) + (i * sizeof(GSScanlineLocalData::d[0]));
|
||||||
vmovdqa(ptr[t0 + variableOffset], xmm0);
|
vmovdqa(_rip_local_v(d[i].rb, variableOffset), xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// GSVector4 c = dscan.c;
|
// GSVector4 c = dscan.c;
|
||||||
|
@ -314,7 +321,7 @@ void GSSetupPrimCodeGenerator::Color_AVX()
|
||||||
vpunpcklwd(xmm0, xmm1);
|
vpunpcklwd(xmm0, xmm1);
|
||||||
|
|
||||||
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0]));
|
const size_t variableOffset = offsetof(GSScanlineLocalData, d[0].ga) + (i * sizeof(GSScanlineLocalData::d[0]));
|
||||||
vmovdqa(ptr[t0 + variableOffset], xmm0);
|
vmovdqa(_rip_local_v(d[i].ga, variableOffset), xmm0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -358,8 +365,8 @@ void GSSetupPrimCodeGenerator::Color_AVX()
|
||||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
vpshufd(xmm2, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||||
|
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, c.rb)], xmm1);
|
vmovdqa(_rip_local(c.rb), xmm1);
|
||||||
vmovdqa(ptr[t0 + offsetof(GSScanlineLocalData, c.ga)], xmm2);
|
vmovdqa(_rip_local(c.ga), xmm2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue