GS/SW: Work around not being able to use non-constant offsetof

This commit is contained in:
Stenzek 2023-12-29 21:31:29 +10:00 committed by Connor McLaughlin
parent 0e15de7103
commit fc1304dff5
2 changed files with 15 additions and 14 deletions

View File

@ -17,7 +17,9 @@ using namespace Xbyak;
// Addressing helpers used when emitting instructions; each expands to a `ptr[...]` memory operand:
//   _rip_const(cptr)                  - rip plus the given pointer, cast to char*.
//   _rip_local(field)                 - _m_local plus offsetof(GSScanlineLocalData, field).
//   _rip_local_offset(field, offset)  - same as _rip_local, with an additional byte offset.
//   _rip_global(field)                - _m_local__gd plus offsetof(GSScanlineGlobalData, field).
//   _rip_global_offset(field, offset) - same as _rip_global, with an additional byte offset.
// The *_offset forms let a caller pass a fixed member designator to offsetof and supply any
// run-time index separately as a byte offset.
#define _rip_const(cptr) ptr[rip + ((char*)(cptr))] #define _rip_const(cptr) ptr[rip + ((char*)(cptr))]
#define _rip_local(field) ptr[_m_local + offsetof(GSScanlineLocalData, field)] #define _rip_local(field) ptr[_m_local + offsetof(GSScanlineLocalData, field)]
#define _rip_local_offset(field, offset) ptr[_m_local + offsetof(GSScanlineLocalData, field) + (offset)]
#define _rip_global(field) ptr[_m_local__gd + offsetof(GSScanlineGlobalData, field)] #define _rip_global(field) ptr[_m_local__gd + offsetof(GSScanlineGlobalData, field)]
#define _rip_global_offset(field, offset) ptr[_m_local__gd + offsetof(GSScanlineGlobalData, field) + (offset)]
/// On AVX, does a v-prefixed separate destination operation /// On AVX, does a v-prefixed separate destination operation
/// On SSE, moves src1 into dst using movdqa, then does the operation /// On SSE, moves src1 into dst using movdqa, then does the operation
@ -3146,7 +3148,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl(
// Emits two loads: a 32-bit value is read into ebx from element `lod` of a U32 vector, and that
// value then indexes a second load into rbx.
//   lod        - selects which U32 element is read.
//   mip_offset - constant displacement added to the address of the second load.
// The element address is written as the offset of U32[0] plus sizeof(u32) * lod, so that offsetof
// is only ever given a constant member designator.
// NOTE(review): the value loaded into rbx is presumably a per-mip texture pointer - confirm.
void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset) void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset)
{ {
AddressReg texIn = _m_local__gd__tex; AddressReg texIn = _m_local__gd__tex;
// Source is lod.i in the global data when m_sel.lcm is set, otherwise temp.lod.i in the local data.
Address lod_addr = m_sel.lcm ? _rip_global(lod.i.U32[lod]) : _rip_local(temp.lod.i.U32[lod]); Address lod_addr = m_sel.lcm ? _rip_global_offset(lod.i.U32[0], sizeof(u32) * lod) : _rip_local_offset(temp.lod.i.U32[0], sizeof(u32) * lod);
mov(ebx, lod_addr); mov(ebx, lod_addr);
// Use the value just loaded as a scaled index: rbx = [texIn + rbx * wordsize + mip_offset].
mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]); mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]);
} }

View File

@ -11,8 +11,7 @@ MULTI_ISA_UNSHARED_IMPL;
using namespace Xbyak; using namespace Xbyak;
// Memory operand for a member of GSScanlineLocalData, addressed relative to _m_local.
#define _rip_local(field) (ptr[_m_local + offsetof(GSScanlineLocalData, field)]) #define _rip_local(field) (ptr[_m_local + offsetof(GSScanlineLocalData, field)])
// Memory operand for d[i].field: offsetof is applied to d[0].field (a constant designator) and the
// index is added separately as i * sizeof(GSScanlineLocalData::skip).
// NOTE(review): this relies on each element of d[] having the same size as `skip` - confirm
// against the GSScanlineLocalData definition.
#define _rip_local_di(i, field) (ptr[_m_local + offsetof(GSScanlineLocalData, d[0].field) + (sizeof(GSScanlineLocalData::skip) * (i))])
// _64_m_local is an alias for _64_t0.
#define _64_m_local _64_t0
/// On AVX, does a v-prefixed separate destination operation /// On AVX, does a v-prefixed separate destination operation
/// On SSE, moves src1 into dst using movdqa, then does the operation /// On SSE, moves src1 into dst using movdqa, then does the operation
@ -171,7 +170,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
cvttps2dq(xmm2, xmm2); cvttps2dq(xmm2, xmm2);
pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshuflw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
movdqa(_rip_local(d[i].f), xmm2); movdqa(_rip_local_di(i, f), xmm2);
} }
} }
@ -194,7 +193,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
// m_local.d[i].z1 = dz.mul64(VectorF::f32to64(half_shift[2 * i + 3])); // m_local.d[i].z1 = dz.mul64(VectorF::f32to64(half_shift[2 * i + 3]));
THREEARG(mulps, xmm1, xmm0, XYm(4 + i)); THREEARG(mulps, xmm1, xmm0, XYm(4 + i));
movdqa(_rip_local(d[i].z), xmm1); movdqa(_rip_local_di(i, z), xmm1);
} }
} }
} }
@ -258,7 +257,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
cvttps2dq(ymm0, ymm0); cvttps2dq(ymm0, ymm0);
pshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); pshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
pshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); pshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
movdqa(_rip_local(d[i].f), ymm0); movdqa(_rip_local_di(i, f), ymm0);
} }
} }
@ -283,7 +282,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
vmulps(ymm1, Ymm(4 + i), ymm0); vmulps(ymm1, Ymm(4 + i), ymm0);
else else
vmulps(ymm1, ymm0, ptr[g_const.m_shift_256b[i + 1]]); vmulps(ymm1, ymm0, ptr[g_const.m_shift_256b[i + 1]]);
movaps(_rip_local(d[i].z), ymm1); movaps(_rip_local_di(i, z), ymm1);
} }
} }
} }
@ -367,8 +366,8 @@ void GSSetupPrimCodeGenerator2::Texture()
switch (j) switch (j)
{ {
case 0: movdqa(_rip_local(d[i].s), xym2); break; case 0: movdqa(_rip_local_di(i, s), xym2); break;
case 1: movdqa(_rip_local(d[i].t), xym2); break; case 1: movdqa(_rip_local_di(i, t), xym2); break;
} }
} }
else else
@ -377,9 +376,9 @@ void GSSetupPrimCodeGenerator2::Texture()
switch (j) switch (j)
{ {
case 0: movaps(_rip_local(d[i].s), xym2); break; case 0: movaps(_rip_local_di(i, s), xym2); break;
case 1: movaps(_rip_local(d[i].t), xym2); break; case 1: movaps(_rip_local_di(i, t), xym2); break;
case 2: movaps(_rip_local(d[i].q), xym2); break; case 2: movaps(_rip_local_di(i, q), xym2); break;
} }
} }
} }
@ -441,7 +440,7 @@ void GSSetupPrimCodeGenerator2::Color()
// m_local.d[i].rb = r.upl16(b); // m_local.d[i].rb = r.upl16(b);
punpcklwd(xym0, xym1); punpcklwd(xym0, xym1);
movdqa(_rip_local(d[i].rb), xym0); movdqa(_rip_local_di(i, rb), xym0);
} }
// GSVector4 c = dscan.c; // GSVector4 c = dscan.c;
@ -477,7 +476,7 @@ void GSSetupPrimCodeGenerator2::Color()
// m_local.d[i].ga = g.upl16(a); // m_local.d[i].ga = g.upl16(a);
punpcklwd(xym0, xym1); punpcklwd(xym0, xym1);
movdqa(_rip_local(d[i].ga), xym0); movdqa(_rip_local_di(i, ga), xym0);
} }
} }
else else