mirror of https://github.com/PCSX2/pcsx2.git
GS/SW: Make local a function parameter to DrawScanline
This commit is contained in:
parent
3292121e67
commit
ee4eadf7a6
|
@ -114,8 +114,9 @@ typedef GSVector4 VectorF;
|
||||||
#define LOCAL_STEP local.d4
|
#define LOCAL_STEP local.d4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local, const GSScanlineGlobalData& global)
|
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
||||||
{
|
{
|
||||||
|
const GSScanlineGlobalData& global = *local.gd;
|
||||||
GSScanlineSelector sel = global.sel;
|
GSScanlineSelector sel = global.sel;
|
||||||
|
|
||||||
bool has_z = sel.zb != 0;
|
bool has_z = sel.zb != 0;
|
||||||
|
@ -297,8 +298,10 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local, const GSScanlineGlobalData& global)
|
void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
|
||||||
{
|
{
|
||||||
|
const GSScanlineGlobalData& global = *local.gd;
|
||||||
|
|
||||||
GSScanlineSelector sel = global.sel;
|
GSScanlineSelector sel = global.sel;
|
||||||
constexpr int vlen = sizeof(VectorF) / sizeof(float);
|
constexpr int vlen = sizeof(VectorF) / sizeof(float);
|
||||||
|
|
||||||
|
@ -1575,13 +1578,13 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
|
||||||
#ifndef ENABLE_JIT_RASTERIZER
|
#ifndef ENABLE_JIT_RASTERIZER
|
||||||
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
||||||
{
|
{
|
||||||
CSetupPrim(vertex, index, dscan, local, *local.gd);
|
CSetupPrim(vertex, index, dscan, local);
|
||||||
}
|
}
|
||||||
void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
|
void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
|
||||||
{
|
{
|
||||||
CDrawScanline(pixels, left, top, scan, m_local, m_global);
|
CDrawScanline(pixels, left, top, scan, local);
|
||||||
}
|
}
|
||||||
void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
|
||||||
{
|
{
|
||||||
u32 zwrite = m_global.sel.zwrite;
|
u32 zwrite = m_global.sel.zwrite;
|
||||||
u32 edge = m_global.sel.edge;
|
u32 edge = m_global.sel.edge;
|
||||||
|
@ -1589,7 +1592,7 @@ void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& s
|
||||||
m_global.sel.zwrite = 0;
|
m_global.sel.zwrite = 0;
|
||||||
m_global.sel.edge = 1;
|
m_global.sel.edge = 1;
|
||||||
|
|
||||||
CDrawScanline(pixels, left, top, scan, m_local, m_global);
|
CDrawScanline(pixels, left, top, scan, local);
|
||||||
|
|
||||||
m_global.sel.zwrite = zwrite;
|
m_global.sel.zwrite = zwrite;
|
||||||
m_global.sel.edge = edge;
|
m_global.sel.edge = edge;
|
||||||
|
|
|
@ -34,7 +34,7 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||||
typedef void (*DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
|
typedef void (*DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
GSScanlineGlobalData m_global = {};
|
GSScanlineGlobalData m_global = {};
|
||||||
|
@ -80,8 +80,8 @@ public:
|
||||||
void BeginDraw(const GSRasterizerData* data);
|
void BeginDraw(const GSRasterizerData* data);
|
||||||
void EndDraw(u64 frame, u64 ticks, int actual, int total, int prims);
|
void EndDraw(u64 frame, u64 ticks, int actual, int total, int prims);
|
||||||
|
|
||||||
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local, const GSScanlineGlobalData& global);
|
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||||
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local, const GSScanlineGlobalData& global);
|
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
|
|
||||||
template<class T> static bool TestAlpha(T& test, T& fm, T& zm, const T& ga, const GSScanlineGlobalData& global);
|
template<class T> static bool TestAlpha(T& test, T& fm, T& zm, const T& ga, const GSScanlineGlobalData& global);
|
||||||
template<class T> static void WritePixel(const T& src, int addr, int i, u32 psm, const GSScanlineGlobalData& global);
|
template<class T> static void WritePixel(const T& src, int addr, int i, u32 psm, const GSScanlineGlobalData& global);
|
||||||
|
@ -89,14 +89,14 @@ public:
|
||||||
#ifdef ENABLE_JIT_RASTERIZER
|
#ifdef ENABLE_JIT_RASTERIZER
|
||||||
|
|
||||||
__forceinline void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local) { m_sp(vertex, index, dscan, local); }
|
__forceinline void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local) { m_sp(vertex, index, dscan, local); }
|
||||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) { m_ds(pixels, left, top, scan); }
|
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local) { m_ds(pixels, left, top, scan, local); }
|
||||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) { m_de(pixels, left, top, scan); }
|
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local) { m_de(pixels, left, top, scan, local); }
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||||
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -21,19 +21,8 @@
|
||||||
MULTI_ISA_UNSHARED_IMPL;
|
MULTI_ISA_UNSHARED_IMPL;
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
|
|
||||||
// Ease the reading of the code
|
#define _rip_local(field) ptr[_m_local + OFFSETOF(GSScanlineLocalData, field)]
|
||||||
// Note, there are versions without the _64 prefix that can be used as source (but not destination) operands on both 32 and 64 bit
|
#define _rip_global(field) ptr[_m_local__gd + OFFSETOF(GSScanlineGlobalData, field)]
|
||||||
#define _64_g_const r10
|
|
||||||
#define _64_m_local r12
|
|
||||||
#define _64_m_local__gd r13
|
|
||||||
#define _64_m_local__gd__vm t3
|
|
||||||
#define _64_m_local__gd__clut r11
|
|
||||||
// If use_lod, m_local.gd->tex, else m_local.gd->tex[0]
|
|
||||||
#define _64_m_local__gd__tex r14
|
|
||||||
|
|
||||||
#define _rip_local_(ptrtype, field) ((m_rip) ? ptrtype[rip + (char*)&m_local.field] : ptrtype[_m_local + OFFSETOF(GSScanlineLocalData, field)])
|
|
||||||
#define _rip_local(field) _rip_local_(ptr, field)
|
|
||||||
#define _rip_global(field) ((m_rip) ? ptr[rip + (char*)&m_local.gd->field] : ptr[_m_local__gd + OFFSETOF(GSScanlineGlobalData, field)])
|
|
||||||
|
|
||||||
/// On AVX, does a v-prefixed separate destination operation
|
/// On AVX, does a v-prefixed separate destination operation
|
||||||
/// On SSE, moves src1 into dst using movdqa, then does the operation
|
/// On SSE, moves src1 into dst using movdqa, then does the operation
|
||||||
|
@ -84,22 +73,29 @@ using namespace Xbyak;
|
||||||
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key)
|
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key)
|
||||||
: _parent(base, cpu)
|
: _parent(base, cpu)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
, a0(rcx), a1(rdx)
|
, a0(rcx), a1(rdx)
|
||||||
, a2(r8) , a3(r9)
|
, a2(r8) , a3(r9)
|
||||||
, t0(rdi), t1(rsi)
|
, t0(rdi), t1(rsi)
|
||||||
, t2(r8) , t3(r9)
|
, t2(r8) , t3(r9)
|
||||||
|
, _g_const(r10)
|
||||||
|
, _m_local(r12)
|
||||||
|
, _m_local__gd(r13)
|
||||||
|
, _m_local__gd__vm(t3)
|
||||||
|
, _m_local__gd__clut(r11)
|
||||||
|
, _m_local__gd__tex(r14)
|
||||||
#else
|
#else
|
||||||
, a0(rdi), a1(rsi)
|
, a0(rdi), a1(rsi)
|
||||||
, a2(rdx), a3(rcx)
|
, a2(rdx), a3(rcx)
|
||||||
, t0(r8) , t1(r9)
|
, t0(r12), t1(r9)
|
||||||
, t2(rcx), t3(rsi)
|
, t2(rcx), t3(rsi)
|
||||||
|
, _g_const(r10)
|
||||||
|
, _m_local(r8)
|
||||||
|
, _m_local__gd(r13)
|
||||||
|
, _m_local__gd__vm(t3)
|
||||||
|
, _m_local__gd__clut(r11)
|
||||||
|
, _m_local__gd__tex(r14)
|
||||||
#endif
|
#endif
|
||||||
, _g_const(chooseLocal(&g_const, _64_g_const))
|
|
||||||
, _m_local(chooseLocal(&m_local, _64_m_local))
|
|
||||||
, _m_local__gd(chooseLocal(m_local.gd, _64_m_local__gd))
|
|
||||||
, _m_local__gd__vm(chooseLocal(m_local.gd->vm, _64_m_local__gd__vm))
|
|
||||||
, _rb(xym5), _ga(xym6), _fm(xym3), _zm(xym4), _fd(xym2), _test(xym15)
|
, _rb(xym5), _ga(xym6), _fm(xym3), _zm(xym4), _fd(xym2), _test(xym15)
|
||||||
, _z(xym8), _f(xym9), _s(xym10), _t(xym11), _q(xym12), _f_rb(xym13), _f_ga(xym14)
|
, _z(xym8), _f(xym9), _s(xym10), _t(xym11), _q(xym12), _f_rb(xym13), _f_ga(xym14)
|
||||||
{
|
{
|
||||||
|
@ -111,10 +107,10 @@ GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator*
|
||||||
|
|
||||||
// MARK: - Helpers
|
// MARK: - Helpers
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator2::LocalAddr GSDrawScanlineCodeGenerator2::loadAddress(AddressReg reg, const void* addr)
|
GSDrawScanlineCodeGenerator2::AddressReg GSDrawScanlineCodeGenerator2::loadAddress(AddressReg reg, const void* addr)
|
||||||
{
|
{
|
||||||
mov(reg, (size_t)addr);
|
mov(reg, (size_t)addr);
|
||||||
return choose3264((size_t)addr, reg);
|
return reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem)
|
void GSDrawScanlineCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem)
|
||||||
|
@ -335,14 +331,11 @@ void GSDrawScanlineCodeGenerator2::split16_2x8(const XYm& l, const XYm& h, const
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::Generate()
|
void GSDrawScanlineCodeGenerator2::Generate()
|
||||||
{
|
{
|
||||||
bool need_tex = m_sel.fb && m_sel.tfx != TFX_NONE;
|
const bool need_tex = m_sel.fb && m_sel.tfx != TFX_NONE;
|
||||||
bool need_clut = need_tex && m_sel.tlu;
|
const bool need_clut = need_tex && m_sel.tlu;
|
||||||
m_rip = (size_t)getCurr() < 0x80000000;
|
|
||||||
m_rip &= (size_t)&m_local < 0x80000000;
|
|
||||||
m_rip &= (size_t)&m_local.gd < 0x80000000;
|
|
||||||
|
|
||||||
push(rbp);
|
push(rbp);
|
||||||
mov(rbp, rsp); // Stack traces look much nicer this way
|
mov(rbp, rsp); // Stack traces look much nicer this way, TODO drop in release builds
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
push(rbx);
|
push(rbx);
|
||||||
push(rsi);
|
push(rsi);
|
||||||
|
@ -359,23 +352,23 @@ void GSDrawScanlineCodeGenerator2::Generate()
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
mov(ptr[rsp + _64_rz_rbx], rbx);
|
mov(ptr[rsp + _64_rz_rbx], rbx);
|
||||||
if (!m_rip)
|
|
||||||
{
|
|
||||||
mov(ptr[rsp + _64_rz_r12], r12);
|
mov(ptr[rsp + _64_rz_r12], r12);
|
||||||
mov(ptr[rsp + _64_rz_r13], r13);
|
mov(ptr[rsp + _64_rz_r13], r13);
|
||||||
}
|
|
||||||
mov(ptr[rsp + _64_rz_r14], r14);
|
mov(ptr[rsp + _64_rz_r14], r14);
|
||||||
mov(ptr[rsp + _64_rz_r15], r15);
|
mov(ptr[rsp + _64_rz_r15], r15);
|
||||||
#endif
|
#endif
|
||||||
mov(_64_g_const, (size_t)&g_const);
|
mov(_g_const, (size_t)&g_const);
|
||||||
if (!m_rip)
|
|
||||||
{
|
#ifdef _WIN32
|
||||||
mov(_64_m_local, (size_t)&m_local);
|
// Local (5th arg) is passed on the stack in Windows.
|
||||||
mov(_64_m_local__gd, _rip_local(gd));
|
// 32 bytes of shadow space plus the 7 pushed registers (56 bytes) and the return address (8 bytes) = 96.
|
||||||
}
|
mov(_m_local, ptr[rsp + _64_win_stack_size + 96]);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
mov(_m_local__gd, _rip_local(gd));
|
||||||
|
|
||||||
if (need_clut)
|
if (need_clut)
|
||||||
mov(_64_m_local__gd__clut, _rip_global(clut));
|
mov(_m_local__gd__clut, _rip_global(clut));
|
||||||
|
|
||||||
Init();
|
Init();
|
||||||
|
|
||||||
|
@ -398,7 +391,7 @@ L("loop");
|
||||||
// xym7 = test | z0
|
// xym7 = test | z0
|
||||||
// xym15 = | test
|
// xym15 = | test
|
||||||
|
|
||||||
bool tme = m_sel.tfx != TFX_NONE;
|
const bool tme = m_sel.tfx != TFX_NONE;
|
||||||
|
|
||||||
TestZ(tme ? xym5 : xym2, tme ? xym6 : xym3);
|
TestZ(tme ? xym5 : xym2, tme ? xym6 : xym3);
|
||||||
|
|
||||||
|
@ -609,11 +602,8 @@ L("exit");
|
||||||
pop(rbx);
|
pop(rbx);
|
||||||
#else
|
#else
|
||||||
mov(rbx, ptr[rsp + _64_rz_rbx]);
|
mov(rbx, ptr[rsp + _64_rz_rbx]);
|
||||||
if (!m_rip)
|
|
||||||
{
|
|
||||||
mov(r12, ptr[rsp + _64_rz_r12]);
|
mov(r12, ptr[rsp + _64_rz_r12]);
|
||||||
mov(r13, ptr[rsp + _64_rz_r13]);
|
mov(r13, ptr[rsp + _64_rz_r13]);
|
||||||
}
|
|
||||||
mov(r14, ptr[rsp + _64_rz_r14]);
|
mov(r14, ptr[rsp + _64_rz_r14]);
|
||||||
mov(r15, ptr[rsp + _64_rz_r15]);
|
mov(r15, ptr[rsp + _64_rz_r15]);
|
||||||
#endif
|
#endif
|
||||||
|
@ -889,13 +879,13 @@ void GSDrawScanlineCodeGenerator2::Init()
|
||||||
mov(ptr[rsp + _top], a2);
|
mov(ptr[rsp + _top], a2);
|
||||||
}
|
}
|
||||||
|
|
||||||
mov(_64_m_local__gd__vm, _rip_global(vm));
|
mov(_m_local__gd__vm, _rip_global(vm));
|
||||||
if (m_sel.fb && m_sel.tfx != TFX_NONE)
|
if (m_sel.fb && m_sel.tfx != TFX_NONE)
|
||||||
{
|
{
|
||||||
if (use_lod)
|
if (use_lod)
|
||||||
lea(_64_m_local__gd__tex, _rip_global(tex));
|
lea(_m_local__gd__tex, _rip_global(tex));
|
||||||
else
|
else
|
||||||
mov(_64_m_local__gd__tex, _rip_global(tex));
|
mov(_m_local__gd__tex, _rip_global(tex));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3177,7 +3167,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl(
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset)
|
void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset)
|
||||||
{
|
{
|
||||||
AddressReg texIn = _64_m_local__gd__tex;
|
AddressReg texIn = _m_local__gd__tex;
|
||||||
Address lod_addr = m_sel.lcm ? _rip_global(lod.i.U32[lod]) : _rip_local(temp.lod.i.U32[lod]);
|
Address lod_addr = m_sel.lcm ? _rip_global(lod.i.U32[lod]) : _rip_local(temp.lod.i.U32[lod]);
|
||||||
mov(ebx, lod_addr);
|
mov(ebx, lod_addr);
|
||||||
mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]);
|
mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]);
|
||||||
|
@ -3230,7 +3220,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplYmm(
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
AddressReg tex = texInRBX ? rbx : _64_m_local__gd__tex;
|
AddressReg tex = texInRBX ? rbx : _m_local__gd__tex;
|
||||||
if (!m_sel.tlu)
|
if (!m_sel.tlu)
|
||||||
{
|
{
|
||||||
pcmpeqd(t1[i], t1[i]);
|
pcmpeqd(t1[i], t1[i]);
|
||||||
|
@ -3309,8 +3299,8 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl(const Xmm& dst, const Xmm& addr
|
||||||
{
|
{
|
||||||
ASSERT(i < 4);
|
ASSERT(i < 4);
|
||||||
|
|
||||||
AddressReg clut = _64_m_local__gd__clut;
|
AddressReg clut = _m_local__gd__clut;
|
||||||
AddressReg tex = texInRBX ? rbx : _64_m_local__gd__tex;
|
AddressReg tex = texInRBX ? rbx : _m_local__gd__tex;
|
||||||
Address src = m_sel.tlu ? ptr[clut + rax * 4] : ptr[tex + rax * 4];
|
Address src = m_sel.tlu ? ptr[clut + rax * 4] : ptr[tex + rax * 4];
|
||||||
|
|
||||||
// Extract address offset
|
// Extract address offset
|
||||||
|
|
|
@ -38,10 +38,6 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
||||||
using _parent = GSNewCodeGenerator;
|
using _parent = GSNewCodeGenerator;
|
||||||
using XYm = DRAW_SCANLINE_VECTOR_REGISTER;
|
using XYm = DRAW_SCANLINE_VECTOR_REGISTER;
|
||||||
|
|
||||||
/// On x86-64 we reserve a bunch of GPRs for holding addresses of locals that would otherwise be hard to reach
|
|
||||||
/// On x86-32 the same values are just raw 32-bit addresses
|
|
||||||
using LocalAddr = Choose3264<size_t, AddressReg>::type;
|
|
||||||
|
|
||||||
constexpr static bool isXmm = std::is_same<XYm, Xbyak::Xmm>::value;
|
constexpr static bool isXmm = std::is_same<XYm, Xbyak::Xmm>::value;
|
||||||
constexpr static bool isYmm = std::is_same<XYm, Xbyak::Ymm>::value;
|
constexpr static bool isYmm = std::is_same<XYm, Xbyak::Ymm>::value;
|
||||||
constexpr static int wordsize = 8;
|
constexpr static int wordsize = 8;
|
||||||
|
@ -74,32 +70,27 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
||||||
|
|
||||||
GSScanlineSelector m_sel;
|
GSScanlineSelector m_sel;
|
||||||
GSScanlineLocalData& m_local;
|
GSScanlineLocalData& m_local;
|
||||||
bool m_rip;
|
|
||||||
bool use_lod;
|
bool use_lod;
|
||||||
|
|
||||||
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
|
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
|
||||||
/// Note: a2 and t3 are only available on x86-64
|
/// Note: a2 and t3 are only available on x86-64
|
||||||
/// Outside of Init, usable registers are a0, t0, t1, t2, t3[x64], rax, rbx, rdx, r10+
|
/// Outside of Init, usable registers are a0, t0, t1, t2, t3[x64], rax, rbx, rdx, r10+
|
||||||
const AddressReg a0, a1, a2, a3, t0, t1, t2, t3;
|
const AddressReg a0, a1, a2, a3, t0, t1, t2, t3;
|
||||||
const LocalAddr _g_const, _m_local, _m_local__gd, _m_local__gd__vm;
|
const AddressReg _g_const, _m_local, _m_local__gd, _m_local__gd__vm, _m_local__gd__clut;
|
||||||
|
// If use_lod, m_local.gd->tex, else m_local.gd->tex[0]
|
||||||
|
const AddressReg _m_local__gd__tex;
|
||||||
/// Available on both x86 and x64, not always valid
|
/// Available on both x86 and x64, not always valid
|
||||||
const XYm _rb, _ga, _fm, _zm, _fd, _test;
|
const XYm _rb, _ga, _fm, _zm, _fd, _test;
|
||||||
/// Always valid if needed, x64 only
|
/// Always valid if needed, x64 only
|
||||||
const XYm _z, _f, _s, _t, _q, _f_rb, _f_ga;
|
const XYm _z, _f, _s, _t, _q, _f_rb, _f_ga;
|
||||||
|
|
||||||
/// Returns the first arg on 32-bit, second on 64-bit
|
|
||||||
static LocalAddr chooseLocal(const void* addr32, AddressReg reg64)
|
|
||||||
{
|
|
||||||
return choose3264((size_t)addr32, reg64);
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key);
|
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key);
|
||||||
void Generate();
|
void Generate();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Loads the given address into the given register if needed, and returns something that can be used in a `ptr[]`
|
/// Loads the given address into the given register and returns that register so it can be used in a `ptr[]`
|
||||||
LocalAddr loadAddress(AddressReg reg, const void* addr);
|
AddressReg loadAddress(AddressReg reg, const void* addr);
|
||||||
/// Broadcast 128 bits of floats from memory to the whole register, whatever size that register might be
|
/// Broadcast 128 bits of floats from memory to the whole register, whatever size that register might be
|
||||||
void broadcastf128(const XYm& reg, const Xbyak::Address& mem);
|
void broadcastf128(const XYm& reg, const Xbyak::Address& mem);
|
||||||
/// Broadcast 128 bits of integers from memory to the whole register, whatever size that register might be
|
/// Broadcast 128 bits of integers from memory to the whole register, whatever size that register might be
|
||||||
|
|
|
@ -84,7 +84,6 @@ static bool shouldUseCDrawScanline(u64 key)
|
||||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
||||||
: Xbyak::CodeGenerator(maxsize, code)
|
: Xbyak::CodeGenerator(maxsize, code)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
|
||||||
{
|
{
|
||||||
m_sel.key = key;
|
m_sel.key = key;
|
||||||
|
|
||||||
|
@ -93,18 +92,8 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, v
|
||||||
|
|
||||||
if (shouldUseCDrawScanline(key))
|
if (shouldUseCDrawScanline(key))
|
||||||
{
|
{
|
||||||
#if defined(_WIN32)
|
mov(rax, reinterpret_cast<size_t>(GSDrawScanline::CDrawScanline)); // TODO: Get rid of once we move to memory map
|
||||||
mov(r8, reinterpret_cast<size_t>(&m_local));
|
jmp(rax);
|
||||||
push(ptr[r8 + offsetof(GSScanlineLocalData, gd)]);
|
|
||||||
push(r8);
|
|
||||||
sub(rsp, 32); // CC required shadow space
|
|
||||||
call(reinterpret_cast<void*>(GSDrawScanline::CDrawScanline));
|
|
||||||
ret(48);
|
|
||||||
#else
|
|
||||||
mov(r8, reinterpret_cast<size_t>(&m_local));
|
|
||||||
mov(r9, ptr[r8 + offsetof(GSScanlineLocalData, gd)]);
|
|
||||||
jmp(reinterpret_cast<void*>(GSDrawScanline::CDrawScanline));
|
|
||||||
#endif
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,6 @@ class GSDrawScanlineCodeGenerator : public Xbyak::CodeGenerator
|
||||||
|
|
||||||
GSScanlineSelector m_sel;
|
GSScanlineSelector m_sel;
|
||||||
GSScanlineLocalData& m_local;
|
GSScanlineLocalData& m_local;
|
||||||
bool m_rip;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
|
GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
|
||||||
|
|
|
@ -91,12 +91,6 @@ public:
|
||||||
using AddressReg = Xbyak::Reg64;
|
using AddressReg = Xbyak::Reg64;
|
||||||
using RipType = Xbyak::RegRip;
|
using RipType = Xbyak::RegRip;
|
||||||
|
|
||||||
template <typename T32, typename T64>
|
|
||||||
struct Choose3264 { using type = T64; };
|
|
||||||
|
|
||||||
template <typename T32, typename T64>
|
|
||||||
static T64 choose3264(T32 t32, T64 t64) { return t64; }
|
|
||||||
|
|
||||||
const bool hasAVX, hasAVX2, hasFMA;
|
const bool hasAVX, hasAVX2, hasFMA;
|
||||||
|
|
||||||
const Xmm xmm0{0}, xmm1{1}, xmm2{2}, xmm3{3}, xmm4{4}, xmm5{5}, xmm6{6}, xmm7{7}, xmm8{8}, xmm9{9}, xmm10{10}, xmm11{11}, xmm12{12}, xmm13{13}, xmm14{14}, xmm15{15};
|
const Xmm xmm0{0}, xmm1{1}, xmm2{2}, xmm3{3}, xmm4{4}, xmm5{5}, xmm6{6}, xmm7{7}, xmm8{8}, xmm9{9}, xmm10{10}, xmm11{11}, xmm12{12}, xmm13{13}, xmm14{14}, xmm15{15};
|
||||||
|
|
|
@ -1140,7 +1140,7 @@ void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW&
|
||||||
|
|
||||||
ASSERT(m_pixels.actual <= m_pixels.total);
|
ASSERT(m_pixels.actual <= m_pixels.total);
|
||||||
|
|
||||||
m_ds->DrawScanline(pixels, left, top, scan);
|
m_ds->DrawScanline(pixels, left, top, scan, m_ds->GetLocalData());
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
||||||
|
@ -1151,7 +1151,7 @@ void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& sca
|
||||||
|
|
||||||
ASSERT(m_pixels.actual <= m_pixels.total);
|
ASSERT(m_pixels.actual <= m_pixels.total);
|
||||||
|
|
||||||
m_ds->DrawEdge(pixels, left, top, scan);
|
m_ds->DrawEdge(pixels, left, top, scan, m_ds->GetLocalData());
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::Sync()
|
void GSRasterizer::Sync()
|
||||||
|
|
Loading…
Reference in New Issue