GS/SW: Move scanline local data to rasterizer

This commit is contained in:
Stenzek 2023-01-25 20:51:18 +10:00 committed by refractionpcsx2
parent 90fc037833
commit 56046d4db8
13 changed files with 66 additions and 76 deletions

View File

@ -146,7 +146,6 @@ template <class CG, class KEY, class VALUE>
class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
std::string m_name;
void* m_param;
std::unordered_map<u64, VALUE> m_cgmap;
GSCodeBuffer m_cb;
size_t m_total_code_size;
@ -154,9 +153,8 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
enum { MAX_SIZE = 8192 };
public:
GSCodeGeneratorFunctionMap(const char* name, void* param)
GSCodeGeneratorFunctionMap(const char* name)
: m_name(name)
, m_param(param)
, m_total_code_size(0)
{
}
@ -182,7 +180,7 @@ public:
{
void* code_ptr = m_cb.GetBuffer(MAX_SIZE);
CG* cg = new CG(m_param, key, code_ptr, MAX_SIZE);
CG* cg = new CG(key, code_ptr, MAX_SIZE);
ASSERT(cg->getSize() < MAX_SIZE);
#if 0

View File

@ -24,16 +24,21 @@ constexpr GSScanlineConstantData g_const;
MULTI_ISA_UNSHARED_IMPL;
// Returns the GSScanlineGlobalData that `local` refers to through its `gd` pointer.
// The pointer is dereferenced without a null check, so `local.gd` must already be set
// (BeginDraw assigns it) before this helper is called.
static __forceinline const GSScanlineGlobalData& GlobalFromLocal(const GSScanlineLocalData& local)
{
return *local.gd;
}
GSDrawScanline::GSDrawScanline()
: m_sp_map("GSSetupPrim", &m_local)
, m_ds_map("GSDrawScanline", &m_local)
: m_sp_map("GSSetupPrim")
, m_ds_map("GSDrawScanline")
{
}
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
void GSDrawScanline::BeginDraw(const GSRasterizerData* data, GSScanlineLocalData& local)
{
const GSScanlineGlobalData& global = ((const SharedData*)data)->global;
m_local.gd = &global;
local.gd = &global;
if (global.sel.mmin && global.sel.lcm)
{
@ -55,8 +60,8 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
v = v.upl16(v);
m_local.temp.uv_minmax[0] = v.upl32(v);
m_local.temp.uv_minmax[1] = v.uph32(v);
local.temp.uv_minmax[0] = v.upl32(v);
local.temp.uv_minmax[1] = v.uph32(v);
}
m_ds = m_ds_map[global.sel];
@ -1702,9 +1707,9 @@ void GSDrawScanline::WritePixel(const T& src, int addr, int i, u32 psm, const GS
}
}
void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v, GSScanlineLocalData& local)
{
const GSScanlineGlobalData& global = *m_local.gd;
const GSScanlineGlobalData& global = GlobalFromLocal(local);
ASSERT(r.y >= 0);
ASSERT(r.w >= 0);
@ -1726,22 +1731,22 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{
if (m == 0)
{
DrawRectT<u32, false>(global.zbo, r, z, m);
DrawRectT<u32, false>(global.zbo, r, z, m, local);
}
else
{
DrawRectT<u32, true>(global.zbo, r, z, m);
DrawRectT<u32, true>(global.zbo, r, z, m, local);
}
}
else
{
if ((m & 0xffff) == 0)
{
DrawRectT<u16, false>(global.zbo, r, z, m);
DrawRectT<u16, false>(global.zbo, r, z, m, local);
}
else
{
DrawRectT<u16, true>(global.zbo, r, z, m);
DrawRectT<u16, true>(global.zbo, r, z, m, local);
}
}
}
@ -1765,11 +1770,11 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{
if (m == 0)
{
DrawRectT<u32, false>(global.fbo, r, c, m);
DrawRectT<u32, false>(global.fbo, r, c, m, local);
}
else
{
DrawRectT<u32, true>(global.fbo, r, c, m);
DrawRectT<u32, true>(global.fbo, r, c, m, local);
}
}
else
@ -1778,18 +1783,18 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
if ((m & 0xffff) == 0)
{
DrawRectT<u16, false>(global.fbo, r, c, m);
DrawRectT<u16, false>(global.fbo, r, c, m, local);
}
else
{
DrawRectT<u16, true>(global.fbo, r, c, m);
DrawRectT<u16, true>(global.fbo, r, c, m, local);
}
}
}
}
template <class T, bool masked>
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m)
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local)
{
if (m == 0xffffffff)
return;
@ -1824,30 +1829,30 @@ void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c,
if (!br.rempty())
{
FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m);
FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m);
FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m, local);
FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m, local);
if (r.x < br.x || br.z < r.z)
{
FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m);
FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m);
FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m, local);
FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m, local);
}
FillBlock<T, masked>(off, br, color, mask);
FillBlock<T, masked>(off, br, color, mask, local);
}
else
{
FillRect<T, masked>(off, r, c, m);
FillRect<T, masked>(off, r, c, m, local);
}
}
template <class T, bool masked>
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m)
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(m_local).vm;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y++)
{
@ -1864,12 +1869,12 @@ void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u
#if _M_SSE >= 0x501
template <class T, bool masked>
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m)
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(m_local).vm;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y += 8)
{
@ -1892,12 +1897,12 @@ void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const G
#else
template <class T, bool masked>
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m)
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(m_local).vm;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y += 8)
{

View File

@ -37,8 +37,6 @@ public:
typedef void (*DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
protected:
GSScanlineLocalData m_local = {};
SetupPrimPtr m_sp = nullptr;
DrawScanlinePtr m_ds = nullptr;
DrawScanlinePtr m_de = nullptr;
@ -47,20 +45,20 @@ protected:
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
template <class T, bool masked>
void DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m);
static void DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local);
template <class T, bool masked>
__forceinline void FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m);
static __forceinline void FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local);
#if _M_SSE >= 0x501
template <class T, bool masked>
__forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m);
static __forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m, GSScanlineLocalData& local);
#else
template <class T, bool masked>
__forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
static __forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m, GSScanlineLocalData& local);
#endif
@ -68,15 +66,11 @@ public:
GSDrawScanline();
virtual ~GSDrawScanline() = default;
__forceinline GSScanlineLocalData& GetLocalData() { return m_local; }
static __forceinline const GSScanlineGlobalData& GlobalFromLocal(const GSScanlineLocalData& local) { return *local.gd; }
__forceinline bool HasEdge() const { return m_de != nullptr; }
__forceinline bool IsSolidRect() const { return m_local.gd->sel.IsSolidRect(); }
// IDrawScanline
void BeginDraw(const GSRasterizerData* data);
void BeginDraw(const GSRasterizerData* data, GSScanlineLocalData& local);
void EndDraw(u64 frame, u64 ticks, int actual, int total, int prims);
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
@ -100,7 +94,7 @@ public:
#endif
// Not currently jitted.
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
void DrawRect(const GSVector4i& r, const GSVertexSW& v, GSScanlineLocalData& local);
void PrintStats()
{

View File

@ -70,9 +70,8 @@ using namespace Xbyak;
#define _rip_local_d_p(x) _rip_local_d(x)
#endif
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key)
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key)
: _parent(base, cpu)
, m_local(*(GSScanlineLocalData*)param)
#ifdef _WIN32
, a0(rcx), a1(rdx)
, a2(r8) , a3(r9)

View File

@ -69,7 +69,6 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
constexpr static int _top = _64_top;
GSScanlineSelector m_sel;
GSScanlineLocalData& m_local;
bool use_lod;
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
@ -85,7 +84,7 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
const XYm _z, _f, _s, _t, _q, _f_rb, _f_ga;
public:
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key);
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key);
void Generate();
private:

View File

@ -81,9 +81,8 @@ static bool shouldUseCDrawScanline(u64 key)
return idx->second;
}
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(u64 key, void* code, size_t maxsize)
: Xbyak::CodeGenerator(maxsize, code)
, m_local(*(GSScanlineLocalData*)param)
{
m_sel.key = key;
@ -97,5 +96,5 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, v
return;
}
GSDrawScanlineCodeGenerator2(this, g_cpu, (void*)&m_local, m_sel.key).Generate();
GSDrawScanlineCodeGenerator2(this, g_cpu, m_sel.key).Generate();
}

View File

@ -38,10 +38,9 @@ class GSDrawScanlineCodeGenerator : public Xbyak::CodeGenerator
void operator=(const GSDrawScanlineCodeGenerator&);
GSScanlineSelector m_sel;
GSScanlineLocalData& m_local;
public:
GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
GSDrawScanlineCodeGenerator(u64 key, void* code, size_t maxsize);
};
MULTI_ISA_UNSHARED_END

View File

@ -152,7 +152,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
if constexpr (ENABLE_DRAW_STATS)
data->start = __rdtsc();
m_ds->BeginDraw(data);
m_ds->BeginDraw(data, m_local);
const GSVertexSW* vertex = data->vertex;
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
@ -280,7 +280,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
if (IsOneOfMyScanlines(p.y))
{
m_ds->SetupPrim(vertex, index, GSVertexSW::zero(), m_ds->GetLocalData());
m_ds->SetupPrim(vertex, index, GSVertexSW::zero(), m_local);
DrawScanline(1, p.x, p.y, v);
}
@ -301,7 +301,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
if (IsOneOfMyScanlines(p.y))
{
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero(), m_ds->GetLocalData());
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero(), m_local);
DrawScanline(1, p.x, p.y, v);
}
@ -369,7 +369,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(vertex, index, dscan, m_ds->GetLocalData());
m_ds->SetupPrim(vertex, index, dscan, m_local);
DrawScanline(pixels, left, p.y, scan);
}
@ -814,11 +814,12 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
GSVertexSW scan = v[0];
if ((m_scanmsk_value & 2) == 0 && m_ds->IsSolidRect())
// TODO: Double check IsSolidRect() works.
if ((m_scanmsk_value & 2) == 0 && m_local.gd->sel.IsSolidRect())
{
if (m_threads == 1)
{
m_ds->DrawRect(r, scan);
m_ds->DrawRect(r, scan, m_local);
int pixels = r.width() * r.height();
@ -835,7 +836,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
r.top = top;
r.bottom = std::min<int>((top + (1 << m_thread_height)) & ~((1 << m_thread_height) - 1), bottom);
m_ds->DrawRect(r, scan);
m_ds->DrawRect(r, scan, m_local);
int pixels = r.width() * r.height();
@ -864,7 +865,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
scan.t = (scan.t + dt * prestep).xyzw(scan.t);
m_ds->SetupPrim(vertex, index, dscan, m_ds->GetLocalData());
m_ds->SetupPrim(vertex, index, dscan, m_local);
while (1)
{
@ -1093,7 +1094,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVer
if (count > 0)
{
m_ds->SetupPrim(vertex, index, dscan, m_ds->GetLocalData());
m_ds->SetupPrim(vertex, index, dscan, m_local);
const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
@ -1140,7 +1141,7 @@ void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW&
ASSERT(m_pixels.actual <= m_pixels.total);
m_ds->DrawScanline(pixels, left, top, scan, m_ds->GetLocalData());
m_ds->DrawScanline(pixels, left, top, scan, m_local);
}
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
@ -1151,7 +1152,7 @@ void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& sca
ASSERT(m_pixels.actual <= m_pixels.total);
m_ds->DrawEdge(pixels, left, top, scan, m_ds->GetLocalData());
m_ds->DrawEdge(pixels, left, top, scan, m_local);
}
void GSRasterizer::Sync()

View File

@ -99,6 +99,8 @@ protected:
struct { int sum, actual, total; } m_pixels;
int m_primcount;
GSScanlineLocalData m_local = {};
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template <bool scissor_test>

View File

@ -48,9 +48,8 @@ using namespace Xbyak;
#define _rip_local_d_p(x) _rip_local_d(x)
#endif
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key)
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key)
: _parent(base, cpu)
, m_local(*(GSScanlineLocalData*)param)
, many_regs(false)
// On x86 arg registers are very temporary but on x64 they aren't, so on x86 some registers overlap
#ifdef _WIN32

View File

@ -46,7 +46,6 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
constexpr static int dsize = isXmm ? 4 : 8;
GSScanlineSelector m_sel;
GSScanlineLocalData& m_local;
bool many_regs;
struct {u32 z:1, f:1, t:1, c:1;} m_en;
@ -55,7 +54,7 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
const AddressReg _64_vertex, _index, _dscan, _m_local, t1;
public:
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, void* param, u64 key);
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key);
void Generate();
private:

View File

@ -19,10 +19,8 @@
MULTI_ISA_UNSHARED_IMPL;
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(u64 key, void* code, size_t maxsize)
: Xbyak::CodeGenerator(maxsize, code)
, m_local(*(GSScanlineLocalData*)param)
, m_rip(false)
{
m_sel.key = key;
@ -31,5 +29,5 @@ GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* c
m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0;
m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0;
GSSetupPrimCodeGenerator2(this, g_cpu, param, key).Generate();
GSSetupPrimCodeGenerator2(this, g_cpu, key).Generate();
}

View File

@ -31,8 +31,6 @@ class GSSetupPrimCodeGenerator : public Xbyak::CodeGenerator
void operator=(const GSSetupPrimCodeGenerator&);
GSScanlineSelector m_sel;
GSScanlineLocalData& m_local;
bool m_rip;
struct
{
@ -40,7 +38,7 @@ class GSSetupPrimCodeGenerator : public Xbyak::CodeGenerator
} m_en;
public:
GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
GSSetupPrimCodeGenerator(u64 key, void* code, size_t maxsize);
};
MULTI_ISA_UNSHARED_END