GS/SW: Share JIT code between all threads

This commit is contained in:
Stenzek 2023-01-25 20:51:21 +10:00 committed by refractionpcsx2
parent 56046d4db8
commit 3d84443bcf
6 changed files with 247 additions and 165 deletions

View File

@ -120,6 +120,7 @@ private:
// software sprite renderer state
std::vector<GSVertexSW> m_sw_vertex_buffer;
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture[7 + 1];
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_draw_scanline;
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_rasterizer;
public:

View File

@ -549,9 +549,13 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
}
if (!hw.m_sw_rasterizer)
hw.m_sw_rasterizer = std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);
{
hw.m_sw_draw_scanline = std::make_unique<GSDrawScanline>();
hw.m_sw_rasterizer = std::make_unique<GSRasterizer>(static_cast<GSDrawScanline*>(hw.m_sw_draw_scanline.get()), 0, 1);
}
static_cast<GSRasterizer*>(hw.m_sw_rasterizer.get())->Draw(&data);
static_cast<GSDrawScanline*>(hw.m_sw_draw_scanline.get())->SetupDraw(data);
static_cast<GSRasterizer*>(hw.m_sw_rasterizer.get())->Draw(data);
if (invalidate_tc)
hw.m_tc->InvalidateVideoMem(context->offset.fb, bbox);

View File

@ -35,9 +35,11 @@ GSDrawScanline::GSDrawScanline()
{
}
void GSDrawScanline::BeginDraw(const GSRasterizerData* data, GSScanlineLocalData& local)
GSDrawScanline::~GSDrawScanline() = default;
void GSDrawScanline::BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local)
{
const GSScanlineGlobalData& global = ((const SharedData*)data)->global;
const GSScanlineGlobalData& global = static_cast<const SharedData&>(data).global;
local.gd = &global;
if (global.sel.mmin && global.sel.lcm)
@ -63,8 +65,14 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, GSScanlineLocalData
local.temp.uv_minmax[0] = v.upl32(v);
local.temp.uv_minmax[1] = v.uph32(v);
}
}
m_ds = m_ds_map[global.sel];
void GSDrawScanline::SetupDraw(GSRasterizerData& data)
{
#ifdef ENABLE_JIT_RASTERIZER
SharedData& sdata = static_cast<SharedData&>(data);
const GSScanlineGlobalData& global = sdata.global;
sdata.ds = m_ds_map[global.sel];
if (global.sel.aa1)
{
@ -74,11 +82,11 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, GSScanlineLocalData
sel.zwrite = 0;
sel.edge = 1;
m_de = m_ds_map[sel];
sdata.de = m_ds_map[sel];
}
else
{
m_de = nullptr;
sdata.de = nullptr;
}
// doesn't need all bits => less functions generated
@ -99,10 +107,11 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, GSScanlineLocalData
sel.zequal = global.sel.zequal;
sel.notest = global.sel.notest;
m_sp = m_sp_map[sel];
sdata.sp = m_sp_map[sel];
#endif
}
void GSDrawScanline::EndDraw(u64 frame, u64 ticks, int actual, int total, int prims)
void GSDrawScanline::UpdateDrawStats(u64 frame, u64 ticks, int actual, int total, int prims)
{
m_ds_map.UpdateStats(frame, ticks, actual, total, prims);
}
@ -1578,30 +1587,6 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
}
}
#ifndef ENABLE_JIT_RASTERIZER
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
{
CSetupPrim(vertex, index, dscan, local);
}
void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
CDrawScanline(pixels, left, top, scan, local);
}
void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
u32 zwrite = m_global.sel.zwrite;
u32 edge = m_global.sel.edge;
m_global.sel.zwrite = 0;
m_global.sel.edge = 1;
CDrawScanline(pixels, left, top, scan, local);
m_global.sel.zwrite = zwrite;
m_global.sel.edge = edge;
}
#endif
template <class T>
bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga, const GSScanlineGlobalData& global)
{

View File

@ -22,25 +22,29 @@
#include "GS/Renderers/SW/GSDrawScanlineCodeGenerator.h"
#include "GS/config.h"
#include <cstring>
MULTI_ISA_UNSHARED_START
class GSDrawScanline : public GSAlignedClass<32>
class GSDrawScanline : public GSVirtualAlignedClass<32>
{
public:
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
class SharedData : public GSRasterizerData
{
public:
GSScanlineGlobalData global;
#ifdef ENABLE_JIT_RASTERIZER
SetupPrimPtr sp;
DrawScanlinePtr ds;
DrawScanlinePtr de;
#endif
};
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
typedef void (*DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
protected:
SetupPrimPtr m_sp = nullptr;
DrawScanlinePtr m_ds = nullptr;
DrawScanlinePtr m_de = nullptr;
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
@ -64,14 +68,12 @@ protected:
public:
GSDrawScanline();
virtual ~GSDrawScanline() = default;
~GSDrawScanline() override;
__forceinline bool HasEdge() const { return m_de != nullptr; }
void SetupDraw(GSRasterizerData& data);
void UpdateDrawStats(u64 frame, u64 ticks, int actual, int total, int prims);
// IDrawScanline
void BeginDraw(const GSRasterizerData* data, GSScanlineLocalData& local);
void EndDraw(u64 frame, u64 ticks, int actual, int total, int prims);
static void BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local);
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
@ -81,15 +83,59 @@ public:
#ifdef ENABLE_JIT_RASTERIZER
__forceinline void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local) { m_sp(vertex, index, dscan, local); }
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local) { m_ds(pixels, left, top, scan, local); }
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local) { m_de(pixels, left, top, scan, local); }
__forceinline static void SetupPrim(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index,
const GSVertexSW& dscan, GSScanlineLocalData& local)
{
static_cast<const SharedData&>(data).sp(vertex, index, dscan, local);
}
__forceinline static void DrawScanline(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
static_cast<const SharedData&>(data).ds(pixels, left, top, scan, local);
}
__forceinline static void DrawEdge(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
static_cast<const SharedData&>(data).de(pixels, left, top, scan, local);
}
__forceinline static bool HasEdge(const GSRasterizerData& data)
{
return static_cast<const SharedData&>(data).de != nullptr;
}
#else
void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
__forceinline static void SetupPrim(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index,
const GSVertexSW& dscan, GSScanlineLocalData& local)
{
CSetupPrim(vertex, index, dscan, local);
}
__forceinline static void DrawScanline(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
CDrawScanline(pixels, left, top, scan, local);
}
__forceinline static void DrawEdge(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
// This sucks. But so does not jitting!
const GSScanlineGlobalData* old_gd = local.gd;
GSScanlineGlobalData gd;
std::memcpy(&gd, &local.gd, sizeof(gd));
gd.sel.zwrite = 0;
gd.sel.edge = 1;
local.gd = &gd;
CDrawScanline(pixels, left, top, scan, local);
local.gd = old_gd;
}
__forceinline static bool HasEdge(const SharedData& data)
{
return static_cast<const SharedData&>(data).global.sel.aa1;
}
#endif

View File

@ -73,7 +73,6 @@ GSRasterizer::~GSRasterizer()
{
_aligned_free(m_scanline);
_aligned_free(m_edge.buff);
delete m_ds;
}
static void __fi AddScanlineInfo(GSVertexSW* e, int pixels, int left, int top)
@ -123,11 +122,6 @@ int GSRasterizer::FindMyNextScanline(int top) const
return top;
}
void GSRasterizer::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data)
{
Draw(data.get());
}
int GSRasterizer::GetPixels(bool reset)
{
int pixels = m_pixels.sum;
@ -140,9 +134,9 @@ int GSRasterizer::GetPixels(bool reset)
return pixels;
}
void GSRasterizer::Draw(GSRasterizerData* data)
void GSRasterizer::Draw(GSRasterizerData& data)
{
if ((data->vertex != NULL && data->vertex_count == 0) || (data->index != NULL && data->index_count == 0))
if ((data.vertex && data.vertex_count == 0) || (data.index && data.index_count == 0))
return;
m_pixels.actual = 0;
@ -150,36 +144,36 @@ void GSRasterizer::Draw(GSRasterizerData* data)
m_primcount = 0;
if constexpr (ENABLE_DRAW_STATS)
data->start = __rdtsc();
data.start = __rdtsc();
m_ds->BeginDraw(data, m_local);
GSDrawScanline::BeginDraw(static_cast<GSDrawScanline::SharedData&>(data), m_local);
const GSVertexSW* vertex = data->vertex;
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
const GSVertexSW* vertex = data.vertex;
const GSVertexSW* vertex_end = data.vertex + data.vertex_count;
const u32* index = data->index;
const u32* index_end = data->index + data->index_count;
const u32* index = data.index;
const u32* index_end = data.index + data.index_count;
u32 tmp_index[] = {0, 1, 2};
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
bool scissor_test = !data.bbox.eq(data.bbox.rintersect(data.scissor));
m_scissor = data->scissor;
m_fscissor_x = GSVector4(data->scissor).xzxz();
m_fscissor_y = GSVector4(data->scissor).ywyw();
m_scanmsk_value = data->scanmsk_value;
m_scissor = data.scissor;
m_fscissor_x = GSVector4(data.scissor).xzxz();
m_fscissor_y = GSVector4(data.scissor).ywyw();
m_scanmsk_value = data.scanmsk_value;
switch (data->primclass)
switch (data.primclass)
{
case GS_POINT_CLASS:
if (scissor_test)
{
DrawPoint<true>(vertex, data->vertex_count, index, data->index_count);
DrawPoint<true>(data, vertex, index);
}
else
{
DrawPoint<false>(vertex, data->vertex_count, index, data->index_count);
DrawPoint<false>(data, vertex, index);
}
break;
@ -190,7 +184,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
do
{
DrawLine(vertex, index);
DrawLine(data, vertex, index);
index += 2;
} while (index < index_end);
}
@ -198,7 +192,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
do
{
DrawLine(vertex, tmp_index);
DrawLine(data, vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
}
@ -211,7 +205,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
do
{
DrawTriangle(vertex, index);
DrawTriangle(data, vertex, index);
index += 3;
} while (index < index_end);
}
@ -219,7 +213,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
do
{
DrawTriangle(vertex, tmp_index);
DrawTriangle(data, vertex, tmp_index);
vertex += 3;
} while (vertex < vertex_end);
}
@ -232,7 +226,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
do
{
DrawSprite(vertex, index);
DrawSprite(data, vertex, index);
index += 2;
} while (index < index_end);
}
@ -240,7 +234,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
{
do
{
DrawSprite(vertex, tmp_index);
DrawSprite(data, vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
}
@ -255,20 +249,23 @@ void GSRasterizer::Draw(GSRasterizerData* data)
_mm256_zeroupper();
#endif
data->pixels = m_pixels.actual;
data.pixels = m_pixels.actual;
m_pixels.sum += m_pixels.actual;
if constexpr (ENABLE_DRAW_STATS)
m_ds->EndDraw(data->frame, __rdtsc() - data->start, m_pixels.actual, m_pixels.total, m_primcount);
m_ds->UpdateDrawStats(data.frame, __rdtsc() - data.start, m_pixels.actual, m_pixels.total, m_primcount);
}
template <bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count)
void GSRasterizer::DrawPoint(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
if (index != NULL)
const int vertex_count = data.vertex_count;
const int index_count = data.index_count;
if (index)
{
for (int i = 0; i < index_count; i++, index++)
{
@ -280,9 +277,9 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
if (IsOneOfMyScanlines(p.y))
{
m_ds->SetupPrim(vertex, index, GSVertexSW::zero(), m_local);
GSDrawScanline::SetupPrim(data, vertex, index, GSVertexSW::zero(), m_local);
DrawScanline(1, p.x, p.y, v);
DrawScanline(data, 1, p.x, p.y, v);
}
}
}
@ -301,16 +298,16 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
{
if (IsOneOfMyScanlines(p.y))
{
m_ds->SetupPrim(vertex, tmp_index, GSVertexSW::zero(), m_local);
GSDrawScanline::SetupPrim(data, vertex, tmp_index, GSVertexSW::zero(), m_local);
DrawScanline(1, p.x, p.y, v);
DrawScanline(data, 1, p.x, p.y, v);
}
}
}
}
}
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawLine(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -323,12 +320,12 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy|
if (m_ds->HasEdge())
if (GSDrawScanline::HasEdge(data))
{
DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v0, v1, dv, i, 1);
DrawEdge(data, v0, v1, dv, i, 0);
DrawEdge(data, v0, v1, dv, i, 1);
Flush(vertex, index, GSVertexSW::zero(), true);
Flush(data, vertex, index, GSVertexSW::zero(), true);
return;
}
@ -369,9 +366,9 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
scan += dscan * (l - scan.p).xxxx();
m_ds->SetupPrim(vertex, index, dscan, m_local);
GSDrawScanline::SetupPrim(data, vertex, index, dscan, m_local);
DrawScanline(pixels, left, p.y, scan);
DrawScanline(data, pixels, left, p.y, scan);
}
}
}
@ -410,7 +407,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
m_edge.count = e - m_edge.buff;
Flush(vertex, index, GSVertexSW::zero());
Flush(data, vertex, index, GSVertexSW::zero());
}
}
@ -428,7 +425,7 @@ static const u8 s_ysort[8][4] =
#if _M_SSE >= 0x501
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -512,7 +509,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
edge.p.y = vertex[i[m2]].p.x;
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
DrawTriangleSection(data, tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
}
}
else
@ -524,7 +521,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
edge.p.y = edge.p.x;
dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
DrawTriangleSection(data, tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if (tb.y < tb.w)
@ -534,13 +531,13 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
edge.p = (v0.p.xxxx() + ddx[m2] * dv0.p.yyyy()).xyzw(edge.p);
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
DrawTriangleSection(data, tb.y, tb.w, edge, dedge, dscan, v1.p);
}
}
Flush(vertex, index, (GSVertexSW&)dscan);
Flush(data, vertex, index, (GSVertexSW&)dscan);
if (m_ds->HasEdge())
if (GSDrawScanline::HasEdge(data))
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
@ -549,15 +546,15 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
int orientation = a.mask();
int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v1, (GSVertexSW&)dv0, orientation & 1, side & 1);
DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v2, (GSVertexSW&)dv1, orientation & 2, side & 2);
DrawEdge((GSVertexSW&)v1, (GSVertexSW&)v2, (GSVertexSW&)dv2, orientation & 4, side & 4);
DrawEdge(data, (GSVertexSW&)v0, (GSVertexSW&)v1, (GSVertexSW&)dv0, orientation & 1, side & 1);
DrawEdge(data, (GSVertexSW&)v0, (GSVertexSW&)v2, (GSVertexSW&)dv1, orientation & 2, side & 2);
DrawEdge(data, (GSVertexSW&)v1, (GSVertexSW&)v2, (GSVertexSW&)dv2, orientation & 4, side & 4);
Flush(vertex, index, GSVertexSW::zero(), true);
Flush(data, vertex, index, GSVertexSW::zero(), true);
}
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0)
void GSRasterizer::DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0)
{
ASSERT(top < bottom);
ASSERT(edge.p.x <= edge.p.y);
@ -609,7 +606,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC
#else
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -691,7 +688,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
edge.p.y = vertex[i[m2]].p.x;
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
DrawTriangleSection(data, tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
}
}
else
@ -703,7 +700,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
edge.p.y = edge.p.x;
dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
DrawTriangleSection(data, tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if (tb.y < tb.w)
@ -713,13 +710,13 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
edge.p = (v0.p.xxxx() + ddx[m2] * dv0.p.yyyy()).xyzw(edge.p);
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
DrawTriangleSection(data, tb.y, tb.w, edge, dedge, dscan, v1.p);
}
}
Flush(vertex, index, dscan);
Flush(data, vertex, index, dscan);
if (m_ds->HasEdge())
if (GSDrawScanline::HasEdge(data))
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
@ -728,15 +725,15 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
int orientation = a.mask();
int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge(v0, v1, dv0, orientation & 1, side & 1);
DrawEdge(v0, v2, dv1, orientation & 2, side & 2);
DrawEdge(v1, v2, dv2, orientation & 4, side & 4);
DrawEdge(data, v0, v1, dv0, orientation & 1, side & 1);
DrawEdge(data, v0, v2, dv1, orientation & 2, side & 2);
DrawEdge(data, v1, v2, dv2, orientation & 4, side & 4);
Flush(vertex, index, GSVertexSW::zero(), true);
Flush(data, vertex, index, GSVertexSW::zero(), true);
}
}
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0)
void GSRasterizer::DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0)
{
ASSERT(top < bottom);
ASSERT(edge.p.x <= edge.p.y);
@ -787,7 +784,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT
#endif
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawSprite(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -865,13 +862,13 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
scan.t = (scan.t + dt * prestep).xyzw(scan.t);
m_ds->SetupPrim(vertex, index, dscan, m_local);
GSDrawScanline::SetupPrim(data, vertex, index, dscan, m_local);
while (1)
{
if (IsOneOfMyScanlines(r.top))
{
DrawScanline(r.width(), r.left, r.top, scan);
DrawScanline(data, r.width(), r.left, r.top, scan);
}
if (++r.top >= r.bottom)
@ -881,7 +878,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
}
}
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
void GSRasterizer::DrawEdge(const GSRasterizerData& data, const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
{
// orientation:
// - true: |dv.p.y| > |dv.p.x|
@ -1086,7 +1083,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
AddScanlineInfo(e, pixels, left, top);
}
void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge)
void GSRasterizer::Flush(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge /* = false */)
{
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
@ -1094,7 +1091,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVer
if (count > 0)
{
m_ds->SetupPrim(vertex, index, dscan, m_local);
m_ds->SetupPrim(data, vertex, index, dscan, m_local);
const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
@ -1107,7 +1104,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVer
int left = e->_pad.I32[1];
int top = e->_pad.I32[2];
DrawScanline(pixels, left, top, *e++);
DrawScanline(data, pixels, left, top, *e++);
} while (e < ee);
}
else
@ -1118,7 +1115,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVer
int left = e->_pad.I32[1];
int top = e->_pad.I32[2];
DrawEdge(pixels, left, top, *e++);
DrawEdge(data, pixels, left, top, *e++);
} while (e < ee);
}
@ -1132,7 +1129,7 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVer
#define PIXELS_PER_LOOP 4
#endif
void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
void GSRasterizer::DrawScanline(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan)
{
if ((m_scanmsk_value & 2) && (m_scanmsk_value & 1) == (top & 1)) return;
m_pixels.actual += pixels;
@ -1141,10 +1138,10 @@ void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW&
ASSERT(m_pixels.actual <= m_pixels.total);
m_ds->DrawScanline(pixels, left, top, scan, m_local);
GSDrawScanline::DrawScanline(data, pixels, left, top, scan, m_local);
}
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
void GSRasterizer::DrawEdge(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan)
{
if ((m_scanmsk_value & 2) && (m_scanmsk_value & 1) == (top & 1)) return;
m_pixels.actual += 1;
@ -1152,19 +1149,44 @@ void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& sca
ASSERT(m_pixels.actual <= m_pixels.total);
m_ds->DrawEdge(pixels, left, top, scan, m_local);
GSDrawScanline::DrawEdge(data, pixels, left, top, scan, m_local);
}
void GSRasterizer::Sync()
//
GSSingleRasterizer::GSSingleRasterizer()
{
m_ds = std::make_unique<GSDrawScanline>();
m_r = std::make_unique<GSRasterizer>(m_ds.get(), 0, 1);
}
GSSingleRasterizer::~GSSingleRasterizer()
{
m_r.reset();
m_ds.reset();
}
void GSSingleRasterizer::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data)
{
m_ds->SetupDraw(*data.get());
m_r->Draw(*data.get());
}
void GSSingleRasterizer::Sync()
{
}
bool GSRasterizer::IsSynced() const
bool GSSingleRasterizer::IsSynced() const
{
return true;
}
void GSRasterizer::PrintStats()
int GSSingleRasterizer::GetPixels(bool reset /*= true*/)
{
return m_r->GetPixels(reset);
}
void GSSingleRasterizer::PrintStats()
{
m_ds->PrintStats();
}
@ -1173,6 +1195,8 @@ void GSRasterizer::PrintStats()
GSRasterizerList::GSRasterizerList(int threads)
{
m_ds = std::make_unique<GSDrawScanline>();
m_thread_height = compute_best_thread_height(threads);
const int rows = (2048 >> m_thread_height) + 16;
@ -1189,6 +1213,12 @@ GSRasterizerList::GSRasterizerList(int threads)
GSRasterizerList::~GSRasterizerList()
{
PerformanceMetrics::SetGSSWThreadCount(0);
// Destruct draw scanline last.
m_workers.clear();
m_r.clear();
m_ds.reset();
_aligned_free(m_scanline);
}
@ -1222,6 +1252,8 @@ void GSRasterizerList::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data
{
GSVector4i r = data->bbox.rintersect(data->scissor);
m_ds->SetupDraw(*data.get());
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);
int top = r.top >> m_thread_height;
@ -1277,17 +1309,17 @@ std::unique_ptr<IRasterizer> GSRasterizerList::Create(int threads)
if (threads == 0)
{
return std::make_unique<GSRasterizer>(new GSDrawScanline(), 0, 1);
return std::make_unique<GSSingleRasterizer>();
}
std::unique_ptr<GSRasterizerList> rl(new GSRasterizerList(threads));
for (int i = 0; i < threads; i++)
{
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(new GSDrawScanline(), i, threads)));
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(rl->m_ds.get(), i, threads)));
auto& r = *rl->m_r[i];
rl->m_workers.push_back(std::unique_ptr<GSWorker>(new GSWorker([i]() { GSRasterizerList::OnWorkerStartup(i); },
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(item.get()); },
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(*item.get()); },
[i]() { GSRasterizerList::OnWorkerShutdown(i); })));
}

View File

@ -71,19 +71,7 @@ public:
}
};
class IRasterizer : public GSVirtualAlignedClass<32>
{
public:
virtual ~IRasterizer() {}
virtual void Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data) = 0;
virtual void Sync() = 0;
virtual bool IsSynced() const = 0;
virtual int GetPixels(bool reset = true) = 0;
virtual void PrintStats() = 0;
};
class alignas(32) GSRasterizer final : public IRasterizer
class alignas(32) GSRasterizer final : public GSVirtualAlignedClass<32>
{
protected:
GSDrawScanline* m_ds;
@ -101,45 +89,69 @@ protected:
GSScanlineLocalData m_local = {};
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
// TODO: Make data pointer a class member?
// Or, at the very least, pull the function pointers out.
template <bool scissor_test>
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const u32* index);
void DrawTriangle(const GSVertexSW* vertex, const u32* index);
void DrawSprite(const GSVertexSW* vertex, const u32* index);
void DrawPoint(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawLine(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawTriangle(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawSprite(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
#if _M_SSE >= 0x501
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
__forceinline void DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
#else
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0);
__forceinline void DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0);
#endif
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
void DrawEdge(const GSRasterizerData& data, const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void Flush(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawScanline(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawEdge(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan);
public:
GSRasterizer(GSDrawScanline* ds, int id, int threads);
~GSRasterizer() override;
~GSRasterizer();
__forceinline bool IsOneOfMyScanlines(int top) const;
__forceinline bool IsOneOfMyScanlines(int top, int bottom) const;
__forceinline int FindMyNextScanline(int top) const;
void Draw(GSRasterizerData* data);
void Draw(GSRasterizerData& data);
int GetPixels(bool reset);
};
// IRasterizer
class IRasterizer : public GSVirtualAlignedClass<32>
{
public:
virtual ~IRasterizer() {}
virtual void Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data) = 0;
virtual void Sync() = 0;
virtual bool IsSynced() const = 0;
virtual int GetPixels(bool reset = true) = 0;
virtual void PrintStats() = 0;
};
class GSSingleRasterizer final : public IRasterizer
{
public:
GSSingleRasterizer();
~GSSingleRasterizer() override;
void Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data) override;
void Sync() override;
bool IsSynced() const override;
int GetPixels(bool reset) override;
int GetPixels(bool reset = true) override;
void PrintStats() override;
private:
// TODO: Get rid of indirection here
std::unique_ptr<GSDrawScanline> m_ds;
std::unique_ptr<GSRasterizer> m_r;
};
class GSRasterizerList final : public IRasterizer
@ -147,6 +159,8 @@ class GSRasterizerList final : public IRasterizer
protected:
using GSWorker = GSJobQueue<GSRingHeap::SharedPtr<GSRasterizerData>, 65536>;
std::unique_ptr<GSDrawScanline> m_ds;
// Worker threads depend on the rasterizers, so don't change the order.
std::vector<std::unique_ptr<GSRasterizer>> m_r;
std::vector<std::unique_ptr<GSWorker>> m_workers;