mirror of https://github.com/PCSX2/pcsx2.git
Tweaked the rasterizer to be about 10% faster in multi-threaded mode (2 or 3 threads), still far from optimal.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4308 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
dea0f37ca9
commit
257d57ba52
|
@ -22,39 +22,41 @@
|
||||||
#include "StdAfx.h"
|
#include "StdAfx.h"
|
||||||
#include "GPUDrawScanline.h"
|
#include "GPUDrawScanline.h"
|
||||||
|
|
||||||
GPUDrawScanline::GPUDrawScanline(const GPUScanlineGlobalData* gd)
|
GPUDrawScanline::GPUDrawScanline()
|
||||||
: m_sp_map("GPUSetupPrim", &m_local)
|
: m_sp_map("GPUSetupPrim", &m_local)
|
||||||
, m_ds_map("GPUDrawScanline", &m_local)
|
, m_ds_map("GPUDrawScanline", &m_local)
|
||||||
{
|
{
|
||||||
memset(&m_local, 0, sizeof(m_local));
|
memset(&m_local, 0, sizeof(m_local));
|
||||||
|
|
||||||
m_local.gd = gd;
|
m_local.gd = &m_global;
|
||||||
}
|
}
|
||||||
|
|
||||||
GPUDrawScanline::~GPUDrawScanline()
|
GPUDrawScanline::~GPUDrawScanline()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
void GPUDrawScanline::BeginDraw(const void* param)
|
||||||
{
|
{
|
||||||
if(m_local.gd->sel.tme && m_local.gd->sel.twin)
|
memcpy(&m_global, param, sizeof(m_global));
|
||||||
|
|
||||||
|
if(m_global.sel.tme && m_global.sel.twin)
|
||||||
{
|
{
|
||||||
uint32 u, v;
|
uint32 u, v;
|
||||||
|
|
||||||
u = ~(m_local.gd->twin.x << 3) & 0xff; // TWW
|
u = ~(m_global.twin.x << 3) & 0xff; // TWW
|
||||||
v = ~(m_local.gd->twin.y << 3) & 0xff; // TWH
|
v = ~(m_global.twin.y << 3) & 0xff; // TWH
|
||||||
|
|
||||||
m_local.twin[0].u = GSVector4i((u << 16) | u);
|
m_local.twin[0].u = GSVector4i((u << 16) | u);
|
||||||
m_local.twin[0].v = GSVector4i((v << 16) | v);
|
m_local.twin[0].v = GSVector4i((v << 16) | v);
|
||||||
|
|
||||||
u = m_local.gd->twin.z << 3; // TWX
|
u = m_global.twin.z << 3; // TWX
|
||||||
v = m_local.gd->twin.w << 3; // TWY
|
v = m_global.twin.w << 3; // TWY
|
||||||
|
|
||||||
m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u;
|
m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u;
|
||||||
m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v;
|
m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_ds = m_ds_map[m_local.gd->sel];
|
m_ds = m_ds_map[m_global.sel];
|
||||||
|
|
||||||
m_de = NULL;
|
m_de = NULL;
|
||||||
|
|
||||||
|
@ -66,10 +68,10 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||||
|
|
||||||
sel.key = 0;
|
sel.key = 0;
|
||||||
|
|
||||||
sel.iip = m_local.gd->sel.iip;
|
sel.iip = m_global.sel.iip;
|
||||||
sel.tfx = m_local.gd->sel.tfx;
|
sel.tfx = m_global.sel.tfx;
|
||||||
sel.twin = m_local.gd->sel.twin;
|
sel.twin = m_global.sel.twin;
|
||||||
sel.sprite = m_local.gd->sel.sprite;
|
sel.sprite = m_global.sel.sprite;
|
||||||
|
|
||||||
m_sp = m_sp_map[sel];
|
m_sp = m_sp_map[sel];
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,18 +29,19 @@
|
||||||
|
|
||||||
class GPUDrawScanline : public IDrawScanline
|
class GPUDrawScanline : public IDrawScanline
|
||||||
{
|
{
|
||||||
|
GPUScanlineGlobalData m_global;
|
||||||
GPUScanlineLocalData m_local;
|
GPUScanlineLocalData m_local;
|
||||||
|
|
||||||
GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimPtr> m_sp_map;
|
GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimPtr> m_sp_map;
|
||||||
GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlinePtr> m_ds_map;
|
GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlinePtr> m_ds_map;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GPUDrawScanline(const GPUScanlineGlobalData* gd);
|
GPUDrawScanline();
|
||||||
virtual ~GPUDrawScanline();
|
virtual ~GPUDrawScanline();
|
||||||
|
|
||||||
// IDrawScanline
|
// IDrawScanline
|
||||||
|
|
||||||
void BeginDraw(const GSRasterizerData* data);
|
void BeginDraw(const void* param);
|
||||||
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
||||||
void PrintStats() {m_ds_map.PrintStats();}
|
void PrintStats() {m_ds_map.PrintStats();}
|
||||||
};
|
};
|
||||||
|
|
|
@ -27,7 +27,7 @@ GPURendererSW::GPURendererSW(GSDevice* dev, int threads)
|
||||||
: GPURendererT(dev)
|
: GPURendererT(dev)
|
||||||
, m_texture(NULL)
|
, m_texture(NULL)
|
||||||
{
|
{
|
||||||
m_rl.Create<GPUDrawScanline, GPUScanlineGlobalData>(threads);
|
m_rl.Create<GPUDrawScanline>(threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPURendererSW::~GPURendererSW()
|
GPURendererSW::~GPURendererSW()
|
||||||
|
@ -129,32 +129,30 @@ void GPURendererSW::Draw()
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_rl.Draw(&data);
|
// TODO: VertexTrace
|
||||||
|
|
||||||
// TODO
|
GSVector4 tl(+1e10f);
|
||||||
|
GSVector4 br(-1e10f);
|
||||||
|
|
||||||
|
for(int i = 0, j = m_count; i < j; i++)
|
||||||
{
|
{
|
||||||
GSVector4 tl(+1e10f);
|
GSVector4 p = m_vertices[i].p;
|
||||||
GSVector4 br(-1e10f);
|
|
||||||
|
|
||||||
for(int i = 0, j = m_count; i < j; i++)
|
tl = tl.min(p);
|
||||||
{
|
br = br.max(p);
|
||||||
GSVector4 p = m_vertices[i].p;
|
|
||||||
|
|
||||||
tl = tl.min(p);
|
|
||||||
br = br.max(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);
|
|
||||||
|
|
||||||
r.left >>= m_scale.x;
|
|
||||||
r.top >>= m_scale.y;
|
|
||||||
r.right >>= m_scale.x;
|
|
||||||
r.bottom >>= m_scale.y;
|
|
||||||
|
|
||||||
Invalidate(r);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);
|
||||||
|
|
||||||
|
r.left >>= m_scale.x;
|
||||||
|
r.top >>= m_scale.y;
|
||||||
|
r.right >>= m_scale.x;
|
||||||
|
r.bottom >>= m_scale.y;
|
||||||
|
|
||||||
|
m_rl.Draw(&data, r.width(), r.height());
|
||||||
|
|
||||||
|
Invalidate(r);
|
||||||
|
|
||||||
m_rl.Sync();
|
m_rl.Sync();
|
||||||
|
|
||||||
GSRasterizerStats stats;
|
GSRasterizerStats stats;
|
||||||
|
|
|
@ -363,7 +363,7 @@ EXPORT_C GSvsync(int field)
|
||||||
{
|
{
|
||||||
#ifdef _WINDOWS
|
#ifdef _WINDOWS
|
||||||
|
|
||||||
if( s_gs->m_wnd.IsManaged() )
|
if(s_gs->m_wnd.IsManaged())
|
||||||
{
|
{
|
||||||
MSG msg;
|
MSG msg;
|
||||||
|
|
||||||
|
@ -375,6 +375,7 @@ EXPORT_C GSvsync(int field)
|
||||||
DispatchMessage(&msg);
|
DispatchMessage(&msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
s_gs->VSync(field);
|
s_gs->VSync(field);
|
||||||
|
@ -384,7 +385,7 @@ EXPORT_C_(uint32) GSmakeSnapshot(char* path)
|
||||||
{
|
{
|
||||||
string s(path);
|
string s(path);
|
||||||
|
|
||||||
if(s.back() != '\\')
|
if(!s.empty() && s[s.length() - 1] != '\\')
|
||||||
{
|
{
|
||||||
s = s + "\\";
|
s = s + "\\";
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,28 +23,30 @@
|
||||||
#include "GSDrawScanline.h"
|
#include "GSDrawScanline.h"
|
||||||
#include "GSTextureCacheSW.h"
|
#include "GSTextureCacheSW.h"
|
||||||
|
|
||||||
GSDrawScanline::GSDrawScanline(GSScanlineGlobalData* gd)
|
GSDrawScanline::GSDrawScanline()
|
||||||
: m_sp_map("GSSetupPrim", &m_local)
|
: m_sp_map("GSSetupPrim", &m_local)
|
||||||
, m_ds_map("GSDrawScanline", &m_local)
|
, m_ds_map("GSDrawScanline", &m_local)
|
||||||
{
|
{
|
||||||
memset(&m_local, 0, sizeof(m_local));
|
memset(&m_local, 0, sizeof(m_local));
|
||||||
|
|
||||||
m_local.gd = gd;
|
m_local.gd = &m_global;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSDrawScanline::~GSDrawScanline()
|
GSDrawScanline::~GSDrawScanline()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
void GSDrawScanline::BeginDraw(const void* param)
|
||||||
{
|
{
|
||||||
m_ds = m_ds_map[m_local.gd->sel];
|
memcpy(&m_global, param, sizeof(m_global));
|
||||||
|
|
||||||
if(m_local.gd->sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
|
m_ds = m_ds_map[m_global.sel];
|
||||||
|
|
||||||
|
if(m_global.sel.aa1)
|
||||||
{
|
{
|
||||||
GSScanlineSelector sel;
|
GSScanlineSelector sel;
|
||||||
|
|
||||||
sel.key = m_local.gd->sel.key;
|
sel.key = m_global.sel.key;
|
||||||
sel.zwrite = 0;
|
sel.zwrite = 0;
|
||||||
sel.edge = 1;
|
sel.edge = 1;
|
||||||
|
|
||||||
|
@ -55,7 +57,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||||
m_de = NULL;
|
m_de = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_local.gd->sel.IsSolidRect())
|
if(m_global.sel.IsSolidRect())
|
||||||
{
|
{
|
||||||
m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect;
|
m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect;
|
||||||
}
|
}
|
||||||
|
@ -70,15 +72,15 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||||
|
|
||||||
sel.key = 0;
|
sel.key = 0;
|
||||||
|
|
||||||
sel.iip = m_local.gd->sel.iip;
|
sel.iip = m_global.sel.iip;
|
||||||
sel.tfx = m_local.gd->sel.tfx;
|
sel.tfx = m_global.sel.tfx;
|
||||||
sel.tcc = m_local.gd->sel.tcc;
|
sel.tcc = m_global.sel.tcc;
|
||||||
sel.fst = m_local.gd->sel.fst;
|
sel.fst = m_global.sel.fst;
|
||||||
sel.fge = m_local.gd->sel.fge;
|
sel.fge = m_global.sel.fge;
|
||||||
sel.sprite = m_local.gd->sel.sprite;
|
sel.sprite = m_global.sel.sprite;
|
||||||
sel.fb = m_local.gd->sel.fb;
|
sel.fb = m_global.sel.fb;
|
||||||
sel.zb = m_local.gd->sel.zb;
|
sel.zb = m_global.sel.zb;
|
||||||
sel.zoverflow = m_local.gd->sel.zoverflow;
|
sel.zoverflow = m_global.sel.zoverflow;
|
||||||
|
|
||||||
m_sp = m_sp_map[sel];
|
m_sp = m_sp_map[sel];
|
||||||
}
|
}
|
||||||
|
@ -97,16 +99,16 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
|
|
||||||
uint32 m;
|
uint32 m;
|
||||||
|
|
||||||
m = m_local.gd->zm.u32[0];
|
m = m_global.zm.u32[0];
|
||||||
|
|
||||||
if(m != 0xffffffff)
|
if(m != 0xffffffff)
|
||||||
{
|
{
|
||||||
const int* zbr = m_local.gd->zbr;
|
const int* zbr = m_global.zbr;
|
||||||
const int* zbc = m_local.gd->zbc;
|
const int* zbc = m_global.zbc;
|
||||||
|
|
||||||
uint32 z = (uint32)v.p.z;
|
uint32 z = (uint32)v.p.z;
|
||||||
|
|
||||||
if(m_local.gd->sel.zpsm != 2)
|
if(m_global.sel.zpsm != 2)
|
||||||
{
|
{
|
||||||
if(m == 0)
|
if(m == 0)
|
||||||
{
|
{
|
||||||
|
@ -130,21 +132,21 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
m = m_local.gd->fm.u32[0];
|
m = m_global.fm.u32[0];
|
||||||
|
|
||||||
if(m != 0xffffffff)
|
if(m != 0xffffffff)
|
||||||
{
|
{
|
||||||
const int* fbr = m_local.gd->fbr;
|
const int* fbr = m_global.fbr;
|
||||||
const int* fbc = m_local.gd->fbc;
|
const int* fbc = m_global.fbc;
|
||||||
|
|
||||||
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
|
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
|
||||||
|
|
||||||
if(m_local.gd->sel.fba)
|
if(m_global.sel.fba)
|
||||||
{
|
{
|
||||||
c |= 0x80000000;
|
c |= 0x80000000;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_local.gd->sel.fpsm != 2)
|
if(m_global.sel.fpsm != 2)
|
||||||
{
|
{
|
||||||
if(m == 0)
|
if(m == 0)
|
||||||
{
|
{
|
||||||
|
@ -213,7 +215,7 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
{
|
{
|
||||||
if(r.x >= r.z) return;
|
if(r.x >= r.z) return;
|
||||||
|
|
||||||
T* vm = (T*)m_local.gd->vm;
|
T* vm = (T*)m_global.vm;
|
||||||
|
|
||||||
for(int y = r.y; y < r.w; y++)
|
for(int y = r.y; y < r.w; y++)
|
||||||
{
|
{
|
||||||
|
@ -231,7 +233,7 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
|
||||||
{
|
{
|
||||||
if(r.x >= r.z) return;
|
if(r.x >= r.z) return;
|
||||||
|
|
||||||
T* vm = (T*)m_local.gd->vm;
|
T* vm = (T*)m_global.vm;
|
||||||
|
|
||||||
for(int y = r.y; y < r.w; y += 8)
|
for(int y = r.y; y < r.w; y += 8)
|
||||||
{
|
{
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
class GSDrawScanline : public IDrawScanline
|
class GSDrawScanline : public IDrawScanline
|
||||||
{
|
{
|
||||||
|
GSScanlineGlobalData m_global;
|
||||||
GSScanlineLocalData m_local;
|
GSScanlineLocalData m_local;
|
||||||
|
|
||||||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
|
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
|
||||||
|
@ -46,12 +47,12 @@ class GSDrawScanline : public IDrawScanline
|
||||||
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanline(GSScanlineGlobalData* gd);
|
GSDrawScanline();
|
||||||
virtual ~GSDrawScanline();
|
virtual ~GSDrawScanline();
|
||||||
|
|
||||||
// IDrawScanline
|
// IDrawScanline
|
||||||
|
|
||||||
void BeginDraw(const GSRasterizerData* data);
|
void BeginDraw(const void* param);
|
||||||
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
||||||
void PrintStats() {m_ds_map.PrintStats();}
|
void PrintStats() {m_ds_map.PrintStats();}
|
||||||
};
|
};
|
||||||
|
|
|
@ -1844,26 +1844,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
||||||
|
|
||||||
// clamp.blend8(repeat, m_local.gd->t.mask);
|
// clamp.blend8(repeat, m_local.gd->t.mask);
|
||||||
|
|
||||||
if(m_cpu.has(util::Cpu::tSSE41))
|
vmovdqa(xmm0, ptr[&m_local.gd->t.mask]);
|
||||||
{
|
|
||||||
vmovdqa(xmm0, ptr[&m_local.gd->t.mask]);
|
|
||||||
|
|
||||||
vpblendvb(uv0, xmm1, xmm0);
|
vpblendvb(uv0, xmm1, xmm0);
|
||||||
vpblendvb(uv1, xmm6, xmm0);
|
vpblendvb(uv1, xmm6, xmm0);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
vmovdqa(xmm0, ptr[&m_local.gd->t.invmask]);
|
|
||||||
vmovdqa(xmm4, xmm0);
|
|
||||||
|
|
||||||
vpand(uv0, xmm0);
|
|
||||||
vpandn(xmm0, xmm1);
|
|
||||||
vpor(uv0, xmm0);
|
|
||||||
|
|
||||||
vpand(uv1, xmm4);
|
|
||||||
vpandn(xmm4, xmm6);
|
|
||||||
vpor(uv1, xmm4);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -46,10 +46,14 @@
|
||||||
static const int ThreadMaskConst = ThreadsConst - 1;
|
static const int ThreadMaskConst = ThreadsConst - 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
// align threads to page height (1 << 5)
|
||||||
|
|
||||||
|
#define THREAD_HEIGHT 5
|
||||||
|
|
||||||
|
GSRasterizer::GSRasterizer(IDrawScanline* ds)
|
||||||
: m_ds(ds)
|
: m_ds(ds)
|
||||||
, m_id(id)
|
, m_id(0)
|
||||||
, m_threads(threads)
|
, m_threads(1)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,14 +70,14 @@ __forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
return m_threads == 1 || (scanline % m_threads) == m_id;
|
return m_threads == 1 || ((scanline >> THREAD_HEIGHT) % m_threads) == m_id;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
{
|
{
|
||||||
m_ds->BeginDraw(data);
|
m_ds->BeginDraw(data->param);
|
||||||
|
|
||||||
const GSVector4i scissor = data->scissor;
|
const GSVector4i scissor = data->scissor;
|
||||||
const GSVertexSW* vertices = data->vertices;
|
const GSVertexSW* vertices = data->vertices;
|
||||||
|
@ -841,8 +845,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, HANDLE ready, volatile long& sync)
|
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, HANDLE ready, volatile long& sync)
|
||||||
: GSRasterizer(ds, id, threads)
|
: GSRasterizer(ds)
|
||||||
, m_ready(ready)
|
, m_ready(ready)
|
||||||
, m_sync(sync)
|
, m_sync(sync)
|
||||||
, m_data(NULL)
|
, m_data(NULL)
|
||||||
|
@ -859,8 +863,8 @@ GSRasterizerMT::~GSRasterizerMT()
|
||||||
|
|
||||||
CloseThread();
|
CloseThread();
|
||||||
|
|
||||||
DeleteObject(m_exit);
|
CloseHandle(m_exit);
|
||||||
DeleteObject(m_draw);
|
CloseHandle(m_draw);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerMT::Draw(const GSRasterizerData* data)
|
void GSRasterizerMT::Draw(const GSRasterizerData* data)
|
||||||
|
@ -909,8 +913,6 @@ void GSRasterizerMT::ThreadProc()
|
||||||
|
|
||||||
GSRasterizerList::GSRasterizerList()
|
GSRasterizerList::GSRasterizerList()
|
||||||
: m_sync(0)
|
: m_sync(0)
|
||||||
, m_syncstart(0)
|
|
||||||
, m_param(NULL)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -919,8 +921,6 @@ GSRasterizerList::~GSRasterizerList()
|
||||||
for(size_t i = 0; i < size(); i++) delete (*this)[i];
|
for(size_t i = 0; i < size(); i++) delete (*this)[i];
|
||||||
|
|
||||||
for(size_t i = 0; i < m_ready.size(); i++) CloseHandle(m_ready[i]);
|
for(size_t i = 0; i < m_ready.size(); i++) CloseHandle(m_ready[i]);
|
||||||
|
|
||||||
if(m_param) _aligned_free(m_param);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerList::Sync()
|
void GSRasterizerList::Sync()
|
||||||
|
@ -931,13 +931,16 @@ void GSRasterizerList::Sync()
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
WaitForMultipleObjects(m_ready.size(), &m_ready[0], TRUE, INFINITE);
|
if(m_threads > 1)
|
||||||
|
{
|
||||||
|
WaitForMultipleObjects(m_threads - 1, &m_ready[0], TRUE, INFINITE);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
m_stats.ticks = __rdtsc() - m_start;
|
m_stats.ticks = __rdtsc() - m_start;
|
||||||
|
|
||||||
for(size_t i = 0; i < size(); i++)
|
for(int i = 0; i < m_threads; i++)
|
||||||
{
|
{
|
||||||
GSRasterizerStats s;
|
GSRasterizerStats s;
|
||||||
|
|
||||||
|
@ -948,21 +951,34 @@ void GSRasterizerList::Sync()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerList::Draw(const GSRasterizerData* data)
|
void GSRasterizerList::Draw(const GSRasterizerData* data, int width, int height)
|
||||||
{
|
{
|
||||||
m_stats.Reset();
|
m_stats.Reset();
|
||||||
|
|
||||||
memcpy(m_param, data->param, m_param_size);
|
|
||||||
|
|
||||||
m_start = __rdtsc();
|
m_start = __rdtsc();
|
||||||
|
|
||||||
m_sync = m_syncstart;
|
m_threads = std::min<int>(1 + (height >> THREAD_HEIGHT), size());
|
||||||
|
|
||||||
for(size_t i = 1; i < size(); i++)
|
#ifdef UseSpinningFinish
|
||||||
|
|
||||||
|
m_sync = 0;
|
||||||
|
|
||||||
|
for(int i = 1; i < m_threads; i++)
|
||||||
{
|
{
|
||||||
|
m_sync |= 1 << i;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for(int i = 1; i < m_threads; i++)
|
||||||
|
{
|
||||||
|
(*this)[i]->SetThreadId(i, m_threads);
|
||||||
|
|
||||||
(*this)[i]->Draw(data);
|
(*this)[i]->Draw(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*this)[0]->SetThreadId(0, m_threads);
|
||||||
|
|
||||||
(*this)[0]->Draw(data);
|
(*this)[0]->Draw(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@ public:
|
||||||
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
|
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
|
||||||
virtual ~IDrawScanline() {}
|
virtual ~IDrawScanline() {}
|
||||||
|
|
||||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
virtual void BeginDraw(const void* param) = 0;
|
||||||
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
|
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
|
||||||
virtual void PrintStats() = 0;
|
virtual void PrintStats() = 0;
|
||||||
|
|
||||||
|
@ -76,6 +76,7 @@ public:
|
||||||
virtual void Draw(const GSRasterizerData* data) = 0;
|
virtual void Draw(const GSRasterizerData* data) = 0;
|
||||||
virtual void GetStats(GSRasterizerStats& stats) = 0;
|
virtual void GetStats(GSRasterizerStats& stats) = 0;
|
||||||
virtual void PrintStats() = 0;
|
virtual void PrintStats() = 0;
|
||||||
|
virtual void SetThreadId(int id, int threads) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSRasterizer : public IRasterizer
|
class GSRasterizer : public IRasterizer
|
||||||
|
@ -104,7 +105,7 @@ protected:
|
||||||
inline bool IsOneOfMyScanlines(int scanline) const;
|
inline bool IsOneOfMyScanlines(int scanline) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0);
|
GSRasterizer(IDrawScanline* ds);
|
||||||
virtual ~GSRasterizer();
|
virtual ~GSRasterizer();
|
||||||
|
|
||||||
// IRasterizer
|
// IRasterizer
|
||||||
|
@ -112,6 +113,7 @@ public:
|
||||||
void Draw(const GSRasterizerData* data);
|
void Draw(const GSRasterizerData* data);
|
||||||
void GetStats(GSRasterizerStats& stats);
|
void GetStats(GSRasterizerStats& stats);
|
||||||
void PrintStats() {m_ds->PrintStats();}
|
void PrintStats() {m_ds->PrintStats();}
|
||||||
|
void SetThreadId(int id, int threads) {m_id = id; m_threads = threads;}
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSRasterizerMT : public GSRasterizer, private GSThread
|
class GSRasterizerMT : public GSRasterizer, private GSThread
|
||||||
|
@ -126,7 +128,7 @@ protected:
|
||||||
void ThreadProc();
|
void ThreadProc();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizerMT(IDrawScanline* ds, int id, int threads, HANDLE ready, volatile long& sync);
|
GSRasterizerMT(IDrawScanline* ds, HANDLE ready, volatile long& sync);
|
||||||
virtual ~GSRasterizerMT();
|
virtual ~GSRasterizerMT();
|
||||||
|
|
||||||
// IRasterizer
|
// IRasterizer
|
||||||
|
@ -134,49 +136,38 @@ public:
|
||||||
void Draw(const GSRasterizerData* data);
|
void Draw(const GSRasterizerData* data);
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSRasterizerList : protected vector<IRasterizer*>, public IRasterizer
|
class GSRasterizerList : protected vector<IRasterizer*>
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
std::vector<HANDLE> m_ready;
|
std::vector<HANDLE> m_ready;
|
||||||
volatile long m_sync;
|
volatile long m_sync;
|
||||||
long m_syncstart;
|
|
||||||
GSRasterizerStats m_stats;
|
GSRasterizerStats m_stats;
|
||||||
int64 m_start;
|
int64 m_start;
|
||||||
void* m_param;
|
int m_threads;
|
||||||
size_t m_param_size;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizerList();
|
GSRasterizerList();
|
||||||
virtual ~GSRasterizerList();
|
virtual ~GSRasterizerList();
|
||||||
|
|
||||||
template<class DS, class PARAM> void Create(int threads)
|
template<class DS> void Create(int threads)
|
||||||
{
|
{
|
||||||
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
|
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
|
||||||
|
|
||||||
m_param = _aligned_malloc(sizeof(PARAM), 32);
|
push_back(new GSRasterizer(new DS()));
|
||||||
m_param_size = sizeof(PARAM);
|
|
||||||
|
|
||||||
m_syncstart = 0;
|
|
||||||
|
|
||||||
push_back(new GSRasterizer(new DS((PARAM*)m_param), 0, threads));
|
|
||||||
|
|
||||||
for(int i = 1; i < threads; i++)
|
for(int i = 1; i < threads; i++)
|
||||||
{
|
{
|
||||||
HANDLE ready = CreateEvent(NULL, FALSE, TRUE, NULL);
|
HANDLE ready = CreateEvent(NULL, FALSE, TRUE, NULL);
|
||||||
|
|
||||||
push_back(new GSRasterizerMT(new DS((PARAM*)m_param), i, threads, ready, m_sync));
|
push_back(new GSRasterizerMT(new DS(), ready, m_sync));
|
||||||
|
|
||||||
m_ready.push_back(ready);
|
m_ready.push_back(ready);
|
||||||
|
|
||||||
_interlockedbittestandset(&m_syncstart, i);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Sync();
|
void Sync();
|
||||||
|
|
||||||
// IRasterizer
|
void Draw(const GSRasterizerData* data, int width, int height);
|
||||||
|
|
||||||
void Draw(const GSRasterizerData* data);
|
|
||||||
void GetStats(GSRasterizerStats& stats);
|
void GetStats(GSRasterizerStats& stats);
|
||||||
void PrintStats();
|
void PrintStats();
|
||||||
};
|
};
|
||||||
|
|
|
@ -31,7 +31,7 @@ GSRendererSW::GSRendererSW(int threads)
|
||||||
|
|
||||||
memset(m_texture, 0, sizeof(m_texture));
|
memset(m_texture, 0, sizeof(m_texture));
|
||||||
|
|
||||||
m_rl.Create<GSDrawScanline, GSScanlineGlobalData>(threads);
|
m_rl.Create<GSDrawScanline>(threads);
|
||||||
|
|
||||||
InitVertexKick<GSRendererSW>();
|
InitVertexKick<GSRendererSW>();
|
||||||
}
|
}
|
||||||
|
@ -179,10 +179,10 @@ void GSRendererSW::Draw()
|
||||||
data.frame = m_perfmon.GetFrame();
|
data.frame = m_perfmon.GetFrame();
|
||||||
data.param = &gd;
|
data.param = &gd;
|
||||||
|
|
||||||
m_rl.Draw(&data);
|
|
||||||
|
|
||||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
|
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
|
||||||
|
|
||||||
|
m_rl.Draw(&data, r.width(), r.height());
|
||||||
|
|
||||||
if(gd.sel.fwrite)
|
if(gd.sel.fwrite)
|
||||||
{
|
{
|
||||||
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
|
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
|
||||||
|
|
|
@ -760,7 +760,9 @@
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="GSRendererHW.cpp" />
|
<ClCompile Include="GSRendererHW.cpp" />
|
||||||
<ClCompile Include="GSRendererNull.cpp" />
|
<ClCompile Include="GSRendererNull.cpp" />
|
||||||
<ClCompile Include="GSRendererSW.cpp" />
|
<ClCompile Include="GSRendererSW.cpp">
|
||||||
|
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="GSSetting.cpp" />
|
<ClCompile Include="GSSetting.cpp" />
|
||||||
<ClCompile Include="GSSettingsDlg.cpp" />
|
<ClCompile Include="GSSettingsDlg.cpp" />
|
||||||
<ClCompile Include="GSSetupPrimCodeGenerator.cpp" />
|
<ClCompile Include="GSSetupPrimCodeGenerator.cpp" />
|
||||||
|
|
Loading…
Reference in New Issue