Tweaked the rasterizer to be about 10% faster in multi-threaded mode (2 or 3 threads); it is still far from optimal.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4308 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-02-17 18:22:47 +00:00
parent dea0f37ca9
commit 257d57ba52
11 changed files with 128 additions and 130 deletions

View File

@ -22,39 +22,41 @@
#include "StdAfx.h"
#include "GPUDrawScanline.h"
GPUDrawScanline::GPUDrawScanline(const GPUScanlineGlobalData* gd)
GPUDrawScanline::GPUDrawScanline()
: m_sp_map("GPUSetupPrim", &m_local)
, m_ds_map("GPUDrawScanline", &m_local)
{
memset(&m_local, 0, sizeof(m_local));
m_local.gd = gd;
m_local.gd = &m_global;
}
GPUDrawScanline::~GPUDrawScanline()
{
}
void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
void GPUDrawScanline::BeginDraw(const void* param)
{
if(m_local.gd->sel.tme && m_local.gd->sel.twin)
memcpy(&m_global, param, sizeof(m_global));
if(m_global.sel.tme && m_global.sel.twin)
{
uint32 u, v;
u = ~(m_local.gd->twin.x << 3) & 0xff; // TWW
v = ~(m_local.gd->twin.y << 3) & 0xff; // TWH
u = ~(m_global.twin.x << 3) & 0xff; // TWW
v = ~(m_global.twin.y << 3) & 0xff; // TWH
m_local.twin[0].u = GSVector4i((u << 16) | u);
m_local.twin[0].v = GSVector4i((v << 16) | v);
u = m_local.gd->twin.z << 3; // TWX
v = m_local.gd->twin.w << 3; // TWY
u = m_global.twin.z << 3; // TWX
v = m_global.twin.w << 3; // TWY
m_local.twin[1].u = GSVector4i((u << 16) | u) & ~m_local.twin[0].u;
m_local.twin[1].v = GSVector4i((v << 16) | v) & ~m_local.twin[0].v;
}
m_ds = m_ds_map[m_local.gd->sel];
m_ds = m_ds_map[m_global.sel];
m_de = NULL;
@ -66,10 +68,10 @@ void GPUDrawScanline::BeginDraw(const GSRasterizerData* data)
sel.key = 0;
sel.iip = m_local.gd->sel.iip;
sel.tfx = m_local.gd->sel.tfx;
sel.twin = m_local.gd->sel.twin;
sel.sprite = m_local.gd->sel.sprite;
sel.iip = m_global.sel.iip;
sel.tfx = m_global.sel.tfx;
sel.twin = m_global.sel.twin;
sel.sprite = m_global.sel.sprite;
m_sp = m_sp_map[sel];
}

View File

@ -29,18 +29,19 @@
class GPUDrawScanline : public IDrawScanline
{
GPUScanlineGlobalData m_global;
GPUScanlineLocalData m_local;
GSCodeGeneratorFunctionMap<GPUSetupPrimCodeGenerator, uint32, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GPUDrawScanlineCodeGenerator, uint32, DrawScanlinePtr> m_ds_map;
public:
GPUDrawScanline(const GPUScanlineGlobalData* gd);
GPUDrawScanline();
virtual ~GPUDrawScanline();
// IDrawScanline
void BeginDraw(const GSRasterizerData* data);
void BeginDraw(const void* param);
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
void PrintStats() {m_ds_map.PrintStats();}
};

View File

@ -27,7 +27,7 @@ GPURendererSW::GPURendererSW(GSDevice* dev, int threads)
: GPURendererT(dev)
, m_texture(NULL)
{
m_rl.Create<GPUDrawScanline, GPUScanlineGlobalData>(threads);
m_rl.Create<GPUDrawScanline>(threads);
}
GPURendererSW::~GPURendererSW()
@ -129,32 +129,30 @@ void GPURendererSW::Draw()
default: __assume(0);
}
m_rl.Draw(&data);
// TODO: VertexTrace
// TODO
GSVector4 tl(+1e10f);
GSVector4 br(-1e10f);
for(int i = 0, j = m_count; i < j; i++)
{
GSVector4 tl(+1e10f);
GSVector4 br(-1e10f);
GSVector4 p = m_vertices[i].p;
for(int i = 0, j = m_count; i < j; i++)
{
GSVector4 p = m_vertices[i].p;
tl = tl.min(p);
br = br.max(p);
}
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);
r.left >>= m_scale.x;
r.top >>= m_scale.y;
r.right >>= m_scale.x;
r.bottom >>= m_scale.y;
Invalidate(r);
tl = tl.min(p);
br = br.max(p);
}
GSVector4i r = GSVector4i(tl.xyxy(br)).rintersect(data.scissor);
r.left >>= m_scale.x;
r.top >>= m_scale.y;
r.right >>= m_scale.x;
r.bottom >>= m_scale.y;
m_rl.Draw(&data, r.width(), r.height());
Invalidate(r);
m_rl.Sync();
GSRasterizerStats stats;

View File

@ -363,7 +363,7 @@ EXPORT_C GSvsync(int field)
{
#ifdef _WINDOWS
if( s_gs->m_wnd.IsManaged() )
if(s_gs->m_wnd.IsManaged())
{
MSG msg;
@ -375,6 +375,7 @@ EXPORT_C GSvsync(int field)
DispatchMessage(&msg);
}
}
#endif
s_gs->VSync(field);
@ -384,7 +385,7 @@ EXPORT_C_(uint32) GSmakeSnapshot(char* path)
{
string s(path);
if(s.back() != '\\')
if(!s.empty() && s[s.length() - 1] != '\\')
{
s = s + "\\";
}

View File

@ -23,28 +23,30 @@
#include "GSDrawScanline.h"
#include "GSTextureCacheSW.h"
GSDrawScanline::GSDrawScanline(GSScanlineGlobalData* gd)
GSDrawScanline::GSDrawScanline()
: m_sp_map("GSSetupPrim", &m_local)
, m_ds_map("GSDrawScanline", &m_local)
{
memset(&m_local, 0, sizeof(m_local));
m_local.gd = gd;
m_local.gd = &m_global;
}
GSDrawScanline::~GSDrawScanline()
{
}
void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
void GSDrawScanline::BeginDraw(const void* param)
{
m_ds = m_ds_map[m_local.gd->sel];
memcpy(&m_global, param, sizeof(m_global));
if(m_local.gd->sel.aa1)// && (m_state->m_perfmon.GetFrame() & 0x40))
m_ds = m_ds_map[m_global.sel];
if(m_global.sel.aa1)
{
GSScanlineSelector sel;
sel.key = m_local.gd->sel.key;
sel.key = m_global.sel.key;
sel.zwrite = 0;
sel.edge = 1;
@ -55,7 +57,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
m_de = NULL;
}
if(m_local.gd->sel.IsSolidRect())
if(m_global.sel.IsSolidRect())
{
m_dr = (DrawRectPtr)&GSDrawScanline::DrawRect;
}
@ -70,15 +72,15 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
sel.key = 0;
sel.iip = m_local.gd->sel.iip;
sel.tfx = m_local.gd->sel.tfx;
sel.tcc = m_local.gd->sel.tcc;
sel.fst = m_local.gd->sel.fst;
sel.fge = m_local.gd->sel.fge;
sel.sprite = m_local.gd->sel.sprite;
sel.fb = m_local.gd->sel.fb;
sel.zb = m_local.gd->sel.zb;
sel.zoverflow = m_local.gd->sel.zoverflow;
sel.iip = m_global.sel.iip;
sel.tfx = m_global.sel.tfx;
sel.tcc = m_global.sel.tcc;
sel.fst = m_global.sel.fst;
sel.fge = m_global.sel.fge;
sel.sprite = m_global.sel.sprite;
sel.fb = m_global.sel.fb;
sel.zb = m_global.sel.zb;
sel.zoverflow = m_global.sel.zoverflow;
m_sp = m_sp_map[sel];
}
@ -97,16 +99,16 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
uint32 m;
m = m_local.gd->zm.u32[0];
m = m_global.zm.u32[0];
if(m != 0xffffffff)
{
const int* zbr = m_local.gd->zbr;
const int* zbc = m_local.gd->zbc;
const int* zbr = m_global.zbr;
const int* zbc = m_global.zbc;
uint32 z = (uint32)v.p.z;
if(m_local.gd->sel.zpsm != 2)
if(m_global.sel.zpsm != 2)
{
if(m == 0)
{
@ -130,21 +132,21 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
}
}
m = m_local.gd->fm.u32[0];
m = m_global.fm.u32[0];
if(m != 0xffffffff)
{
const int* fbr = m_local.gd->fbr;
const int* fbc = m_local.gd->fbc;
const int* fbr = m_global.fbr;
const int* fbc = m_global.fbc;
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
if(m_local.gd->sel.fba)
if(m_global.sel.fba)
{
c |= 0x80000000;
}
if(m_local.gd->sel.fpsm != 2)
if(m_global.sel.fpsm != 2)
{
if(m == 0)
{
@ -213,7 +215,7 @@ void GSDrawScanline::FillRect(const int* RESTRICT row, const int* RESTRICT col,
{
if(r.x >= r.z) return;
T* vm = (T*)m_local.gd->vm;
T* vm = (T*)m_global.vm;
for(int y = r.y; y < r.w; y++)
{
@ -231,7 +233,7 @@ void GSDrawScanline::FillBlock(const int* RESTRICT row, const int* RESTRICT col,
{
if(r.x >= r.z) return;
T* vm = (T*)m_local.gd->vm;
T* vm = (T*)m_global.vm;
for(int y = r.y; y < r.w; y += 8)
{

View File

@ -29,6 +29,7 @@
class GSDrawScanline : public IDrawScanline
{
GSScanlineGlobalData m_global;
GSScanlineLocalData m_local;
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
@ -46,12 +47,12 @@ class GSDrawScanline : public IDrawScanline
__forceinline void FillBlock(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m);
public:
GSDrawScanline(GSScanlineGlobalData* gd);
GSDrawScanline();
virtual ~GSDrawScanline();
// IDrawScanline
void BeginDraw(const GSRasterizerData* data);
void BeginDraw(const void* param);
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
void PrintStats() {m_ds_map.PrintStats();}
};

View File

@ -1844,26 +1844,10 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
// clamp.blend8(repeat, m_local.gd->t.mask);
if(m_cpu.has(util::Cpu::tSSE41))
{
vmovdqa(xmm0, ptr[&m_local.gd->t.mask]);
vmovdqa(xmm0, ptr[&m_local.gd->t.mask]);
vpblendvb(uv0, xmm1, xmm0);
vpblendvb(uv1, xmm6, xmm0);
}
else
{
vmovdqa(xmm0, ptr[&m_local.gd->t.invmask]);
vmovdqa(xmm4, xmm0);
vpand(uv0, xmm0);
vpandn(xmm0, xmm1);
vpor(uv0, xmm0);
vpand(uv1, xmm4);
vpandn(xmm4, xmm6);
vpor(uv1, xmm4);
}
vpblendvb(uv0, xmm1, xmm0);
vpblendvb(uv1, xmm6, xmm0);
}
}
else

View File

@ -46,10 +46,14 @@
static const int ThreadMaskConst = ThreadsConst - 1;
#endif
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
// Scanlines are split into horizontal bands of (1 << THREAD_HEIGHT) rows,
// i.e. 32 rows with the value below, so that the bands handed to each thread
// align with the page height. THREAD_HEIGHT is the shift that turns a scanline
// number into a band index (see IsOneOfMyScanlines) and a draw height into a
// thread count (see GSRasterizerList::Draw).
#define THREAD_HEIGHT 5
GSRasterizer::GSRasterizer(IDrawScanline* ds)
: m_ds(ds)
, m_id(id)
, m_threads(threads)
, m_id(0)
, m_threads(1)
{
}
@ -66,14 +70,14 @@ __forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
#else
return m_threads == 1 || (scanline % m_threads) == m_id;
return m_threads == 1 || ((scanline >> THREAD_HEIGHT) % m_threads) == m_id;
#endif
}
void GSRasterizer::Draw(const GSRasterizerData* data)
{
m_ds->BeginDraw(data);
m_ds->BeginDraw(data->param);
const GSVector4i scissor = data->scissor;
const GSVertexSW* vertices = data->vertices;
@ -841,8 +845,8 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
//
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, HANDLE ready, volatile long& sync)
: GSRasterizer(ds, id, threads)
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, HANDLE ready, volatile long& sync)
: GSRasterizer(ds)
, m_ready(ready)
, m_sync(sync)
, m_data(NULL)
@ -859,8 +863,8 @@ GSRasterizerMT::~GSRasterizerMT()
CloseThread();
DeleteObject(m_exit);
DeleteObject(m_draw);
CloseHandle(m_exit);
CloseHandle(m_draw);
}
void GSRasterizerMT::Draw(const GSRasterizerData* data)
@ -909,8 +913,6 @@ void GSRasterizerMT::ThreadProc()
// Constructs the rasterizer list without adding any rasterizers; only the
// members named in the initializer list are set here.
// NOTE(review): m_threads is not initialized by this constructor, yet Sync()
// reads it and it is only assigned in Draw(). Confirm that Sync() is never
// called before the first Draw().
GSRasterizerList::GSRasterizerList()
: m_sync(0)
, m_syncstart(0)
, m_param(NULL)
{
}
@ -919,8 +921,6 @@ GSRasterizerList::~GSRasterizerList()
for(size_t i = 0; i < size(); i++) delete (*this)[i];
for(size_t i = 0; i < m_ready.size(); i++) CloseHandle(m_ready[i]);
if(m_param) _aligned_free(m_param);
}
void GSRasterizerList::Sync()
@ -931,13 +931,16 @@ void GSRasterizerList::Sync()
#else
WaitForMultipleObjects(m_ready.size(), &m_ready[0], TRUE, INFINITE);
if(m_threads > 1)
{
WaitForMultipleObjects(m_threads - 1, &m_ready[0], TRUE, INFINITE);
}
#endif
m_stats.ticks = __rdtsc() - m_start;
for(size_t i = 0; i < size(); i++)
for(int i = 0; i < m_threads; i++)
{
GSRasterizerStats s;
@ -948,21 +951,34 @@ void GSRasterizerList::Sync()
}
}
void GSRasterizerList::Draw(const GSRasterizerData* data)
void GSRasterizerList::Draw(const GSRasterizerData* data, int width, int height)
{
m_stats.Reset();
memcpy(m_param, data->param, m_param_size);
m_start = __rdtsc();
m_sync = m_syncstart;
m_threads = std::min<int>(1 + (height >> THREAD_HEIGHT), size());
for(size_t i = 1; i < size(); i++)
#ifdef UseSpinningFinish
m_sync = 0;
for(int i = 1; i < m_threads; i++)
{
m_sync |= 1 << i;
}
#endif
for(int i = 1; i < m_threads; i++)
{
(*this)[i]->SetThreadId(i, m_threads);
(*this)[i]->Draw(data);
}
(*this)[0]->SetThreadId(0, m_threads);
(*this)[0]->Draw(data);
}

View File

@ -55,7 +55,7 @@ public:
IDrawScanline() : m_sp(NULL), m_ds(NULL), m_de(NULL), m_dr(NULL) {}
virtual ~IDrawScanline() {}
virtual void BeginDraw(const GSRasterizerData* data) = 0;
virtual void BeginDraw(const void* param) = 0;
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
virtual void PrintStats() = 0;
@ -76,6 +76,7 @@ public:
virtual void Draw(const GSRasterizerData* data) = 0;
virtual void GetStats(GSRasterizerStats& stats) = 0;
virtual void PrintStats() = 0;
virtual void SetThreadId(int id, int threads) = 0;
};
class GSRasterizer : public IRasterizer
@ -104,7 +105,7 @@ protected:
inline bool IsOneOfMyScanlines(int scanline) const;
public:
GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0);
GSRasterizer(IDrawScanline* ds);
virtual ~GSRasterizer();
// IRasterizer
@ -112,6 +113,7 @@ public:
void Draw(const GSRasterizerData* data);
void GetStats(GSRasterizerStats& stats);
void PrintStats() {m_ds->PrintStats();}
// Records this rasterizer's index (id) and the number of rasterizers taking
// part in the current draw (threads). IsOneOfMyScanlines uses both values to
// decide which scanlines belong to this instance.
void SetThreadId(int id, int threads) {m_id = id; m_threads = threads;}
};
class GSRasterizerMT : public GSRasterizer, private GSThread
@ -126,7 +128,7 @@ protected:
void ThreadProc();
public:
GSRasterizerMT(IDrawScanline* ds, int id, int threads, HANDLE ready, volatile long& sync);
GSRasterizerMT(IDrawScanline* ds, HANDLE ready, volatile long& sync);
virtual ~GSRasterizerMT();
// IRasterizer
@ -134,49 +136,38 @@ public:
void Draw(const GSRasterizerData* data);
};
class GSRasterizerList : protected vector<IRasterizer*>, public IRasterizer
class GSRasterizerList : protected vector<IRasterizer*>
{
protected:
std::vector<HANDLE> m_ready;
volatile long m_sync;
long m_syncstart;
GSRasterizerStats m_stats;
int64 m_start;
void* m_param;
size_t m_param_size;
int m_threads;
public:
GSRasterizerList();
virtual ~GSRasterizerList();
template<class DS, class PARAM> void Create(int threads)
template<class DS> void Create(int threads)
{
threads = std::max<int>(threads, 1); // TODO: min(threads, number of cpu cores)
m_param = _aligned_malloc(sizeof(PARAM), 32);
m_param_size = sizeof(PARAM);
m_syncstart = 0;
push_back(new GSRasterizer(new DS((PARAM*)m_param), 0, threads));
push_back(new GSRasterizer(new DS()));
for(int i = 1; i < threads; i++)
{
HANDLE ready = CreateEvent(NULL, FALSE, TRUE, NULL);
push_back(new GSRasterizerMT(new DS((PARAM*)m_param), i, threads, ready, m_sync));
push_back(new GSRasterizerMT(new DS(), ready, m_sync));
m_ready.push_back(ready);
_interlockedbittestandset(&m_syncstart, i);
}
}
void Sync();
// IRasterizer
void Draw(const GSRasterizerData* data);
void Draw(const GSRasterizerData* data, int width, int height);
void GetStats(GSRasterizerStats& stats);
void PrintStats();
};

View File

@ -31,7 +31,7 @@ GSRendererSW::GSRendererSW(int threads)
memset(m_texture, 0, sizeof(m_texture));
m_rl.Create<GSDrawScanline, GSScanlineGlobalData>(threads);
m_rl.Create<GSDrawScanline>(threads);
InitVertexKick<GSRendererSW>();
}
@ -179,10 +179,10 @@ void GSRendererSW::Draw()
data.frame = m_perfmon.GetFrame();
data.param = &gd;
m_rl.Draw(&data);
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
m_rl.Draw(&data, r.width(), r.height());
if(gd.sel.fwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.fb, r);

View File

@ -760,7 +760,9 @@
</ClCompile>
<ClCompile Include="GSRendererHW.cpp" />
<ClCompile Include="GSRendererNull.cpp" />
<ClCompile Include="GSRendererSW.cpp" />
<ClCompile Include="GSRendererSW.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSSetting.cpp" />
<ClCompile Include="GSSettingsDlg.cpp" />
<ClCompile Include="GSSetupPrimCodeGenerator.cpp" />