mirror of https://github.com/PCSX2/pcsx2.git
GSdx: SW rasterizer converted to use pthreads semaphores in place of spinwaits. Performance mileage will vary on this; it probably favors dual-core machines over quads or i7s. Some tinkering might eke some more fps out of it and make it a speedup in all cases, though.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2296 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
2413af5b6e
commit
97041701ae
|
@ -24,6 +24,24 @@
|
||||||
#include "StdAfx.h"
|
#include "StdAfx.h"
|
||||||
#include "GSRasterizer.h"
|
#include "GSRasterizer.h"
|
||||||
|
|
||||||
|
#include "pthread.h"
|
||||||
|
|
||||||
|
// Using a spinning finish on the main (MTGS) thread is apparently a big win still, over trying
|
||||||
|
// to wait out all the pending m_finished semaphores. It leaves one spinwait in the rasterizer,
|
||||||
|
// but that's still worlds better than 2-6 spinning threads like before.
|
||||||
|
#define UseSpinningFinish 1
|
||||||
|
|
||||||
|
// Set this to 1 to remove a lot of non-const div/modulus ops from the rasterization process.
|
||||||
|
// Likely a measurable speedup, but limits threading to 1, 2, 4, or 8 threads.
|
||||||
|
#define UseConstThreadCount 0
|
||||||
|
|
||||||
|
#if !UseConstThreadCount
|
||||||
|
// ThreadsConst - const number of threads. User-configured threads (in GSdx panel) must match
|
||||||
|
// this value if UseConstThreadCount is enabled. [yeah, it's hacky for now]
|
||||||
|
static const int ThreadsConst = 2;
|
||||||
|
static const int ThreadMaskConst = ThreadsConst-1;
|
||||||
|
#endif
|
||||||
|
|
||||||
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads)
|
||||||
: m_ds(ds)
|
: m_ds(ds)
|
||||||
, m_id(id)
|
, m_id(id)
|
||||||
|
@ -36,6 +54,15 @@ GSRasterizer::~GSRasterizer()
|
||||||
delete m_ds;
|
delete m_ds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
||||||
|
{
|
||||||
|
#if UseConstThreadCount
|
||||||
|
return (ThreadMaskConst==0) || ((scanline & ThreadMaskConst) == m_id);
|
||||||
|
#else
|
||||||
|
return (scanline % m_threads) == m_id;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
{
|
{
|
||||||
m_dsf.ssl = NULL;
|
m_dsf.ssl = NULL;
|
||||||
|
@ -96,7 +123,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* v, const GSVector4i& scissor)
|
||||||
|
|
||||||
if(scissor.left <= p.x && p.x < scissor.right && scissor.top <= p.y && p.y < scissor.bottom)
|
if(scissor.left <= p.x && p.x < scissor.right && scissor.top <= p.y && p.y < scissor.bottom)
|
||||||
{
|
{
|
||||||
if((p.y % m_threads) == m_id)
|
if(IsOneOfMyScanlines(p.y))
|
||||||
{
|
{
|
||||||
m_dsf.ssp(v, *v);
|
m_dsf.ssp(v, *v);
|
||||||
|
|
||||||
|
@ -458,7 +485,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if((top % m_threads) == m_id)
|
if(IsOneOfMyScanlines(top))
|
||||||
{
|
{
|
||||||
GSVector4 lr = l.p.xyxy(r).ceil();
|
GSVector4 lr = l.p.xyxy(r).ceil();
|
||||||
|
|
||||||
|
@ -499,7 +526,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if((top % m_threads) == m_id)
|
if(IsOneOfMyScanlines(top))
|
||||||
{
|
{
|
||||||
GSVector4 lr = l.p.ceil();
|
GSVector4 lr = l.p.ceil();
|
||||||
|
|
||||||
|
@ -586,7 +613,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices, const GSVector4i& scis
|
||||||
|
|
||||||
for(; r.top < r.bottom; r.top++, scan.t += dedge.t)
|
for(; r.top < r.bottom; r.top++, scan.t += dedge.t)
|
||||||
{
|
{
|
||||||
if((r.top % m_threads) == m_id)
|
if(IsOneOfMyScanlines(r.top))
|
||||||
{
|
{
|
||||||
m_dsf.ssl(r.right, r.left, r.top, scan);
|
m_dsf.ssl(r.right, r.left, r.top, scan);
|
||||||
|
|
||||||
|
@ -661,7 +688,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int xi = x >> 16;
|
int xi = x >> 16;
|
||||||
int xf = x & 0xffff;
|
int xf = x & 0xffff;
|
||||||
|
|
||||||
if(scissor.left <= xi && xi < scissor.right && (xi % m_threads) == m_id)
|
if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
|
@ -689,7 +716,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int xi = (x >> 16) + 1;
|
int xi = (x >> 16) + 1;
|
||||||
int xf = x & 0xffff;
|
int xf = x & 0xffff;
|
||||||
|
|
||||||
if(scissor.left <= xi && xi < scissor.right && (xi % m_threads) == m_id)
|
if(scissor.left <= xi && xi < scissor.right && IsOneOfMyScanlines(xi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
|
@ -759,7 +786,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int yi = y >> 16;
|
int yi = y >> 16;
|
||||||
int yf = y & 0xffff;
|
int yf = y & 0xffff;
|
||||||
|
|
||||||
if(scissor.top <= yi && yi < scissor.bottom && (yi % m_threads) == m_id)
|
if(scissor.top <= yi && yi < scissor.bottom && IsOneOfMyScanlines(yi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
|
@ -787,7 +814,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
int yi = (y >> 16) + 1;
|
int yi = (y >> 16) + 1;
|
||||||
int yf = y & 0xffff;
|
int yf = y & 0xffff;
|
||||||
|
|
||||||
if(scissor.top <= yi && yi < scissor.bottom && (yi % m_threads) == m_id)
|
if(scissor.top <= yi && yi < scissor.bottom && IsOneOfMyScanlines(yi))
|
||||||
{
|
{
|
||||||
m_stats.pixels++;
|
m_stats.pixels++;
|
||||||
|
|
||||||
|
@ -811,108 +838,108 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, long* sync)
|
GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, int id, int threads, sem_t& finished, volatile long& sync)
|
||||||
: GSRasterizer(ds, id, threads)
|
: GSRasterizer(ds, id, threads)
|
||||||
|
, m_finished(finished)
|
||||||
, m_sync(sync)
|
, m_sync(sync)
|
||||||
, m_exit(false)
|
, m_exit(false)
|
||||||
, m_data(NULL)
|
, m_data(NULL)
|
||||||
{
|
{
|
||||||
if(id > 0)
|
sem_init(&m_semaphore, false, 0);
|
||||||
{
|
sem_init(&m_stopped, false, 0);
|
||||||
CreateThread();
|
CreateThread();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GSRasterizerMT::~GSRasterizerMT()
|
GSRasterizerMT::~GSRasterizerMT()
|
||||||
{
|
{
|
||||||
m_exit = true;
|
m_exit = true;
|
||||||
|
sem_post(&m_semaphore);
|
||||||
|
sem_wait(&m_stopped);
|
||||||
|
|
||||||
|
sem_destroy(&m_semaphore);
|
||||||
|
sem_destroy(&m_stopped);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerMT::Draw(const GSRasterizerData* data)
|
void GSRasterizerMT::Draw(const GSRasterizerData* data)
|
||||||
{
|
{
|
||||||
if(m_id == 0)
|
m_data = data;
|
||||||
{
|
sem_post(&m_semaphore);
|
||||||
__super::Draw(data);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_data = data;
|
|
||||||
|
|
||||||
_interlockedbittestandset(m_sync, m_id);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerMT::ThreadProc()
|
void GSRasterizerMT::ThreadProc()
|
||||||
{
|
{
|
||||||
// _mm_setcsr(MXCSR);
|
// _mm_setcsr(MXCSR);
|
||||||
|
|
||||||
while(!m_exit)
|
while( true )
|
||||||
{
|
{
|
||||||
if(*m_sync & (1 << m_id))
|
sem_wait(&m_semaphore);
|
||||||
{
|
|
||||||
__super::Draw(m_data);
|
|
||||||
|
|
||||||
_interlockedbittestandreset(m_sync, m_id);
|
if(m_exit) break;
|
||||||
}
|
|
||||||
|
__super::Draw(m_data);
|
||||||
|
|
||||||
|
if( UseSpinningFinish )
|
||||||
|
_interlockedbittestandreset( &m_sync, m_id );
|
||||||
else
|
else
|
||||||
{
|
sem_post(&m_finished);
|
||||||
_mm_pause();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sem_post(&m_stopped);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
GSRasterizerList::GSRasterizerList()
|
GSRasterizerList::GSRasterizerList()
|
||||||
{
|
{
|
||||||
// User/Source Coding Rule 24. (M impact, ML generality) Place each
|
m_threadcount = 0;
|
||||||
// synchronization variable alone, separated by 128 bytes or in a separate cache
|
sem_init(&m_finished, false, 0);
|
||||||
// line.
|
|
||||||
|
|
||||||
m_sync = (long*)_aligned_malloc(128, 64);
|
|
||||||
|
|
||||||
*m_sync = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GSRasterizerList::~GSRasterizerList()
|
GSRasterizerList::~GSRasterizerList()
|
||||||
{
|
{
|
||||||
FreeRasterizers();
|
FreeRasterizers();
|
||||||
|
sem_destroy(&m_finished);
|
||||||
_aligned_free(m_sync);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerList::FreeRasterizers()
|
void GSRasterizerList::FreeRasterizers()
|
||||||
{
|
{
|
||||||
for_each(begin(), end(), delete_object());
|
for(unsigned i=0; i<size(); ++i) delete (*this)[i];
|
||||||
|
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizerList::Draw(const GSRasterizerData* data)
|
void GSRasterizerList::Draw(const GSRasterizerData* data)
|
||||||
{
|
{
|
||||||
*m_sync = 0;
|
|
||||||
|
|
||||||
m_stats.Reset();
|
m_stats.Reset();
|
||||||
|
|
||||||
int64 start = __rdtsc();
|
int64 start = __rdtsc();
|
||||||
|
|
||||||
for(list<IRasterizer*>::reverse_iterator i = rbegin(); i != rend(); i++)
|
m_sync = m_syncstart;
|
||||||
|
|
||||||
|
for(unsigned i=1; i<size(); ++i)
|
||||||
{
|
{
|
||||||
(*i)->Draw(data);
|
(*this)[i]->Draw(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
while(*m_sync)
|
(*this)[0]->Draw(data);
|
||||||
|
|
||||||
|
if( UseSpinningFinish )
|
||||||
{
|
{
|
||||||
_mm_pause();
|
while(m_sync) _mm_pause();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(unsigned i=1; i<size(); ++i )
|
||||||
|
sem_wait(&m_finished);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_stats.ticks = __rdtsc() - start;
|
m_stats.ticks = __rdtsc() - start;
|
||||||
|
|
||||||
for(list<IRasterizer*>::iterator i = begin(); i != end(); i++)
|
for(unsigned i=0; i<size(); ++i)
|
||||||
{
|
{
|
||||||
GSRasterizerStats s;
|
GSRasterizerStats s;
|
||||||
|
|
||||||
(*i)->GetStats(s);
|
(*this)[i]->GetStats(s);
|
||||||
|
|
||||||
m_stats.pixels += s.pixels;
|
m_stats.pixels += s.pixels;
|
||||||
m_stats.prims = max(m_stats.prims, s.prims);
|
m_stats.prims = max(m_stats.prims, s.prims);
|
||||||
|
|
|
@ -27,6 +27,9 @@
|
||||||
#include "GSThread.h"
|
#include "GSThread.h"
|
||||||
#include "GSAlignedClass.h"
|
#include "GSAlignedClass.h"
|
||||||
|
|
||||||
|
#include "pthread.h"
|
||||||
|
#include "semaphore.h"
|
||||||
|
|
||||||
__declspec(align(16)) class GSRasterizerData
|
__declspec(align(16)) class GSRasterizerData
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -93,6 +96,8 @@ protected:
|
||||||
|
|
||||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side);
|
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, const GSVector4i& scissor, int orientation, int side);
|
||||||
|
|
||||||
|
inline bool IsOneOfMyScanlines(int scanline) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0);
|
GSRasterizer(IDrawScanline* ds, int id = 0, int threads = 0);
|
||||||
virtual ~GSRasterizer();
|
virtual ~GSRasterizer();
|
||||||
|
@ -106,14 +111,18 @@ public:
|
||||||
|
|
||||||
class GSRasterizerMT : public GSRasterizer, private GSThread
|
class GSRasterizerMT : public GSRasterizer, private GSThread
|
||||||
{
|
{
|
||||||
long* m_sync;
|
protected:
|
||||||
|
sem_t& m_finished;
|
||||||
|
volatile long& m_sync;
|
||||||
|
sem_t m_semaphore;
|
||||||
|
sem_t m_stopped;
|
||||||
bool m_exit;
|
bool m_exit;
|
||||||
const GSRasterizerData* m_data;
|
const GSRasterizerData* m_data;
|
||||||
|
|
||||||
void ThreadProc();
|
void ThreadProc();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSRasterizerMT(IDrawScanline* ds, int id, int threads, long* sync);
|
GSRasterizerMT(IDrawScanline* ds, int id, int threads, sem_t& finished, volatile long& sync);
|
||||||
virtual ~GSRasterizerMT();
|
virtual ~GSRasterizerMT();
|
||||||
|
|
||||||
// IRasterizer
|
// IRasterizer
|
||||||
|
@ -121,11 +130,14 @@ public:
|
||||||
void Draw(const GSRasterizerData* data);
|
void Draw(const GSRasterizerData* data);
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSRasterizerList : protected list<IRasterizer*>, public IRasterizer
|
class GSRasterizerList : protected vector<IRasterizer*>, public IRasterizer
|
||||||
{
|
{
|
||||||
long* m_sync;
|
protected:
|
||||||
|
int m_threadcount;
|
||||||
|
sem_t m_finished;
|
||||||
|
volatile long m_sync;
|
||||||
|
long m_syncstart;
|
||||||
GSRasterizerStats m_stats;
|
GSRasterizerStats m_stats;
|
||||||
|
|
||||||
void FreeRasterizers();
|
void FreeRasterizers();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -138,9 +150,13 @@ public:
|
||||||
|
|
||||||
threads = max(threads, 1); // TODO: min(threads, number of cpu cores)
|
threads = max(threads, 1); // TODO: min(threads, number of cpu cores)
|
||||||
|
|
||||||
for(int i = 0; i < threads; i++)
|
push_back(new GSRasterizer(new DS(parent, 0), 0, threads));
|
||||||
|
|
||||||
|
m_syncstart = 0;
|
||||||
|
for(int i = 1; i < threads; i++)
|
||||||
{
|
{
|
||||||
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, m_sync));
|
push_back(new GSRasterizerMT(new DS(parent, i), i, threads, m_finished, m_sync));
|
||||||
|
_interlockedbittestandset(&m_syncstart, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue