mirror of https://github.com/PCSX2/pcsx2.git
GSdx: 5-10% speedup in multi-threaded mode, replaced that modulo operator with a lookup table.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4505 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
686b6da8e5
commit
d20da5f268
|
@ -24,32 +24,25 @@
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "GSRasterizer.h"
|
#include "GSRasterizer.h"
|
||||||
|
|
||||||
// Set this to 1 to remove a lot of non-const div/modulus ops from the rasterization process.
|
|
||||||
// Might likely be a measurable speedup but limits threading to 1, 2, 4, and 8 threads.
|
|
||||||
// note by rama: Speedup is around 5% on average.
|
|
||||||
|
|
||||||
// #define UseConstThreadCount
|
|
||||||
|
|
||||||
#ifdef UseConstThreadCount
|
|
||||||
// ThreadsConst - const number of threads. User-configured threads (in GSdx panel) must match
|
|
||||||
// this value if UseConstThreadCount is enabled. [yeah, it's hacky for now]
|
|
||||||
static const int ThreadsConst = 2;
|
|
||||||
static const int ThreadMaskConst = ThreadsConst - 1;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define THREAD_HEIGHT 5
|
#define THREAD_HEIGHT 5
|
||||||
|
|
||||||
// Builds a rasterizer around the given scanline drawer.
//
// m_id and m_threads start at -1 so that the SetThreadId(0, 1) call at the
// end of the constructor is seen as a change and fills the scanline
// ownership table.
GSRasterizer::GSRasterizer(IDrawScanline* ds)
	: m_ds(ds)
	, m_id(-1)
	, m_threads(-1)
{
	// Edge buffer: room for 2048 vertices, starts out empty.
	const size_t edge_bytes = sizeof(GSVertexSW) * 2048;

	m_edge.count = 0;
	m_edge.buff = (GSVertexSW*)vmalloc(edge_bytes, false);

	// Ownership table: one byte per band of (1 << THREAD_HEIGHT) scanlines,
	// plus 16 spare bytes, aligned to 64 bytes.
	const size_t table_bytes = (2048 >> THREAD_HEIGHT) + 16;

	m_myscanline = (uint8*)_aligned_malloc(table_bytes, 64);

	// Default configuration: a single thread that owns every scanline.
	SetThreadId(0, 1);
}
|
||||||
|
|
||||||
GSRasterizer::~GSRasterizer()
|
GSRasterizer::~GSRasterizer()
|
||||||
{
|
{
|
||||||
|
_aligned_free(m_myscanline);
|
||||||
|
|
||||||
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
|
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
|
||||||
|
|
||||||
delete m_ds;
|
delete m_ds;
|
||||||
|
@ -57,15 +50,7 @@ GSRasterizer::~GSRasterizer()
|
||||||
|
|
||||||
bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
|
||||||
{
|
{
|
||||||
#ifdef UseConstThreadCount
|
return m_myscanline[scanline >> THREAD_HEIGHT] != 0;
|
||||||
|
|
||||||
return ThreadMaskConst == 0 || (scanline & ThreadMaskConst) == m_id;
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
return m_threads == 1 || ((scanline >> THREAD_HEIGHT) % m_threads) == m_id;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::Draw(const GSRasterizerData* data)
|
void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
|
@ -115,6 +100,32 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
|
||||||
m_ds->EndDraw(m_stats, data->frame);
|
m_ds->EndDraw(m_stats, data->frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GSRasterizer::SetThreadId(int id, int threads)
|
||||||
|
{
|
||||||
|
if(m_id != id || m_threads != threads)
|
||||||
|
{
|
||||||
|
m_id = id;
|
||||||
|
m_threads = threads;
|
||||||
|
|
||||||
|
if(threads > 1)
|
||||||
|
{
|
||||||
|
int row = 0;
|
||||||
|
|
||||||
|
while(row < (2048 >> THREAD_HEIGHT))
|
||||||
|
{
|
||||||
|
for(int i = 0; i < threads; i++, row++)
|
||||||
|
{
|
||||||
|
m_myscanline[row] = i == id ? 1 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
memset(m_myscanline, 1, 2048 >> THREAD_HEIGHT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void GSRasterizer::GetStats(GSRasterizerStats& stats)
|
void GSRasterizer::GetStats(GSRasterizerStats& stats)
|
||||||
{
|
{
|
||||||
stats = m_stats;
|
stats = m_stats;
|
||||||
|
|
|
@ -77,9 +77,9 @@ public:
|
||||||
virtual ~IRasterizer() {}
|
virtual ~IRasterizer() {}
|
||||||
|
|
||||||
virtual void Draw(const GSRasterizerData* data) = 0;
|
virtual void Draw(const GSRasterizerData* data) = 0;
|
||||||
|
virtual void SetThreadId(int id, int threads) = 0;
|
||||||
virtual void GetStats(GSRasterizerStats& stats) = 0;
|
virtual void GetStats(GSRasterizerStats& stats) = 0;
|
||||||
virtual void PrintStats() = 0;
|
virtual void PrintStats() = 0;
|
||||||
virtual void SetThreadId(int id, int threads) = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
__aligned(class, 32) GSRasterizer : public GSAlignedClass<32>, public IRasterizer
|
__aligned(class, 32) GSRasterizer : public GSAlignedClass<32>, public IRasterizer
|
||||||
|
@ -88,6 +88,7 @@ protected:
|
||||||
IDrawScanline* m_ds;
|
IDrawScanline* m_ds;
|
||||||
int m_id;
|
int m_id;
|
||||||
int m_threads;
|
int m_threads;
|
||||||
|
uint8* m_myscanline;
|
||||||
GSRasterizerStats m_stats;
|
GSRasterizerStats m_stats;
|
||||||
GSVector4i m_scissor;
|
GSVector4i m_scissor;
|
||||||
GSVector4 m_fscissor;
|
GSVector4 m_fscissor;
|
||||||
|
@ -116,9 +117,9 @@ public:
|
||||||
// IRasterizer
|
// IRasterizer
|
||||||
|
|
||||||
void Draw(const GSRasterizerData* data);
|
void Draw(const GSRasterizerData* data);
|
||||||
|
void SetThreadId(int id, int threads);
|
||||||
void GetStats(GSRasterizerStats& stats);
|
void GetStats(GSRasterizerStats& stats);
|
||||||
void PrintStats() {m_ds->PrintStats();}
|
void PrintStats() {m_ds->PrintStats();}
|
||||||
void SetThreadId(int id, int threads) {m_id = id; m_threads = threads;}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSRasterizerMT : public GSRasterizer, private GSThread
|
class GSRasterizerMT : public GSRasterizer, private GSThread
|
||||||
|
|
Loading…
Reference in New Issue