GSdx: 5-10% speedup in multi-threaded mode, replaced that modulo operator with a lookup table.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4505 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-29 14:07:48 +00:00
parent 686b6da8e5
commit d20da5f268
2 changed files with 38 additions and 26 deletions

View File

@ -24,32 +24,25 @@
#include "stdafx.h"
#include "GSRasterizer.h"
// Define UseConstThreadCount to remove a lot of non-const div/modulus ops from the rasterization process.
// Likely a measurable speedup, but it limits threading to 1, 2, 4, or 8 threads.
// note by rama: Speedup is around 5% on average.
// #define UseConstThreadCount
#ifdef UseConstThreadCount
// ThreadsConst - const number of threads. User-configured threads (in GSdx panel) must match
// this value if UseConstThreadCount is enabled. [yeah, it's hacky for now]
static const int ThreadsConst = 2;
// Bit mask (ThreadsConst - 1) that IsOneOfMyScanlines ANDs with the scanline number
// in place of a modulo.
static const int ThreadMaskConst = ThreadsConst - 1;
#endif
// Scanline numbers are shifted right by THREAD_HEIGHT before being mapped to a thread,
// so scanlines are handed out in bands of (1 << THREAD_HEIGHT) = 32 consecutive rows.
// The per-band ownership table therefore has (2048 >> THREAD_HEIGHT) = 64 entries.
#define THREAD_HEIGHT 5
// Constructs a rasterizer around the scanline drawer `ds` (stored in m_ds, which the
// destructor deletes), allocates the edge vertex buffer and the per-band ownership
// table, and configures the instance as thread 0 of 1.
GSRasterizer::GSRasterizer(IDrawScanline* ds)
: m_ds(ds)
// NOTE(review): m_id and m_threads each appear twice in this initializer list. This looks
// like the pre-change (0 / 1) and post-change (-1 / -1) lines of the commit shown together;
// confirm that only one pair exists in the real file.
, m_id(0)
, m_threads(1)
// -1 can never equal a real id/thread count, so the SetThreadId(0, 1) call below always
// sees a change and fills m_myscanline.
, m_id(-1)
, m_threads(-1)
{
// Room for 2048 edge vertices.
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0;
// One byte per band of scanlines (2048 >> THREAD_HEIGHT entries) plus 16 spare bytes,
// aligned to 64 bytes.
m_myscanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64);
// Default to single-threaded ownership: every band belongs to this rasterizer.
SetThreadId(0, 1);
}
GSRasterizer::~GSRasterizer()
{
_aligned_free(m_myscanline);
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
delete m_ds;
@ -57,15 +50,7 @@ GSRasterizer::~GSRasterizer()
// Returns true when `scanline` belongs to this rasterizer instance.
// NOTE(review): this listing contains both the #ifdef'ed mask/modulo returns and the
// table-lookup return. It looks like the removed and added lines of the commit shown
// together; as written, the final return is unreachable. Confirm which lines are in the
// real file.
bool GSRasterizer::IsOneOfMyScanlines(int scanline) const
{
#ifdef UseConstThreadCount
// Compile-time thread count: a bit mask replaces the modulo.
return ThreadMaskConst == 0 || (scanline & ThreadMaskConst) == m_id;
#else
// Run-time thread count: bands of (1 << THREAD_HEIGHT) scanlines are assigned round-robin.
return m_threads == 1 || ((scanline >> THREAD_HEIGHT) % m_threads) == m_id;
#endif
// Lookup in the per-band table filled by SetThreadId; the index is the band number
// (scanline >> THREAD_HEIGHT). No bounds check is performed here.
return m_myscanline[scanline >> THREAD_HEIGHT] != 0;
}
void GSRasterizer::Draw(const GSRasterizerData* data)
@ -115,6 +100,32 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
m_ds->EndDraw(m_stats, data->frame);
}
// Assigns this rasterizer its thread index `id` out of `threads` workers and rebuilds
// the per-band ownership table m_myscanline that IsOneOfMyScanlines reads.
//
// Bands (groups of 1 << THREAD_HEIGHT scanlines) are assigned round-robin: band `row`
// belongs to thread `row % threads`. With a single thread (threads <= 1) every band
// is owned. The table is only rebuilt when id or threads actually changes.
void GSRasterizer::SetThreadId(int id, int threads)
{
	if(m_id != id || m_threads != threads)
	{
		m_id = id;
		m_threads = threads;

		// Number of table entries; one per band of scanlines.
		const int rows = 2048 >> THREAD_HEIGHT;

		if(threads > 1)
		{
			// A single bounded loop: the previous nested form always completed a full
			// pass of `threads` entries, so it wrote past `rows` whenever `rows` was not
			// a multiple of `threads` and could run off the end of the allocation for
			// large thread counts. Entries 0..rows-1 get the same values as before.
			for(int row = 0; row < rows; row++)
			{
				m_myscanline[row] = (row % threads) == id ? 1 : 0;
			}
		}
		else
		{
			memset(m_myscanline, 1, rows);
		}
	}
}
void GSRasterizer::GetStats(GSRasterizerStats& stats)
{
stats = m_stats;

View File

@ -77,9 +77,9 @@ public:
virtual ~IRasterizer() {}
virtual void Draw(const GSRasterizerData* data) = 0;
virtual void SetThreadId(int id, int threads) = 0;
virtual void GetStats(GSRasterizerStats& stats) = 0;
virtual void PrintStats() = 0;
virtual void SetThreadId(int id, int threads) = 0;
};
__aligned(class, 32) GSRasterizer : public GSAlignedClass<32>, public IRasterizer
@ -88,6 +88,7 @@ protected:
IDrawScanline* m_ds;
int m_id;
int m_threads;
uint8* m_myscanline;
GSRasterizerStats m_stats;
GSVector4i m_scissor;
GSVector4 m_fscissor;
@ -116,9 +117,9 @@ public:
// IRasterizer
void Draw(const GSRasterizerData* data);
void SetThreadId(int id, int threads);
void GetStats(GSRasterizerStats& stats);
void PrintStats() {m_ds->PrintStats();}
void SetThreadId(int id, int threads) {m_id = id; m_threads = threads;}
};
class GSRasterizerMT : public GSRasterizer, private GSThread