Merge pull request #1512 from PCSX2/gsdx-dynamic-thread-height

gsdx sw: add extrathreads_height to control the quantity of pixels pr…
This commit is contained in:
Gregory Hainaut 2016-08-12 15:52:13 +02:00 committed by GitHub
commit 19e992869a
3 changed files with 49 additions and 35 deletions

View File

@ -24,14 +24,21 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSRasterizer.h" #include "GSRasterizer.h"
// - for more threads screen segments should be smaller to better distribute the pixels
// - but not too small to keep the threading overhead low
// - ideal value between 3 and 5, or log2(64 / number of threads)
#define THREAD_HEIGHT 4
int GSRasterizerData::s_counter = 0; int GSRasterizerData::s_counter = 0;
static int compute_best_thread_height(int threads) {
	// Picks the shift count used to map a scanline number to a per-thread
	// band index (callers use it as `top >> height` and `1 << height`).
	//
	// Tuning notes:
	// - with more threads, smaller bands spread the pixels more evenly
	// - bands that are too small raise the threading overhead
	// - the sweet spot is between 3 and 5, or log2(64 / number of threads)
	//
	// `threads` is currently not consulted; the value comes from the
	// "extrathreads_height" configuration entry only.
	const int configured = theApp.GetConfigI("extrathreads_height");

	// Accept 1..8 inclusive; anything else falls back to 4.
	const bool usable = (configured >= 1) && (configured <= 8);

	return usable ? configured : 4;
}
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon) GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
: m_perfmon(perfmon) : m_perfmon(perfmon)
, m_ds(ds) , m_ds(ds)
@ -40,14 +47,16 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
{ {
memset(&m_pixels, 0, sizeof(m_pixels)); memset(&m_pixels, 0, sizeof(m_pixels));
m_thread_height = compute_best_thread_height(threads);
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false); m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
m_edge.count = 0; m_edge.count = 0;
m_scanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64); m_scanline = (uint8*)_aligned_malloc((2048 >> m_thread_height) + 16, 64);
int row = 0; int row = 0;
while(row < (2048 >> THREAD_HEIGHT)) while(row < (2048 >> m_thread_height))
{ {
for(int i = 0; i < threads; i++, row++) for(int i = 0; i < threads; i++, row++)
{ {
@ -69,15 +78,15 @@ bool GSRasterizer::IsOneOfMyScanlines(int top) const
{ {
ASSERT(top >= 0 && top < 2048); ASSERT(top >= 0 && top < 2048);
return m_scanline[top >> THREAD_HEIGHT] != 0; return m_scanline[top >> m_thread_height] != 0;
} }
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
{ {
ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048); ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048);
top = top >> THREAD_HEIGHT; top = top >> m_thread_height;
bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT; bottom = (bottom + (1 << m_thread_height) - 1) >> m_thread_height;
while(top < bottom) while(top < bottom)
{ {
@ -92,13 +101,13 @@ bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
int GSRasterizer::FindMyNextScanline(int top) const int GSRasterizer::FindMyNextScanline(int top) const
{ {
int i = top >> THREAD_HEIGHT; int i = top >> m_thread_height;
if(m_scanline[i] == 0) if(m_scanline[i] == 0)
{ {
while(m_scanline[++i] == 0); while(m_scanline[++i] == 0);
top = i << THREAD_HEIGHT; top = i << m_thread_height;
} }
return top; return top;
@ -566,7 +575,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c
if(!IsOneOfMyScanlines(top)) if(!IsOneOfMyScanlines(top))
{ {
top += (m_threads - 1) << THREAD_HEIGHT; top += (m_threads - 1) << m_thread_height;
} }
} }
@ -758,7 +767,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
if(!IsOneOfMyScanlines(top)) if(!IsOneOfMyScanlines(top))
{ {
top += (m_threads - 1) << THREAD_HEIGHT; top += (m_threads - 1) << m_thread_height;
} }
} }
@ -810,7 +819,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
while(top < bottom) while(top < bottom)
{ {
r.top = top; r.top = top;
r.bottom = std::min<int>((top + (1 << THREAD_HEIGHT)) & ~((1 << THREAD_HEIGHT) - 1), bottom); r.bottom = std::min<int>((top + (1 << m_thread_height)) & ~((1 << m_thread_height) - 1), bottom);
m_ds->DrawRect(r, scan); m_ds->DrawRect(r, scan);
@ -819,7 +828,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
m_pixels.actual += pixels; m_pixels.actual += pixels;
m_pixels.total += pixels; m_pixels.total += pixels;
top = r.bottom + ((m_threads - 1) << THREAD_HEIGHT); top = r.bottom + ((m_threads - 1) << m_thread_height);
} }
} }
@ -1132,11 +1141,13 @@ void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& sca
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon) GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
: m_perfmon(perfmon) : m_perfmon(perfmon)
{ {
m_scanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64); m_thread_height = compute_best_thread_height(threads);
m_scanline = (uint8*)_aligned_malloc((2048 >> m_thread_height) + 16, 64);
int row = 0; int row = 0;
while(row < (2048 >> THREAD_HEIGHT)) while(row < (2048 >> m_thread_height))
{ {
for(int i = 0; i < threads; i++, row++) for(int i = 0; i < threads; i++, row++)
{ {
@ -1161,8 +1172,8 @@ void GSRasterizerList::Queue(const shared_ptr<GSRasterizerData>& data)
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048); ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);
int top = r.top >> THREAD_HEIGHT; int top = r.top >> m_thread_height;
int bottom = std::min<int>((r.bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT, top + m_workers.size()); int bottom = std::min<int>((r.bottom + (1 << m_thread_height) - 1) >> m_thread_height, top + m_workers.size());
while(top < bottom) while(top < bottom)
{ {

View File

@ -129,6 +129,7 @@ protected:
IDrawScanline* m_ds; IDrawScanline* m_ds;
int m_id; int m_id;
int m_threads; int m_threads;
int m_thread_height;
uint8* m_scanline; uint8* m_scanline;
GSVector4i m_scissor; GSVector4i m_scissor;
GSVector4 m_fscissor_x; GSVector4 m_fscissor_x;
@ -198,6 +199,7 @@ protected:
GSPerfMon* m_perfmon; GSPerfMon* m_perfmon;
vector<GSWorker*> m_workers; vector<GSWorker*> m_workers;
uint8* m_scanline; uint8* m_scanline;
int m_thread_height;
GSRasterizerList(int threads, GSPerfMon* perfmon); GSRasterizerList(int threads, GSPerfMon* perfmon);

View File

@ -290,6 +290,7 @@ void GSdxApp::Init()
m_default_configuration["debug_opengl"] = "0"; m_default_configuration["debug_opengl"] = "0";
m_default_configuration["dump"] = "0"; m_default_configuration["dump"] = "0";
m_default_configuration["extrathreads"] = "2"; m_default_configuration["extrathreads"] = "2";
m_default_configuration["extrathreads_height"] = "4";
m_default_configuration["filter"] = "2"; m_default_configuration["filter"] = "2";
m_default_configuration["force_texture_clear"] = "0"; m_default_configuration["force_texture_clear"] = "0";
m_default_configuration["fxaa"] = "0"; m_default_configuration["fxaa"] = "0";