SoftRasterizer: Fix a longstanding bug where using a thread count that was not a power-of-two would result in many threads remaining idle.
This commit is contained in:
parent
cbd488e157
commit
b0a6188902
|
@ -744,13 +744,13 @@ void RasterizerUnit<RENDERER>::_runscanlines(const POLYGON_ATTR polyAttr, const
|
|||
//HACK: special handling for horizontal line poly
|
||||
if ( USELINEHACK && (left->Height == 0) && (right->Height == 0) && (left->Y < framebufferHeight) && (left->Y >= 0) )
|
||||
{
|
||||
const bool draw = (!SLI || (left->Y & this->_SLI_Mask) == this->_SLI_Value);
|
||||
const bool draw = ( !SLI || ((left->Y >= this->_SLI_startLine) && (left->Y < this->_SLI_endLine)) );
|
||||
if (draw) this->_drawscanline<ISSHADOWPOLYGON, USELINEHACK>(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right);
|
||||
}
|
||||
|
||||
while (Height--)
|
||||
{
|
||||
const bool draw = (!SLI || (left->Y & this->_SLI_Mask) == this->_SLI_Value);
|
||||
const bool draw = ( !SLI || ((left->Y >= this->_SLI_startLine) && (left->Y < this->_SLI_endLine)) );
|
||||
if (draw) this->_drawscanline<ISSHADOWPOLYGON, USELINEHACK>(polyAttr, isTranslucent, dstColor, framebufferWidth, framebufferHeight, left, right);
|
||||
const int xl = left->X;
|
||||
const int xr = right->X;
|
||||
|
@ -939,11 +939,11 @@ void RasterizerUnit<RENDERER>::_shape_engine(const POLYGON_ATTR polyAttr, const
|
|||
}
|
||||
|
||||
template<bool RENDERER>
|
||||
void RasterizerUnit<RENDERER>::SetSLI(u32 value, u32 mask, bool debug)
|
||||
void RasterizerUnit<RENDERER>::SetSLI(u32 startLine, u32 endLine, bool debug)
|
||||
{
|
||||
this->_SLI_Value = value;
|
||||
this->_SLI_Mask = mask;
|
||||
this->_debug_thisPoly = debug;
|
||||
this->_SLI_startLine = startLine;
|
||||
this->_SLI_endLine = endLine;
|
||||
}
|
||||
|
||||
template<bool RENDERER>
|
||||
|
@ -1442,27 +1442,11 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
_enableLineHack = CommonSettings.GFX3D_LineHack;
|
||||
_enableFragmentSamplingHack = CommonSettings.GFX3D_TXTHack;
|
||||
|
||||
_HACK_viewer_rasterizerUnit.SetSLI(0, 1, false);
|
||||
_HACK_viewer_rasterizerUnit.SetSLI(0, _framebufferHeight, false);
|
||||
|
||||
const size_t coreCount = CommonSettings.num_cores;
|
||||
_threadCount = coreCount;
|
||||
|
||||
// SoftRasterizer works best when the number of threads is a power-of-two.
|
||||
// Ensure that the thread count is set to the previous power-of-two if the
|
||||
// core count isn't already a power-of-two.
|
||||
_threadCount--;
|
||||
_threadCount |= (_threadCount >> 1);
|
||||
_threadCount |= (_threadCount >> 2);
|
||||
_threadCount |= (_threadCount >> 4);
|
||||
_threadCount |= (_threadCount >> 8);
|
||||
_threadCount |= (_threadCount >> 16);
|
||||
_threadCount++;
|
||||
|
||||
if (_threadCount != coreCount)
|
||||
{
|
||||
_threadCount >>= 1;
|
||||
}
|
||||
|
||||
if (_threadCount > SOFTRASTERIZER_MAX_THREADS)
|
||||
{
|
||||
_threadCount = SOFTRASTERIZER_MAX_THREADS;
|
||||
|
@ -1477,9 +1461,6 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
_customLinesPerThread = _framebufferHeight;
|
||||
_customPixelsPerThread = _framebufferPixCount;
|
||||
|
||||
_rasterizerUnit[0].SetSLI(0, 0, false);
|
||||
_rasterizerUnit[0].SetRenderer(this);
|
||||
|
||||
_threadPostprocessParam[0].renderer = this;
|
||||
_threadPostprocessParam[0].startLine = 0;
|
||||
_threadPostprocessParam[0].endLine = _framebufferHeight;
|
||||
|
@ -1491,6 +1472,9 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
_threadClearParam[0].renderer = this;
|
||||
_threadClearParam[0].startPixel = 0;
|
||||
_threadClearParam[0].endPixel = _framebufferPixCount;
|
||||
|
||||
_rasterizerUnit[0].SetSLI(_threadPostprocessParam[0].startLine, _threadPostprocessParam[0].endLine, false);
|
||||
_rasterizerUnit[0].SetRenderer(this);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1503,9 +1487,6 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
|
||||
for (size_t i = 0; i < _threadCount; i++)
|
||||
{
|
||||
_rasterizerUnit[i].SetSLI(i, _threadCount - 1, false);
|
||||
_rasterizerUnit[i].SetRenderer(this);
|
||||
|
||||
_threadPostprocessParam[i].renderer = this;
|
||||
_threadPostprocessParam[i].startLine = i * _customLinesPerThread;
|
||||
_threadPostprocessParam[i].endLine = (i < _threadCount - 1) ? (i + 1) * _customLinesPerThread : _framebufferHeight;
|
||||
|
@ -1518,6 +1499,9 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
_threadClearParam[i].startPixel = i * _customPixelsPerThread;
|
||||
_threadClearParam[i].endPixel = (i < _threadCount - 1) ? (i + 1) * _customPixelsPerThread : _framebufferPixCount;
|
||||
|
||||
_rasterizerUnit[i].SetSLI(_threadPostprocessParam[i].startLine, _threadPostprocessParam[i].endLine, false);
|
||||
_rasterizerUnit[i].SetRenderer(this);
|
||||
|
||||
#ifdef DESMUME_COCOA
|
||||
// The Cocoa port takes advantage of hand-optimized thread priorities
|
||||
// to help stabilize performance when running SoftRasterizer.
|
||||
|
@ -1531,7 +1515,15 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
|
|||
InitTables();
|
||||
Reset();
|
||||
|
||||
printf("SoftRast Initialized with cores=%d\n", (int)this->_threadCount);
|
||||
if (_threadCount == 0)
|
||||
{
|
||||
printf("SoftRasterizer: Running directly on the emulation thread. (Multithreading disabled.)\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("SoftRasterizer: Running using %d additional %s. (Multithreading enabled.)\n",
|
||||
(int)_threadCount, (_threadCount == 1) ? "thread" : "threads");
|
||||
}
|
||||
}
|
||||
|
||||
SoftRasterizerRenderer::~SoftRasterizerRenderer()
|
||||
|
@ -2342,6 +2334,8 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
|
|||
|
||||
this->_threadClearParam[0].startPixel = 0;
|
||||
this->_threadClearParam[0].endPixel = pixCount;
|
||||
|
||||
this->_rasterizerUnit[0].SetSLI(this->_threadPostprocessParam[0].startLine, this->_threadPostprocessParam[0].endLine, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2355,6 +2349,8 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
|
|||
|
||||
this->_threadClearParam[i].startPixel = i * this->_customPixelsPerThread;
|
||||
this->_threadClearParam[i].endPixel = (i < this->_threadCount - 1) ? (i + 1) * this->_customPixelsPerThread : pixCount;
|
||||
|
||||
this->_rasterizerUnit[i].SetSLI(this->_threadPostprocessParam[i].startLine, this->_threadPostprocessParam[i].endLine, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2446,6 +2442,8 @@ Render3DError SoftRasterizer_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_
|
|||
|
||||
this->_threadClearParam[0].startPixel = 0;
|
||||
this->_threadClearParam[0].endPixel = pixCount;
|
||||
|
||||
this->_rasterizerUnit[0].SetSLI(this->_threadPostprocessParam[0].startLine, this->_threadPostprocessParam[0].endLine, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2461,6 +2459,8 @@ Render3DError SoftRasterizer_SIMD<SIMDBYTES>::SetFramebufferSize(size_t w, size_
|
|||
|
||||
this->_threadClearParam[i].startPixel = i * pixelsPerThread;
|
||||
this->_threadClearParam[i].endPixel = (i < this->_threadCount - 1) ? (i + 1) * pixelsPerThread : pixCount;
|
||||
|
||||
this->_rasterizerUnit[i].SetSLI(this->_threadPostprocessParam[i].startLine, this->_threadPostprocessParam[i].endLine, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -97,8 +97,8 @@ class RasterizerUnit
|
|||
{
|
||||
protected:
|
||||
bool _debug_thisPoly;
|
||||
u32 _SLI_Mask;
|
||||
u32 _SLI_Value;
|
||||
u32 _SLI_startLine;
|
||||
u32 _SLI_endLine;
|
||||
|
||||
SoftRasterizerRenderer *_softRender;
|
||||
SoftRasterizerTexture *_currentTexture;
|
||||
|
@ -120,7 +120,7 @@ protected:
|
|||
template<bool SLI, bool ISBACKWARDS, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type);
|
||||
|
||||
public:
|
||||
void SetSLI(u32 value, u32 mask, bool debug);
|
||||
void SetSLI(u32 startLine, u32 endLine, bool debug);
|
||||
void SetRenderer(SoftRasterizerRenderer *theRenderer);
|
||||
template<bool SLI, bool USELINEHACK> FORCEINLINE void Render();
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue