SoftRasterizer: Do some multithreading improvements, and also clean up and refactor RasterizerUnit.

- Completely encapsulate all stray global variables into the SoftRasterizer class where they belong.
- Framebuffer clears are now fully multithreaded, significantly improving clearing performance.
- Doing multithreaded texture loads and vertex calculations now requires a minimum of 2 threads, down from 4 threads.
- The maximum number of SoftRasterizer threads has been increased from 16 to 32.
This commit is contained in:
rogerman 2018-02-12 11:35:21 -08:00
parent 9e3b694ace
commit 7509d469b9
6 changed files with 1204 additions and 968 deletions

View File

@ -1,7 +1,7 @@
/* /*
Copyright (C) 2006 yopyop Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash Copyright (C) 2006-2007 shash
Copyright (C) 2008-2017 DeSmuME team Copyright (C) 2008-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -1171,14 +1171,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
if (!doFramebufferFlip) if (!doFramebufferFlip)
{ {
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
if (!doFramebufferConvert) if (!doFramebufferConvert)
{ {
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
{ {
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8); const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8);
for (; i < ssePixCount; i += 8) for (; i < ssePixCount; i += 8)
{ {
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
@ -1191,7 +1189,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#pragma LOOPVECTORIZE_DISABLE #pragma LOOPVECTORIZE_DISABLE
#endif #endif
for (; i < pixCount; i++) for (; i < this->_framebufferPixCount; i++)
{ {
dstFramebufferMain[i].color = ColorspaceCopy32<false>(srcFramebuffer[i]); dstFramebufferMain[i].color = ColorspaceCopy32<false>(srcFramebuffer[i]);
dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]);
@ -1202,12 +1200,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
} }
else if (dstFramebufferMain != NULL) else if (dstFramebufferMain != NULL)
{ {
ColorspaceCopyBuffer32<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount); ColorspaceCopyBuffer32<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
this->_renderNeedsFlushMain = false; this->_renderNeedsFlushMain = false;
} }
else else
{ {
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount); ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
this->_renderNeedsFlush16 = false; this->_renderNeedsFlush16 = false;
} }
} }
@ -1218,7 +1216,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
{ {
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8); const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8);
for (; i < ssePixCount; i += 8) for (; i < ssePixCount; i += 8)
{ {
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
@ -1231,7 +1229,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#pragma LOOPVECTORIZE_DISABLE #pragma LOOPVECTORIZE_DISABLE
#endif #endif
for (; i < pixCount; i++) for (; i < this->_framebufferPixCount; i++)
{ {
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]); dstFramebufferMain[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
@ -1242,12 +1240,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
} }
else if (dstFramebufferMain != NULL) else if (dstFramebufferMain != NULL)
{ {
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount); ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
this->_renderNeedsFlushMain = false; this->_renderNeedsFlushMain = false;
} }
else else
{ {
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount); ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
this->_renderNeedsFlush16 = false; this->_renderNeedsFlush16 = false;
} }
} }
@ -1256,7 +1254,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
{ {
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
const size_t ssePixCount = pixCount - (pixCount % 8); const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8);
for (; i < ssePixCount; i += 8) for (; i < ssePixCount; i += 8)
{ {
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
@ -1269,7 +1267,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
#pragma LOOPVECTORIZE_DISABLE #pragma LOOPVECTORIZE_DISABLE
#endif #endif
for (; i < pixCount; i++) for (; i < this->_framebufferPixCount; i++)
{ {
dstFramebufferMain[i].color = ColorspaceCopy32<true>(srcFramebuffer[i]); dstFramebufferMain[i].color = ColorspaceCopy32<true>(srcFramebuffer[i]);
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]); dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
@ -1280,12 +1278,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
} }
else if (dstFramebufferMain != NULL) else if (dstFramebufferMain != NULL)
{ {
ColorspaceCopyBuffer32<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount); ColorspaceCopyBuffer32<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
this->_renderNeedsFlushMain = false; this->_renderNeedsFlushMain = false;
} }
else else
{ {
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount); ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
this->_renderNeedsFlush16 = false; this->_renderNeedsFlush16 = false;
} }
} }
@ -4891,6 +4889,7 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
this->_framebufferWidth = w; this->_framebufferWidth = w;
this->_framebufferHeight = h; this->_framebufferHeight = h;
this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
if (this->isPBOSupported) if (this->isPBOSupported)

View File

@ -1,7 +1,7 @@
/* /*
Copyright (C) 2006 yopyop Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash Copyright (C) 2006-2007 shash
Copyright (C) 2008-2017 DeSmuME team Copyright (C) 2008-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -2064,6 +2064,7 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
this->_framebufferWidth = w; this->_framebufferWidth = w;
this->_framebufferHeight = h; this->_framebufferHeight = h;
this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO. this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2009-2017 DeSmuME team Copyright (C) 2009-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -21,10 +21,20 @@
#include "render3D.h" #include "render3D.h"
#include "gfx3d.h" #include "gfx3d.h"
#define SOFTRASTERIZER_MAX_THREADS 32
extern GPU3DInterface gpu3DRasterize; extern GPU3DInterface gpu3DRasterize;
class Task;
class SoftRasterizerRenderer; class SoftRasterizerRenderer;
struct edge_fx_fl;
// Parameter bundle describing one pixel range of a framebuffer clear.
// SoftRasterizerRenderer keeps an array of these, one slot per possible thread
// (see _threadClearParam[SOFTRASTERIZER_MAX_THREADS]).
struct SoftRasterizerClearParam
{
// Renderer associated with this clear range.
SoftRasterizerRenderer *renderer;
// First pixel index of the range.
size_t startPixel;
// End pixel index of the range.
// NOTE(review): presumably exclusive, matching ClearUsingValuesLoop(startPixel, endPixel) -- confirm against the implementation.
size_t endPixel;
};
struct SoftRasterizerPostProcessParams struct SoftRasterizerPostProcessParams
{ {
@ -82,6 +92,39 @@ public:
void SetScalingFactor(size_t scalingFactor); void SetScalingFactor(size_t scalingFactor);
}; };
// A single software rasterizer unit. SoftRasterizerRenderer owns an array of
// RasterizerUnit<true> (one slot per possible thread) plus one RasterizerUnit<false>
// named _HACK_viewer_rasterizerUnit.
// NOTE(review): the exact effect of the RENDERER template flag is defined in the
// implementation file, which is not visible here -- confirm before relying on it.
template <bool RENDERER>
class RasterizerUnit
{
protected:
// Per-unit state. NOTE(review): _SLI_Mask/_SLI_Value/_debug_thisPoly are presumably
// assigned by SetSLI(), and _softRender by SetRenderer() -- confirm in the implementation.
bool _debug_thisPoly;
u32 _SLI_Mask;
u32 _SLI_Value;
SoftRasterizerRenderer *_softRender;
SoftRasterizerTexture *_currentTexture;
VERT *_verts[MAX_CLIPPED_VERTS];
size_t _polynum;
u8 _textureWrapMode;
// Internal helpers: texture setup, sampling, shading, per-pixel and per-scanline drawing.
Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex);
FORCEINLINE FragmentColor _sample(const float u, const float v);
FORCEINLINE float _round_s(double val);
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV);
template<bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z);
template<bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight);
template<bool SLI, bool ISSHADOWPOLYGON, bool USELINEHACK, bool ISHORIZONTAL> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right);
// Vertex ordering helpers and the polygon shape dispatcher.
template<int TYPE> FORCEINLINE void _rot_verts();
template<bool ISBACKWARDS, int TYPE> void _sort_verts();
template<bool SLI, bool ISBACKWARDS, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type);
public:
// Public interface: configure the unit, then call Render().
void SetSLI(u32 value, u32 mask, bool debug);
void SetRenderer(SoftRasterizerRenderer *theRenderer);
template<bool SLI, bool USELINEHACK> FORCEINLINE void Render();
};
#if defined(ENABLE_SSE2) #if defined(ENABLE_SSE2)
class SoftRasterizerRenderer : public Render3D_SSE2 class SoftRasterizerRenderer : public Render3D_SSE2
#else #else
@ -89,12 +132,27 @@ class SoftRasterizerRenderer : public Render3D
#endif #endif
{ {
protected: protected:
Task *_task;
SoftRasterizerClearParam _threadClearParam[SOFTRASTERIZER_MAX_THREADS];
SoftRasterizerPostProcessParams _threadPostprocessParam[SOFTRASTERIZER_MAX_THREADS];
RasterizerUnit<true> _rasterizerUnit[SOFTRASTERIZER_MAX_THREADS];
RasterizerUnit<false> _HACK_viewer_rasterizerUnit;
size_t _threadCount;
size_t _nativeLinesPerThread;
size_t _nativePixelsPerThread;
size_t _customLinesPerThread;
size_t _customPixelsPerThread;
FragmentColor _clearColor6665;
FragmentAttributes _clearAttributes;
GFX3D_Clipper clipper; GFX3D_Clipper clipper;
u8 fogTable[32768]; u8 fogTable[32768];
FragmentColor edgeMarkTable[8]; FragmentColor edgeMarkTable[8];
bool edgeMarkDisabled[8]; bool edgeMarkDisabled[8];
bool _stateSetupNeedsFinish;
bool _renderGeometryNeedsFinish; bool _renderGeometryNeedsFinish;
bool _enableHighPrecisionColorInterpolation; bool _enableHighPrecisionColorInterpolation;
@ -124,7 +182,6 @@ public:
bool polyVisible[POLYLIST_SIZE]; bool polyVisible[POLYLIST_SIZE];
bool polyBackfacing[POLYLIST_SIZE]; bool polyBackfacing[POLYLIST_SIZE];
GFX3D_State *currentRenderState; GFX3D_State *currentRenderState;
SoftRasterizerPostProcessParams *postprocessParam;
bool _enableFragmentSamplingHack; bool _enableFragmentSamplingHack;
@ -148,6 +205,7 @@ public:
virtual Render3DError Render(const GFX3D &engine); virtual Render3DError Render(const GFX3D &engine);
virtual Render3DError RenderFinish(); virtual Render3DError RenderFinish();
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16); virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
virtual Render3DError SetFramebufferSize(size_t w, size_t h); virtual Render3DError SetFramebufferSize(size_t w, size_t h);
}; };
@ -155,7 +213,22 @@ public:
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
{ {
protected:
v128u32 _clearColor_v128u32;
v128u32 _clearDepth_v128u32;
v128u8 _clearAttrOpaquePolyID_v128u8;
v128u8 _clearAttrTranslucentPolyID_v128u8;
v128u8 _clearAttrStencil_v128u8;
v128u8 _clearAttrIsFogged_v128u8;
v128u8 _clearAttrIsTranslucentPoly_v128u8;
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes); virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
public:
SoftRasterizerRenderer_SSE2();
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
}; };
#endif #endif

View File

@ -1,6 +1,6 @@
/* /*
Copyright (C) 2006-2007 shash Copyright (C) 2006-2007 shash
Copyright (C) 2008-2017 DeSmuME team Copyright (C) 2008-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -244,6 +244,8 @@ Render3D::Render3D()
_framebufferWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH; _framebufferWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH;
_framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT; _framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
_framebufferPixCount = _framebufferWidth * _framebufferHeight;
_framebufferSIMDPixCount = 0;
_framebufferColorSizeBytes = 0; _framebufferColorSizeBytes = 0;
_framebufferColor = NULL; _framebufferColor = NULL;
@ -332,6 +334,7 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
this->_framebufferWidth = w; this->_framebufferWidth = w;
this->_framebufferHeight = h; this->_framebufferHeight = h;
this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine
@ -478,22 +481,20 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
if (dstFramebufferMain != NULL) if (dstFramebufferMain != NULL)
{ {
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) ) if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
{ {
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount); ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
} }
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) ) else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
{ {
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount); ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
} }
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) || else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) ) ((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
{ {
memcpy(dstFramebufferMain, srcFramebuffer, pixCount * sizeof(FragmentColor)); memcpy(dstFramebufferMain, srcFramebuffer, this->_framebufferPixCount * sizeof(FragmentColor));
} }
this->_renderNeedsFlushMain = false; this->_renderNeedsFlushMain = false;
@ -503,11 +504,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
{ {
if (this->_outputFormat == NDSColorFormat_BGR666_Rev) if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
{ {
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount); ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
} }
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev) else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
{ {
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount); ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
} }
this->_renderNeedsFlush16 = false; this->_renderNeedsFlush16 = false;
@ -687,6 +688,42 @@ Render3DError Render3D::VramReconfigureSignal()
return RENDER3DERROR_NOERR; return RENDER3DERROR_NOERR;
} }
// Sets the SIMD pixel count to the framebuffer pixel count rounded down
// to the nearest multiple of 16.
Render3D_SIMD128::Render3D_SIMD128()
{
	const size_t leftoverPixCount = this->_framebufferPixCount % 16;
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - leftoverPixCount;
}
// Resizes the framebuffer via Render3D::SetFramebufferSize(), then recomputes
// _framebufferSIMDPixCount as the new pixel count rounded down to a multiple of 16.
//
// w, h: requested framebuffer width and height, forwarded unchanged to the base class.
// Returns the base class's error code. If the base call fails, that error is passed
// back to the caller and _framebufferSIMDPixCount is left untouched.
Render3DError Render3D_SIMD128::SetFramebufferSize(size_t w, size_t h)
{
	const Render3DError error = this->Render3D::SetFramebufferSize(w, h);
	if (error != RENDER3DERROR_NOERR)
	{
		// Propagate the failure instead of reporting success to the caller.
		return error;
	}
	
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 16);
	
	return error;
}
// Sets the SIMD pixel count to the framebuffer pixel count rounded down
// to the nearest multiple of 32.
Render3D_SIMD256::Render3D_SIMD256()
{
	const size_t leftoverPixCount = this->_framebufferPixCount % 32;
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - leftoverPixCount;
}
// Resizes the framebuffer via Render3D::SetFramebufferSize(), then recomputes
// _framebufferSIMDPixCount as the new pixel count rounded down to a multiple of 32.
//
// w, h: requested framebuffer width and height, forwarded unchanged to the base class.
// Returns the base class's error code. If the base call fails, that error is passed
// back to the caller and _framebufferSIMDPixCount is left untouched.
Render3DError Render3D_SIMD256::SetFramebufferSize(size_t w, size_t h)
{
	const Render3DError error = this->Render3D::SetFramebufferSize(w, h);
	if (error != RENDER3DERROR_NOERR)
	{
		// Propagate the failure instead of reporting success to the caller.
		return error;
	}
	
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 32);
	
	return error;
}
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState) Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)

View File

@ -1,6 +1,6 @@
/* /*
Copyright (C) 2006-2007 shash Copyright (C) 2006-2007 shash
Copyright (C) 2007-2017 DeSmuME team Copyright (C) 2007-2018 DeSmuME team
This file is free software: you can redistribute it and/or modify This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -144,6 +144,8 @@ protected:
size_t _framebufferWidth; size_t _framebufferWidth;
size_t _framebufferHeight; size_t _framebufferHeight;
size_t _framebufferPixCount;
size_t _framebufferSIMDPixCount;
size_t _framebufferColorSizeBytes; size_t _framebufferColorSizeBytes;
FragmentColor *_framebufferColor; FragmentColor *_framebufferColor;
@ -246,9 +248,25 @@ public:
Render3DTexture* GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const; Render3DTexture* GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const;
}; };
// Render3D subclass that keeps _framebufferSIMDPixCount in sync with the framebuffer
// size: its constructor and SetFramebufferSize() set it to the pixel count rounded
// down to a multiple of 16. Render3D_SSE2 derives from this class.
class Render3D_SIMD128 : public Render3D
{
public:
Render3D_SIMD128();
// Resizes via the base class, then recomputes the SIMD pixel count.
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
};
// Render3D subclass that keeps _framebufferSIMDPixCount in sync with the framebuffer
// size: its constructor and SetFramebufferSize() set it to the pixel count rounded
// down to a multiple of 32.
class Render3D_SIMD256 : public Render3D
{
public:
Render3D_SIMD256();
// Resizes via the base class, then recomputes the SIMD pixel count.
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
};
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
class Render3D_SSE2 : public Render3D class Render3D_SSE2 : public Render3D_SIMD128
{ {
public: public:
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState); virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);