SoftRasterizer: Do some multithreading improvements, and also clean up and refactor RasterizerUnit.
- Completely encapsulate all stray global variables into the SoftRasterizer class, where they belong.
- Framebuffer clears are now fully multithreaded, significantly improving clearing performance.
- Multithreaded texture loads and vertex calculations now require a minimum of 2 threads, down from 4 threads.
- The maximum number of SoftRasterizer threads has been increased from 16 to 32.
This commit is contained in:
parent
9e3b694ace
commit
7509d469b9
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2006 yopyop
|
Copyright (C) 2006 yopyop
|
||||||
Copyright (C) 2006-2007 shash
|
Copyright (C) 2006-2007 shash
|
||||||
Copyright (C) 2008-2017 DeSmuME team
|
Copyright (C) 2008-2018 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -1171,14 +1171,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
|
|
||||||
if (!doFramebufferFlip)
|
if (!doFramebufferFlip)
|
||||||
{
|
{
|
||||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
|
||||||
|
|
||||||
if (!doFramebufferConvert)
|
if (!doFramebufferConvert)
|
||||||
{
|
{
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8);
|
||||||
for (; i < ssePixCount; i += 8)
|
for (; i < ssePixCount; i += 8)
|
||||||
{
|
{
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||||
|
@ -1191,7 +1189,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; i < pixCount; i++)
|
for (; i < this->_framebufferPixCount; i++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[i].color = ColorspaceCopy32<false>(srcFramebuffer[i]);
|
dstFramebufferMain[i].color = ColorspaceCopy32<false>(srcFramebuffer[i]);
|
||||||
dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]);
|
dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]);
|
||||||
|
@ -1202,12 +1200,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
}
|
}
|
||||||
else if (dstFramebufferMain != NULL)
|
else if (dstFramebufferMain != NULL)
|
||||||
{
|
{
|
||||||
ColorspaceCopyBuffer32<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
ColorspaceCopyBuffer32<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1218,7 +1216,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8);
|
||||||
for (; i < ssePixCount; i += 8)
|
for (; i < ssePixCount; i += 8)
|
||||||
{
|
{
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||||
|
@ -1231,7 +1229,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; i < pixCount; i++)
|
for (; i < this->_framebufferPixCount; i++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
||||||
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||||
|
@ -1242,12 +1240,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
}
|
}
|
||||||
else if (dstFramebufferMain != NULL)
|
else if (dstFramebufferMain != NULL)
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1256,7 +1254,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8);
|
||||||
for (; i < ssePixCount; i += 8)
|
for (; i < ssePixCount; i += 8)
|
||||||
{
|
{
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||||
|
@ -1269,7 +1267,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; i < pixCount; i++)
|
for (; i < this->_framebufferPixCount; i++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[i].color = ColorspaceCopy32<true>(srcFramebuffer[i]);
|
dstFramebufferMain[i].color = ColorspaceCopy32<true>(srcFramebuffer[i]);
|
||||||
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||||
|
@ -1280,12 +1278,12 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Fragmen
|
||||||
}
|
}
|
||||||
else if (dstFramebufferMain != NULL)
|
else if (dstFramebufferMain != NULL)
|
||||||
{
|
{
|
||||||
ColorspaceCopyBuffer32<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
ColorspaceCopyBuffer32<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4891,6 +4889,7 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
|
||||||
|
|
||||||
this->_framebufferWidth = w;
|
this->_framebufferWidth = w;
|
||||||
this->_framebufferHeight = h;
|
this->_framebufferHeight = h;
|
||||||
|
this->_framebufferPixCount = w * h;
|
||||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||||
|
|
||||||
if (this->isPBOSupported)
|
if (this->isPBOSupported)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2006 yopyop
|
Copyright (C) 2006 yopyop
|
||||||
Copyright (C) 2006-2007 shash
|
Copyright (C) 2006-2007 shash
|
||||||
Copyright (C) 2008-2017 DeSmuME team
|
Copyright (C) 2008-2018 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -2064,6 +2064,7 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
||||||
|
|
||||||
this->_framebufferWidth = w;
|
this->_framebufferWidth = w;
|
||||||
this->_framebufferHeight = h;
|
this->_framebufferHeight = h;
|
||||||
|
this->_framebufferPixCount = w * h;
|
||||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||||
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.
|
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2009-2017 DeSmuME team
|
Copyright (C) 2009-2018 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -21,10 +21,20 @@
|
||||||
#include "render3D.h"
|
#include "render3D.h"
|
||||||
#include "gfx3d.h"
|
#include "gfx3d.h"
|
||||||
|
|
||||||
|
#define SOFTRASTERIZER_MAX_THREADS 32
|
||||||
|
|
||||||
extern GPU3DInterface gpu3DRasterize;
|
extern GPU3DInterface gpu3DRasterize;
|
||||||
|
|
||||||
|
class Task;
|
||||||
class SoftRasterizerRenderer;
|
class SoftRasterizerRenderer;
|
||||||
|
struct edge_fx_fl;
|
||||||
|
|
||||||
|
struct SoftRasterizerClearParam
|
||||||
|
{
|
||||||
|
SoftRasterizerRenderer *renderer;
|
||||||
|
size_t startPixel;
|
||||||
|
size_t endPixel;
|
||||||
|
};
|
||||||
|
|
||||||
struct SoftRasterizerPostProcessParams
|
struct SoftRasterizerPostProcessParams
|
||||||
{
|
{
|
||||||
|
@ -82,6 +92,39 @@ public:
|
||||||
void SetScalingFactor(size_t scalingFactor);
|
void SetScalingFactor(size_t scalingFactor);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <bool RENDERER>
|
||||||
|
class RasterizerUnit
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
bool _debug_thisPoly;
|
||||||
|
u32 _SLI_Mask;
|
||||||
|
u32 _SLI_Value;
|
||||||
|
|
||||||
|
SoftRasterizerRenderer *_softRender;
|
||||||
|
SoftRasterizerTexture *_currentTexture;
|
||||||
|
VERT *_verts[MAX_CLIPPED_VERTS];
|
||||||
|
size_t _polynum;
|
||||||
|
u8 _textureWrapMode;
|
||||||
|
|
||||||
|
Render3DError _SetupTexture(const POLY &thePoly, size_t polyRenderIndex);
|
||||||
|
FORCEINLINE FragmentColor _sample(const float u, const float v);
|
||||||
|
FORCEINLINE float _round_s(double val);
|
||||||
|
|
||||||
|
template<bool ISSHADOWPOLYGON> FORCEINLINE void _shade(const PolygonMode polygonMode, const FragmentColor src, FragmentColor &dst, const float texCoordU, const float texCoordV);
|
||||||
|
template<bool ISSHADOWPOLYGON> FORCEINLINE void _pixel(const POLYGON_ATTR polyAttr, const bool isTranslucent, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z);
|
||||||
|
template<bool ISSHADOWPOLYGON, bool USELINEHACK> FORCEINLINE void _drawscanline(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *pLeft, edge_fx_fl *pRight);
|
||||||
|
template<bool SLI, bool ISSHADOWPOLYGON, bool USELINEHACK, bool ISHORIZONTAL> void _runscanlines(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, edge_fx_fl *left, edge_fx_fl *right);
|
||||||
|
|
||||||
|
template<int TYPE> FORCEINLINE void _rot_verts();
|
||||||
|
template<bool ISBACKWARDS, int TYPE> void _sort_verts();
|
||||||
|
template<bool SLI, bool ISBACKWARDS, bool ISSHADOWPOLYGON, bool USELINEHACK> void _shape_engine(const POLYGON_ATTR polyAttr, const bool isTranslucent, FragmentColor *dstColor, const size_t framebufferWidth, const size_t framebufferHeight, int type);
|
||||||
|
|
||||||
|
public:
|
||||||
|
void SetSLI(u32 value, u32 mask, bool debug);
|
||||||
|
void SetRenderer(SoftRasterizerRenderer *theRenderer);
|
||||||
|
template<bool SLI, bool USELINEHACK> FORCEINLINE void Render();
|
||||||
|
};
|
||||||
|
|
||||||
#if defined(ENABLE_SSE2)
|
#if defined(ENABLE_SSE2)
|
||||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||||
#else
|
#else
|
||||||
|
@ -89,12 +132,27 @@ class SoftRasterizerRenderer : public Render3D
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
|
Task *_task;
|
||||||
|
SoftRasterizerClearParam _threadClearParam[SOFTRASTERIZER_MAX_THREADS];
|
||||||
|
SoftRasterizerPostProcessParams _threadPostprocessParam[SOFTRASTERIZER_MAX_THREADS];
|
||||||
|
|
||||||
|
RasterizerUnit<true> _rasterizerUnit[SOFTRASTERIZER_MAX_THREADS];
|
||||||
|
RasterizerUnit<false> _HACK_viewer_rasterizerUnit;
|
||||||
|
|
||||||
|
size_t _threadCount;
|
||||||
|
size_t _nativeLinesPerThread;
|
||||||
|
size_t _nativePixelsPerThread;
|
||||||
|
size_t _customLinesPerThread;
|
||||||
|
size_t _customPixelsPerThread;
|
||||||
|
|
||||||
|
FragmentColor _clearColor6665;
|
||||||
|
FragmentAttributes _clearAttributes;
|
||||||
|
|
||||||
GFX3D_Clipper clipper;
|
GFX3D_Clipper clipper;
|
||||||
u8 fogTable[32768];
|
u8 fogTable[32768];
|
||||||
FragmentColor edgeMarkTable[8];
|
FragmentColor edgeMarkTable[8];
|
||||||
bool edgeMarkDisabled[8];
|
bool edgeMarkDisabled[8];
|
||||||
|
|
||||||
bool _stateSetupNeedsFinish;
|
|
||||||
bool _renderGeometryNeedsFinish;
|
bool _renderGeometryNeedsFinish;
|
||||||
|
|
||||||
bool _enableHighPrecisionColorInterpolation;
|
bool _enableHighPrecisionColorInterpolation;
|
||||||
|
@ -124,7 +182,6 @@ public:
|
||||||
bool polyVisible[POLYLIST_SIZE];
|
bool polyVisible[POLYLIST_SIZE];
|
||||||
bool polyBackfacing[POLYLIST_SIZE];
|
bool polyBackfacing[POLYLIST_SIZE];
|
||||||
GFX3D_State *currentRenderState;
|
GFX3D_State *currentRenderState;
|
||||||
SoftRasterizerPostProcessParams *postprocessParam;
|
|
||||||
|
|
||||||
bool _enableFragmentSamplingHack;
|
bool _enableFragmentSamplingHack;
|
||||||
|
|
||||||
|
@ -148,6 +205,7 @@ public:
|
||||||
virtual Render3DError Render(const GFX3D &engine);
|
virtual Render3DError Render(const GFX3D &engine);
|
||||||
virtual Render3DError RenderFinish();
|
virtual Render3DError RenderFinish();
|
||||||
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
|
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
|
||||||
|
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
|
||||||
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -155,7 +213,22 @@ public:
|
||||||
|
|
||||||
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
|
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
|
||||||
{
|
{
|
||||||
|
protected:
|
||||||
|
v128u32 _clearColor_v128u32;
|
||||||
|
v128u32 _clearDepth_v128u32;
|
||||||
|
v128u8 _clearAttrOpaquePolyID_v128u8;
|
||||||
|
v128u8 _clearAttrTranslucentPolyID_v128u8;
|
||||||
|
v128u8 _clearAttrStencil_v128u8;
|
||||||
|
v128u8 _clearAttrIsFogged_v128u8;
|
||||||
|
v128u8 _clearAttrIsTranslucentPoly_v128u8;
|
||||||
|
|
||||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor6665, const FragmentAttributes &clearAttributes);
|
||||||
|
|
||||||
|
public:
|
||||||
|
SoftRasterizerRenderer_SSE2();
|
||||||
|
|
||||||
|
virtual void ClearUsingValuesLoop(const size_t startPixel, const size_t endPixel);
|
||||||
|
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2006-2007 shash
|
Copyright (C) 2006-2007 shash
|
||||||
Copyright (C) 2008-2017 DeSmuME team
|
Copyright (C) 2008-2018 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -244,6 +244,8 @@ Render3D::Render3D()
|
||||||
|
|
||||||
_framebufferWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
_framebufferWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||||
_framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
_framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||||
|
_framebufferPixCount = _framebufferWidth * _framebufferHeight;
|
||||||
|
_framebufferSIMDPixCount = 0;
|
||||||
_framebufferColorSizeBytes = 0;
|
_framebufferColorSizeBytes = 0;
|
||||||
_framebufferColor = NULL;
|
_framebufferColor = NULL;
|
||||||
|
|
||||||
|
@ -332,6 +334,7 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
|
||||||
|
|
||||||
this->_framebufferWidth = w;
|
this->_framebufferWidth = w;
|
||||||
this->_framebufferHeight = h;
|
this->_framebufferHeight = h;
|
||||||
|
this->_framebufferPixCount = w * h;
|
||||||
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||||
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine
|
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine
|
||||||
|
|
||||||
|
@ -478,22 +481,20 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
|
||||||
|
|
||||||
if (dstFramebufferMain != NULL)
|
if (dstFramebufferMain != NULL)
|
||||||
{
|
{
|
||||||
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
|
if ( (this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev) )
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
|
||||||
}
|
}
|
||||||
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
|
else if ( (this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev) )
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount);
|
||||||
}
|
}
|
||||||
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
|
else if ( ((this->_internalRenderingFormat == NDSColorFormat_BGR666_Rev) && (this->_outputFormat == NDSColorFormat_BGR666_Rev)) ||
|
||||||
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
|
((this->_internalRenderingFormat == NDSColorFormat_BGR888_Rev) && (this->_outputFormat == NDSColorFormat_BGR888_Rev)) )
|
||||||
{
|
{
|
||||||
memcpy(dstFramebufferMain, srcFramebuffer, pixCount * sizeof(FragmentColor));
|
memcpy(dstFramebufferMain, srcFramebuffer, this->_framebufferPixCount * sizeof(FragmentColor));
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
|
@ -503,11 +504,11 @@ Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFram
|
||||||
{
|
{
|
||||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
ColorspaceConvertBuffer6665To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
|
||||||
}
|
}
|
||||||
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
|
else if (this ->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
|
@ -687,6 +688,42 @@ Render3DError Render3D::VramReconfigureSignal()
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Derives the SIMD pixel count from the pixel count already set up by the
// Render3D base constructor.
Render3D_SIMD128::Render3D_SIMD128()
{
	// Largest multiple of 16 that does not exceed the total pixel count.
	const size_t leftoverPixCount = this->_framebufferPixCount % 16;
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - leftoverPixCount;
}
|
||||||
|
|
||||||
|
// Resizes the framebuffer through the base Render3D implementation, then
// recomputes the SIMD pixel count (the pixel count rounded down to a multiple
// of 16).
//
// w, h: new framebuffer width and height, in pixels.
// Returns the error code reported by Render3D::SetFramebufferSize(). On
// failure, the SIMD pixel count is left unchanged.
Render3DError Render3D_SIMD128::SetFramebufferSize(size_t w, size_t h)
{
	const Render3DError error = this->Render3D::SetFramebufferSize(w, h);
	if (error != RENDER3DERROR_NOERR)
	{
		// Propagate the base class failure instead of reporting success.
		return error;
	}
	
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 16);
	
	return error;
}
|
||||||
|
|
||||||
|
// Derives the SIMD pixel count from the pixel count already set up by the
// Render3D base constructor.
Render3D_SIMD256::Render3D_SIMD256()
{
	// Largest multiple of 32 that does not exceed the total pixel count.
	const size_t leftoverPixCount = this->_framebufferPixCount % 32;
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - leftoverPixCount;
}
|
||||||
|
|
||||||
|
// Resizes the framebuffer through the base Render3D implementation, then
// recomputes the SIMD pixel count (the pixel count rounded down to a multiple
// of 32).
//
// w, h: new framebuffer width and height, in pixels.
// Returns the error code reported by Render3D::SetFramebufferSize(). On
// failure, the SIMD pixel count is left unchanged.
Render3DError Render3D_SIMD256::SetFramebufferSize(size_t w, size_t h)
{
	const Render3DError error = this->Render3D::SetFramebufferSize(w, h);
	if (error != RENDER3DERROR_NOERR)
	{
		// Propagate the base class failure instead of reporting success.
		return error;
	}
	
	this->_framebufferSIMDPixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 32);
	
	return error;
}
|
||||||
|
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
|
|
||||||
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
Copyright (C) 2006-2007 shash
|
Copyright (C) 2006-2007 shash
|
||||||
Copyright (C) 2007-2017 DeSmuME team
|
Copyright (C) 2007-2018 DeSmuME team
|
||||||
|
|
||||||
This file is free software: you can redistribute it and/or modify
|
This file is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -144,6 +144,8 @@ protected:
|
||||||
|
|
||||||
size_t _framebufferWidth;
|
size_t _framebufferWidth;
|
||||||
size_t _framebufferHeight;
|
size_t _framebufferHeight;
|
||||||
|
size_t _framebufferPixCount;
|
||||||
|
size_t _framebufferSIMDPixCount;
|
||||||
size_t _framebufferColorSizeBytes;
|
size_t _framebufferColorSizeBytes;
|
||||||
FragmentColor *_framebufferColor;
|
FragmentColor *_framebufferColor;
|
||||||
|
|
||||||
|
@ -246,9 +248,25 @@ public:
|
||||||
Render3DTexture* GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const;
|
Render3DTexture* GetTextureByPolygonRenderIndex(size_t polyRenderIndex) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class Render3D_SIMD128 : public Render3D
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Render3D_SIMD128();
|
||||||
|
|
||||||
|
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||||
|
};
|
||||||
|
|
||||||
|
class Render3D_SIMD256 : public Render3D
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Render3D_SIMD256();
|
||||||
|
|
||||||
|
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||||
|
};
|
||||||
|
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
|
|
||||||
class Render3D_SSE2 : public Render3D
|
class Render3D_SSE2 : public Render3D_SIMD128
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
||||||
|
|
Loading…
Reference in New Issue