OpenGL Renderer: _FlushFramebufferFlipAndConvertOnCPU() now handles all 4 possible combinations of framebuffer flipping and color conversion.
- Also removes OpenGLRenderer_2_1::ReadBackPixels().
This commit is contained in:
parent
8c37d4acd6
commit
d08dffd122
|
@ -1153,8 +1153,9 @@ void OpenGLRenderer::SetVersion(unsigned int major, unsigned int minor, unsigned
|
||||||
this->versionRevision = revision;
|
this->versionRevision = revision;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool SWAP_RB>
|
Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const FragmentColor *__restrict srcFramebuffer,
|
||||||
Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16)
|
FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16,
|
||||||
|
bool doFramebufferFlip, bool doFramebufferConvert)
|
||||||
{
|
{
|
||||||
if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) )
|
if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) )
|
||||||
{
|
{
|
||||||
|
@ -1167,11 +1168,11 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|
||||||
if (this->willFlipFramebufferOnGPU)
|
if (!doFramebufferFlip)
|
||||||
{
|
{
|
||||||
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
|
||||||
|
|
||||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
if (!doFramebufferConvert)
|
||||||
{
|
{
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
{
|
{
|
||||||
|
@ -1182,17 +1183,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||||
|
|
||||||
_mm_store_si128( (__m128i *)(dstFramebufferMain + i + 0), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(srcColorLo) );
|
_mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2<false>(srcColorLo));
|
||||||
_mm_store_si128( (__m128i *)(dstFramebufferMain + i + 4), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(srcColorHi) );
|
_mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2<false>(srcColorHi));
|
||||||
_mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(srcColorLo, srcColorHi) );
|
_mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2<false>(srcColorLo, srcColorHi) );
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; i < pixCount; i++)
|
for (; i < pixCount; i++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<SWAP_RB>(srcFramebuffer[i]);
|
dstFramebufferMain[i].color = ColorspaceCopy32<false>(srcFramebuffer[i]);
|
||||||
dstFramebuffer16[i] = ColorspaceConvert8888To5551<SWAP_RB>(srcFramebuffer[i]);
|
dstFramebuffer16[i] = ColorspaceConvert8888To5551<false>(srcFramebuffer[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
|
@ -1200,51 +1201,92 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
}
|
}
|
||||||
else if (dstFramebufferMain != NULL)
|
else if (dstFramebufferMain != NULL)
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To6665<SWAP_RB, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
ColorspaceCopyBuffer32<false, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<SWAP_RB, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
else
|
||||||
{
|
{
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_SSE2
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
|
||||||
for (; i < ssePixCount; i += 8)
|
|
||||||
{
|
{
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
#ifdef ENABLE_SSE2
|
||||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||||
|
for (; i < ssePixCount; i += 8)
|
||||||
|
{
|
||||||
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||||
|
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||||
|
|
||||||
_mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2<SWAP_RB>(srcColorLo));
|
_mm_store_si128( (__m128i *)(dstFramebufferMain + i + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
|
||||||
_mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2<SWAP_RB>(srcColorHi));
|
_mm_store_si128( (__m128i *)(dstFramebufferMain + i + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
|
||||||
_mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(srcColorLo, srcColorHi) );
|
_mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; i < pixCount; i++)
|
for (; i < pixCount; i++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<SWAP_RB>(srcFramebuffer[i]);
|
dstFramebufferMain[i].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[i]);
|
||||||
dstFramebuffer16[i] = ColorspaceConvert8888To5551<SWAP_RB>(srcFramebuffer[i]);
|
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
|
else if (dstFramebufferMain != NULL)
|
||||||
|
{
|
||||||
|
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
||||||
|
this->_renderNeedsFlushMain = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
||||||
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (dstFramebufferMain != NULL)
|
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||||
{
|
{
|
||||||
ColorspaceCopyBuffer32<SWAP_RB, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
this->_renderNeedsFlushMain = false;
|
{
|
||||||
}
|
#ifdef ENABLE_SSE2
|
||||||
else
|
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||||
{
|
for (; i < ssePixCount; i += 8)
|
||||||
ColorspaceConvertBuffer8888To5551<SWAP_RB, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
{
|
||||||
this->_renderNeedsFlush16 = false;
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0));
|
||||||
|
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4));
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2<true>(srcColorLo));
|
||||||
|
_mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2<true>(srcColorHi));
|
||||||
|
_mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
|
#endif
|
||||||
|
for (; i < pixCount; i++)
|
||||||
|
{
|
||||||
|
dstFramebufferMain[i].color = ColorspaceCopy32<true>(srcFramebuffer[i]);
|
||||||
|
dstFramebuffer16[i] = ColorspaceConvert8888To5551<true>(srcFramebuffer[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->_renderNeedsFlushMain = false;
|
||||||
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
|
else if (dstFramebufferMain != NULL)
|
||||||
|
{
|
||||||
|
ColorspaceCopyBuffer32<true, false>((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, pixCount);
|
||||||
|
this->_renderNeedsFlushMain = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer, dstFramebuffer16, pixCount);
|
||||||
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1252,7 +1294,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
{
|
{
|
||||||
const size_t pixCount = this->_framebufferWidth;
|
const size_t pixCount = this->_framebufferWidth;
|
||||||
|
|
||||||
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
if (!doFramebufferConvert)
|
||||||
{
|
{
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
{
|
{
|
||||||
|
@ -1266,17 +1308,17 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||||
|
|
||||||
_mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(srcColorLo) );
|
_mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2<false>(srcColorLo) );
|
||||||
_mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceConvert8888To6665_SSE2<SWAP_RB>(srcColorHi) );
|
_mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2<false>(srcColorHi) );
|
||||||
_mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(srcColorLo, srcColorHi) );
|
_mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2<false>(srcColorLo, srcColorHi) );
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; x < pixCount; x++, ir++, iw++)
|
for (; x < pixCount; x++, ir++, iw++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[iw].color = ColorspaceConvert8888To6665<SWAP_RB>(srcFramebuffer[ir]);
|
dstFramebufferMain[iw].color = ColorspaceCopy32<false>(srcFramebuffer[ir]);
|
||||||
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<SWAP_RB>(srcFramebuffer[ir]);
|
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<false>(srcFramebuffer[ir]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1287,7 +1329,7 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
{
|
{
|
||||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To6665<SWAP_RB, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount);
|
ColorspaceCopyBuffer32<false, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
|
@ -1296,60 +1338,113 @@ Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor
|
||||||
{
|
{
|
||||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<SWAP_RB, false>((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount);
|
ColorspaceConvertBuffer8888To5551<false, false>((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
else
|
||||||
{
|
{
|
||||||
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
if (this->_outputFormat == NDSColorFormat_BGR666_Rev)
|
||||||
{
|
{
|
||||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
{
|
{
|
||||||
size_t x = 0;
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
#ifdef ENABLE_SSE2
|
|
||||||
const size_t ssePixCount = pixCount - (pixCount % 8);
|
|
||||||
for (; x < ssePixCount; x += 8, ir += 8, iw += 8)
|
|
||||||
{
|
{
|
||||||
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
size_t x = 0;
|
||||||
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
#ifdef ENABLE_SSE2
|
||||||
|
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||||
|
for (; x < ssePixCount; x += 8, ir += 8, iw += 8)
|
||||||
|
{
|
||||||
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||||
|
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||||
|
|
||||||
_mm_store_si128((__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2<SWAP_RB>(srcColorLo));
|
_mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceConvert8888To6665_SSE2<true>(srcColorLo) );
|
||||||
_mm_store_si128((__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2<SWAP_RB>(srcColorHi));
|
_mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceConvert8888To6665_SSE2<true>(srcColorHi) );
|
||||||
_mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2<SWAP_RB>(srcColorLo, srcColorHi) );
|
_mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma LOOPVECTORIZE_DISABLE
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
#endif
|
#endif
|
||||||
for (; x < pixCount; x++, ir++, iw++)
|
for (; x < pixCount; x++, ir++, iw++)
|
||||||
{
|
{
|
||||||
dstFramebufferMain[iw] = srcFramebuffer[ir];
|
dstFramebufferMain[iw].color = ColorspaceConvert8888To6665<true>(srcFramebuffer[ir]);
|
||||||
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<SWAP_RB>(srcFramebuffer[ir]);
|
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
}
|
}
|
||||||
else if (dstFramebufferMain != NULL)
|
else if (dstFramebufferMain != NULL)
|
||||||
{
|
|
||||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
|
||||||
{
|
{
|
||||||
ColorspaceCopyBuffer32<SWAP_RB, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount);
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
}
|
{
|
||||||
|
ColorspaceConvertBuffer8888To6665<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount);
|
||||||
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlushMain = false;
|
this->_renderNeedsFlushMain = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
|
||||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
|
||||||
{
|
{
|
||||||
ColorspaceConvertBuffer8888To5551<SWAP_RB, false>((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount);
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
}
|
{
|
||||||
|
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount);
|
||||||
|
}
|
||||||
|
|
||||||
this->_renderNeedsFlush16 = false;
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (this->_outputFormat == NDSColorFormat_BGR888_Rev)
|
||||||
|
{
|
||||||
|
if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) )
|
||||||
|
{
|
||||||
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
|
{
|
||||||
|
size_t x = 0;
|
||||||
|
#ifdef ENABLE_SSE2
|
||||||
|
const size_t ssePixCount = pixCount - (pixCount % 8);
|
||||||
|
for (; x < ssePixCount; x += 8, ir += 8, iw += 8)
|
||||||
|
{
|
||||||
|
const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0));
|
||||||
|
const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4));
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2<true>(srcColorLo));
|
||||||
|
_mm_store_si128((__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2<true>(srcColorHi));
|
||||||
|
_mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2<true>(srcColorLo, srcColorHi) );
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma LOOPVECTORIZE_DISABLE
|
||||||
|
#endif
|
||||||
|
for (; x < pixCount; x++, ir++, iw++)
|
||||||
|
{
|
||||||
|
dstFramebufferMain[iw].color = ColorspaceCopy32<true>(srcFramebuffer[ir]);
|
||||||
|
dstFramebuffer16[iw] = ColorspaceConvert8888To5551<true>(srcFramebuffer[ir]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this->_renderNeedsFlushMain = false;
|
||||||
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
|
else if (dstFramebufferMain != NULL)
|
||||||
|
{
|
||||||
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
|
{
|
||||||
|
ColorspaceCopyBuffer32<true, false>((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->_renderNeedsFlushMain = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth)
|
||||||
|
{
|
||||||
|
ColorspaceConvertBuffer8888To5551<true, false>((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->_renderNeedsFlush16 = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1366,14 +1461,9 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (this->willConvertFramebufferOnGPU)
|
this->_FlushFramebufferFlipAndConvertOnCPU(srcFramebuffer,
|
||||||
{
|
dstFramebufferMain, dstFramebuffer16,
|
||||||
return this->_FlushFramebufferConvertOnCPU<false>(srcFramebuffer, dstFramebufferMain, dstFramebuffer16);
|
!this->willFlipFramebufferOnGPU, !this->willConvertFramebufferOnGPU);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return this->_FlushFramebufferConvertOnCPU<true>(srcFramebuffer, dstFramebufferMain, dstFramebuffer16);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return RENDER3DERROR_NOERR;
|
return RENDER3DERROR_NOERR;
|
||||||
|
@ -2094,7 +2184,15 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs()
|
||||||
return OGLERROR_FBO_CREATE_ERROR;
|
return OGLERROR_FBO_CREATE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
glDrawBuffers(4, RenderDrawList);
|
if (this->isShaderSupported)
|
||||||
|
{
|
||||||
|
glDrawBuffers(4, RenderDrawList);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
}
|
||||||
|
|
||||||
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
|
||||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
||||||
|
@ -2137,7 +2235,15 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs()
|
||||||
return OGLERROR_FBO_CREATE_ERROR;
|
return OGLERROR_FBO_CREATE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
glDrawBuffers(4, RenderDrawList);
|
if (this->isShaderSupported)
|
||||||
|
{
|
||||||
|
glDrawBuffers(4, RenderDrawList);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
}
|
||||||
|
|
||||||
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
|
||||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID);
|
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID);
|
||||||
|
@ -3101,6 +3207,8 @@ Render3DError OpenGLRenderer_1_2::DownsampleFBO()
|
||||||
// Blit the color buffer
|
// Blit the color buffer
|
||||||
glBlitFramebufferEXT(0, 0, this->_framebufferWidth, this->_framebufferHeight, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
glBlitFramebufferEXT(0, 0, this->_framebufferWidth, this->_framebufferHeight, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
||||||
|
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3182,13 +3290,6 @@ Render3DError OpenGLRenderer_1_2::ReadBackPixels()
|
||||||
// penalty if the readback is in any other format.
|
// penalty if the readback is in any other format.
|
||||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0);
|
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0);
|
||||||
|
|
||||||
// Set the read and draw target buffers back to color attachment 0, which is always the default.
|
|
||||||
if (this->willFlipFramebufferOnGPU || this->willConvertFramebufferOnGPU)
|
|
||||||
{
|
|
||||||
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
|
||||||
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
|
||||||
}
|
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = true;
|
this->_pixelReadNeedsFinish = true;
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
@ -3699,6 +3800,8 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf
|
||||||
{
|
{
|
||||||
glBlitFramebufferEXT(0, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GPU_FRAMEBUFFER_NATIVE_WIDTH, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
|
glBlitFramebufferEXT(0, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GPU_FRAMEBUFFER_NATIVE_WIDTH, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
|
||||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
||||||
|
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->isMultisampledFBOSupported)
|
if (this->isMultisampledFBOSupported)
|
||||||
|
@ -3742,6 +3845,8 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf
|
||||||
// Blit the color and depth buffers.
|
// Blit the color and depth buffers.
|
||||||
glBlitFramebufferEXT(0, 0, this->_framebufferWidth, this->_framebufferHeight, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
|
glBlitFramebufferEXT(0, 0, this->_framebufferWidth, this->_framebufferHeight, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
|
||||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO);
|
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO);
|
||||||
|
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3783,6 +3888,12 @@ Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearCol
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (this->isFBOSupported)
|
||||||
|
{
|
||||||
|
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||||
|
}
|
||||||
|
|
||||||
glClearColor(divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]);
|
glClearColor(divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]);
|
||||||
glClearDepth((GLclampd)clearAttributes.depth / (GLclampd)0x00FFFFFF);
|
glClearDepth((GLclampd)clearAttributes.depth / (GLclampd)0x00FFFFFF);
|
||||||
glClearStencil(clearAttributes.opaquePolyID);
|
glClearStencil(clearAttributes.opaquePolyID);
|
||||||
|
@ -4434,86 +4545,6 @@ Render3DError OpenGLRenderer_2_0::SetupTexture(const POLY &thePoly, size_t polyR
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
Render3DError OpenGLRenderer_2_1::ReadBackPixels()
|
|
||||||
{
|
|
||||||
OGLRenderRef &OGLRef = *this->ref;
|
|
||||||
|
|
||||||
if (this->_mappedFramebuffer != NULL)
|
|
||||||
{
|
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
|
||||||
this->_mappedFramebuffer = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flip the framebuffer in Y to match the coordinates of OpenGL and the NDS hardware.
|
|
||||||
if (this->willFlipFramebufferOnGPU)
|
|
||||||
{
|
|
||||||
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID);
|
|
||||||
glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
|
|
||||||
glBlitFramebufferEXT(0, this->_framebufferHeight, this->_framebufferWidth, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
|
||||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID);
|
|
||||||
glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this->willConvertFramebufferOnGPU)
|
|
||||||
{
|
|
||||||
// Perform the color space conversion while we're still on the GPU so
|
|
||||||
// that we can avoid having to do it on the CPU.
|
|
||||||
const GLuint convertProgramID = (this->_outputFormat == NDSColorFormat_BGR666_Rev) ? OGLRef.programFramebufferRGBA6665OutputID : OGLRef.programFramebufferRGBA8888OutputID;
|
|
||||||
glDrawBuffer(GL_COLOR_ATTACHMENT2_EXT);
|
|
||||||
|
|
||||||
glUseProgram(convertProgramID);
|
|
||||||
glViewport(0, 0, this->_framebufferWidth, this->_framebufferHeight);
|
|
||||||
glDisable(GL_DEPTH_TEST);
|
|
||||||
glDisable(GL_STENCIL_TEST);
|
|
||||||
glDisable(GL_BLEND);
|
|
||||||
glDisable(GL_CULL_FACE);
|
|
||||||
|
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID);
|
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboPostprocessIndexID);
|
|
||||||
|
|
||||||
if (this->isVAOSupported)
|
|
||||||
{
|
|
||||||
glBindVertexArray(OGLRef.vaoPostprocessStatesID);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
glEnableVertexAttribArray(OGLVertexAttributeID_Position);
|
|
||||||
glEnableVertexAttribArray(OGLVertexAttributeID_TexCoord0);
|
|
||||||
glVertexAttribPointer(OGLVertexAttributeID_Position, 2, GL_FLOAT, GL_FALSE, 0, 0);
|
|
||||||
glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, 0, (const GLvoid *)(sizeof(GLfloat) * 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0);
|
|
||||||
|
|
||||||
if (this->isVAOSupported)
|
|
||||||
{
|
|
||||||
glBindVertexArray(0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
glDisableVertexAttribArray(OGLVertexAttributeID_Position);
|
|
||||||
glDisableVertexAttribArray(OGLVertexAttributeID_TexCoord0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read back the pixels.
|
|
||||||
glReadBuffer(GL_COLOR_ATTACHMENT2_EXT);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read back the pixels in BGRA format, since legacy OpenGL devices may experience a performance
|
|
||||||
// penalty if the readback is in any other format.
|
|
||||||
glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0);
|
|
||||||
|
|
||||||
// Set the read and draw target buffers back to color attachment 0, which is always the default.
|
|
||||||
if (this->willFlipFramebufferOnGPU || this->willConvertFramebufferOnGPU)
|
|
||||||
{
|
|
||||||
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
|
||||||
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
|
||||||
}
|
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = true;
|
|
||||||
return OGLERROR_NOERR;
|
|
||||||
}
|
|
||||||
|
|
||||||
Render3DError OpenGLRenderer_2_1::RenderFinish()
|
Render3DError OpenGLRenderer_2_1::RenderFinish()
|
||||||
{
|
{
|
||||||
if (!this->_renderNeedsFinish || !this->_pixelReadNeedsFinish)
|
if (!this->_renderNeedsFinish || !this->_pixelReadNeedsFinish)
|
||||||
|
|
|
@ -600,7 +600,9 @@ private:
|
||||||
unsigned int versionRevision;
|
unsigned int versionRevision;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template<bool SWAP_RB> Render3DError _FlushFramebufferConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16);
|
Render3DError _FlushFramebufferFlipAndConvertOnCPU(const FragmentColor *__restrict srcFramebuffer,
|
||||||
|
FragmentColor *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16,
|
||||||
|
bool doFramebufferFlip, bool doFramebufferConvert);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// OpenGL-specific References
|
// OpenGL-specific References
|
||||||
|
@ -783,9 +785,6 @@ protected:
|
||||||
|
|
||||||
class OpenGLRenderer_2_1 : public OpenGLRenderer_2_0
|
class OpenGLRenderer_2_1 : public OpenGLRenderer_2_0
|
||||||
{
|
{
|
||||||
protected:
|
|
||||||
virtual Render3DError ReadBackPixels();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual Render3DError RenderFinish();
|
virtual Render3DError RenderFinish();
|
||||||
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
|
virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16);
|
||||||
|
|
Loading…
Reference in New Issue