From 60385bd09951f0770887bff8b18332d83536e0a4 Mon Sep 17 00:00:00 2001 From: rogerman Date: Thu, 18 Jul 2024 21:45:00 -0700 Subject: [PATCH] OpenGL Renderer: Rework how the output framebuffers work. - The output framebuffers now bind their own FBOs rather than changing draw targets with glDrawBuffer(). - Rework the general FBO management. - Legacy OpenGL now outputs native RGBA color if FBOs are supported. This should give a minor performance increase on older GPUs. - The fixed-function pipeline can now flip the framebuffer on GPU. This greatly reduces the CPU usage when doing the final color conversion and gives a significant performance increase on ancient GPUs. --- desmume/src/OGLRender.cpp | 592 +++++++++++++--------------------- desmume/src/OGLRender.h | 10 +- desmume/src/OGLRender_3_2.cpp | 106 +++--- desmume/src/OGLRender_ES3.cpp | 1 - 4 files changed, 294 insertions(+), 415 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index f1409f00b..830e8d580 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -692,8 +692,8 @@ void main()\n\ }\n\ "}; -// Fragment shader for the final RGBA6665 formatted framebuffer, GLSL 1.00 -static const char *FramebufferOutputRGBA6665FragShader_100 = {"\ +// Fragment shader for the final BGRA6665 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputBGRA6665FragShader_100 = {"\ varying vec2 texCoord;\n\ \n\ uniform sampler2D texInFragColor;\n\ @@ -711,8 +711,8 @@ void main()\n\ }\n\ "}; -// Fragment shader for the final RGBA8888 formatted framebuffer, GLSL 1.00 -static const char *FramebufferOutputRGBA8888FragShader_100 = {"\ +// Fragment shader for the final BGRA8888 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputBGRA8888FragShader_100 = {"\ varying vec2 texCoord;\n\ \n\ uniform sampler2D texInFragColor;\n\ @@ -725,6 +725,35 @@ void main()\n\ }\n\ "}; +// Fragment shader for the final RGBA6665 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputRGBA6665FragShader_100 = {"\ +varying vec2 texCoord;\n\ +\n\ +uniform sampler2D texInFragColor;\n\ +\n\ +void main()\n\ +{\n\ + vec4 colorRGBA6665 = texture2D(texInFragColor, texCoord);\n\ + colorRGBA6665 = floor((colorRGBA6665 * 255.0) + 0.5);\n\ + colorRGBA6665.rgb = floor(colorRGBA6665.rgb / 4.0);\n\ + colorRGBA6665.a = floor(colorRGBA6665.a / 8.0);\n\ + \n\ + gl_FragColor = (colorRGBA6665 / 255.0);\n\ +}\n\ +"}; + +// Fragment shader for the final RGBA8888 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputRGBA8888FragShader_100 = {"\ +varying vec2 texCoord;\n\ +\n\ +uniform sampler2D texInFragColor;\n\ +\n\ +void main()\n\ +{\n\ + gl_FragColor = texture2D(texInFragColor, texCoord);\n\ +}\n\ +"}; + bool IsOpenGLDriverVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision) { bool result = false; @@ -1273,7 +1302,6 @@ OpenGLRenderer::OpenGLRenderer() isShaderSupported = false; isVAOSupported = false; _isDepthLEqualPolygonFacingSupported = false; - willFlipOnlyFramebufferOnGPU = false; willFlipAndConvertFramebufferOnGPU = false; willUsePerSampleZeroDstPass = false; @@ -1476,297 +1504,92 @@ bool OpenGLRenderer::IsVersionSupported(unsigned int checkVersionMajor, unsigned return result; } -Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, - Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, - bool doFramebufferFlip, bool doFramebufferConvert) +template +Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, + Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, + bool doFramebufferConvert) { if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) ) { return RENDER3DERROR_NOERR; } - // Convert from 32-bit BGRA8888 format to 32-bit RGBA6665 reversed format. OpenGL - // stores pixels using a flipped Y-coordinate, so this needs to be flipped back - // to the DS Y-coordinate. - size_t i = 0; - if (!doFramebufferFlip) + if ( !doFramebufferConvert || (this->_outputFormat == NDSColorFormat_BGR888_Rev) ) { - if (!doFramebufferConvert) + if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { #ifdef ENABLE_SSE2 - const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); - for (; i < ssePixCount; i += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); - - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2(srcColorLo)); - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2(srcColorHi)); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } + const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); + for (; i < ssePixCount; i += 8) + { + const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); + const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); + _mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2(srcColorLo)); + _mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2(srcColorHi)); + _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); + } + #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < this->_framebufferPixCount; i++) - { - dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); - dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) + for (; i < this->_framebufferPixCount; i++) { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); - this->_renderNeedsFlushMain = false; - } - else - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); - this->_renderNeedsFlush16 = false; + dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); + dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } + + this->_renderNeedsFlushMain = false; + this->_renderNeedsFlush16 = false; + } + else if (dstFramebufferMain != NULL) + { + ColorspaceCopyBuffer32((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); + this->_renderNeedsFlushMain = false; } else { - if (this->_outputFormat == NDSColorFormat_BGR666_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { -#ifdef ENABLE_SSE2 - const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); - for (; i < ssePixCount; i += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); - - _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 0), ColorspaceConvert8888To6665_SSE2(srcColorLo) ); - _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 4), ColorspaceConvert8888To6665_SSE2(srcColorHi) ); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; i < this->_framebufferPixCount; i++) - { - dstFramebufferMain[i].value = ColorspaceConvert8888To6665(srcFramebuffer[i]); - dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - ColorspaceConvertBuffer8888To6665((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); - this->_renderNeedsFlushMain = false; - } - else - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); - this->_renderNeedsFlush16 = false; - } - } - else if (this->_outputFormat == NDSColorFormat_BGR888_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { -#ifdef ENABLE_SSE2 - const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); - for (; i < ssePixCount; i += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); - - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2(srcColorLo)); - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2(srcColorHi)); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; i < this->_framebufferPixCount; i++) - { - dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); - dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); - this->_renderNeedsFlushMain = false; - } - else - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); - this->_renderNeedsFlush16 = false; - } - } + ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); + this->_renderNeedsFlush16 = false; } } - else // In the case where OpenGL couldn't flip the framebuffer on the GPU, we'll instead need to flip the framebuffer during conversion. + else if (this->_outputFormat == NDSColorFormat_BGR666_Rev) { - const size_t pixCount = this->_framebufferWidth; - - if (!doFramebufferConvert) + if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - size_t x = 0; #ifdef ENABLE_SSE2 - const size_t ssePixCount = pixCount - (pixCount % 8); - for (; x < ssePixCount; x += 8, ir += 8, iw += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); - - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2(srcColorLo) ); - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2(srcColorHi) ); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - + const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); + for (; i < ssePixCount; i += 8) + { + const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); + const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); + + _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 0), ColorspaceConvert8888To6665_SSE2(srcColorLo) ); + _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 4), ColorspaceConvert8888To6665_SSE2(srcColorHi) ); + _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); + } + #pragma LOOPVECTORIZE_DISABLE #endif - for (; x < pixCount; x++, ir++, iw++) - { - dstFramebufferMain[iw].value = ColorspaceCopy32(srcFramebuffer[ir]); - dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); - } - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) + for (; i < this->_framebufferPixCount; i++) { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount); - } - - this->_renderNeedsFlushMain = false; - } - else - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount); - } - - this->_renderNeedsFlush16 = false; + dstFramebufferMain[i].value = ColorspaceConvert8888To6665(srcFramebuffer[i]); + dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } + + this->_renderNeedsFlushMain = false; + this->_renderNeedsFlush16 = false; + } + else if (dstFramebufferMain != NULL) + { + ColorspaceConvertBuffer8888To6665((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); + this->_renderNeedsFlushMain = false; } else { - if (this->_outputFormat == NDSColorFormat_BGR666_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - size_t x = 0; -#ifdef ENABLE_SSE2 - const size_t ssePixCount = pixCount - (pixCount % 8); - for (; x < ssePixCount; x += 8, ir += 8, iw += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); - - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceConvert8888To6665_SSE2(srcColorLo) ); - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceConvert8888To6665_SSE2(srcColorHi) ); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; x < pixCount; x++, ir++, iw++) - { - dstFramebufferMain[iw].value = ColorspaceConvert8888To6665(srcFramebuffer[ir]); - dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); - } - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To6665((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount); - } - - this->_renderNeedsFlushMain = false; - } - else - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount); - } - - this->_renderNeedsFlush16 = false; - } - } - else if (this->_outputFormat == NDSColorFormat_BGR888_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - size_t x = 0; -#ifdef ENABLE_SSE2 - const size_t ssePixCount = pixCount - (pixCount % 8); - for (; x < ssePixCount; x += 8, ir += 8, iw += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); - - _mm_store_si128((__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2(srcColorLo)); - _mm_store_si128((__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2(srcColorHi)); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; x < pixCount; x++, ir++, iw++) - { - dstFramebufferMain[iw].value = ColorspaceCopy32(srcFramebuffer[ir]); - dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); - } - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount); - } - - this->_renderNeedsFlushMain = false; - } - else - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount); - } - - this->_renderNeedsFlush16 = false; - } - } + ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); + this->_renderNeedsFlush16 = false; } } @@ -1775,6 +1598,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Color4u Render3DError OpenGLRenderer::FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) { + OGLRenderRef &OGLRef = *this->ref; + if (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) { this->_renderNeedsFlushMain = false; @@ -1782,9 +1607,18 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const Color4u8 *__restrict srcFra } else { - return this->_FlushFramebufferFlipAndConvertOnCPU(srcFramebuffer, - dstFramebufferMain, dstFramebuffer16, - !this->willFlipOnlyFramebufferOnGPU, !this->willFlipAndConvertFramebufferOnGPU); + if (OGLRef.readPixelsBestFormat == GL_BGRA) + { + return this->_FlushFramebufferConvertOnCPU(srcFramebuffer, + dstFramebufferMain, dstFramebuffer16, + !this->willFlipAndConvertFramebufferOnGPU); + } + else + { + return this->_FlushFramebufferConvertOnCPU(srcFramebuffer, + dstFramebufferMain, dstFramebuffer16, + !this->willFlipAndConvertFramebufferOnGPU); + } } return RENDER3DERROR_NOERR; @@ -2544,10 +2378,6 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyOGL); this->_deviceInfo.maxAnisotropy = maxAnisotropyOGL; - // This is traditionally the fastest format and data type for glReadPixels in legacy mode. - OGLRef.readPixelsBestFormat = GL_BGRA; - OGLRef.readPixelsBestDataType = GL_UNSIGNED_BYTE; - // Need to generate this texture first because FBO creation needs it. // This texture is only required by shaders, and so if shader creation // fails, then we can immediately delete this texture if an error occurs. @@ -2613,6 +2443,12 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() INFO("OpenGL: FBOs are unsupported. Some emulation features will be disabled.\n"); } + // The internal format of FBOs is GL_RGBA, so we will match that format for glReadPixels. + // But the traditional format before FBOs was GL_BGRA, which is also the fastest format + // for glReadPixels when using legacy back buffers. + OGLRef.readPixelsBestFormat = (this->isFBOSupported) ? GL_RGBA : GL_BGRA; + OGLRef.readPixelsBestDataType = GL_UNSIGNED_BYTE; + this->_isFBOBlitSupported = this->isFBOSupported && this->IsExtensionPresent(&oglExtensionSet, "GL_EXT_framebuffer_blit"); if (!this->_isFBOBlitSupported) @@ -2698,11 +2534,22 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() { INFO("OpenGL: Successfully created geometry shaders.\n"); - error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_100, - EdgeMarkFragShader_100, - FramebufferOutputVtxShader_100, - FramebufferOutputRGBA6665FragShader_100, - FramebufferOutputRGBA8888FragShader_100); + if (OGLRef.readPixelsBestFormat == GL_BGRA) + { + error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_100, + EdgeMarkFragShader_100, + FramebufferOutputVtxShader_100, + FramebufferOutputBGRA6665FragShader_100, + FramebufferOutputBGRA8888FragShader_100); + } + else + { + error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_100, + EdgeMarkFragShader_100, + FramebufferOutputVtxShader_100, + FramebufferOutputRGBA6665FragShader_100, + FramebufferOutputRGBA8888FragShader_100); + } } } @@ -2750,7 +2597,6 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() // Set rendering support flags based on driver features. this->willFlipAndConvertFramebufferOnGPU = this->isShaderSupported && this->isVBOSupported; - this->willFlipOnlyFramebufferOnGPU = this->willFlipAndConvertFramebufferOnGPU || this->_isFBOBlitSupported; this->_deviceInfo.isEdgeMarkSupported = this->isShaderSupported && this->isVBOSupported && this->isFBOSupported; this->_deviceInfo.isFogSupported = this->isShaderSupported && this->isVBOSupported; this->_deviceInfo.isTextureSmoothingSupported = this->isShaderSupported; @@ -2988,14 +2834,29 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() // Set up FBOs glGenFramebuffersEXT(1, &OGLRef.fboClearImageID); - glGenFramebuffersEXT(1, &OGLRef.fboFramebufferFlipID); glGenFramebuffersEXT(1, &OGLRef.fboRenderID); + glGenFramebuffersEXT(1, &OGLRef.fboColorOutMainID); + glGenFramebuffersEXT(1, &OGLRef.fboColorOutWorkingID); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboClearImageID); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIColorID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_FOGATTRIBUTES_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIFogAttrID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutMainID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, OGLRef.texGColorID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create FBOs!\n"); + this->DestroyFBOs(); + + return OGLERROR_FBO_CREATE_ERROR; + } + + // Assign the default read/draw buffers. + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutWorkingID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, OGLRef.texGColorID, 0); if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) { @@ -3006,12 +2867,14 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() } // Assign the default read/draw buffers. - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboFramebufferFlipID); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texGColorID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_WORKING_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboClearImageID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIColorID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_FOGATTRIBUTES_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIFogAttrID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) { @@ -3062,8 +2925,9 @@ void OpenGLRenderer_1_2::DestroyFBOs() glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); glDeleteFramebuffersEXT(1, &OGLRef.fboClearImageID); - glDeleteFramebuffersEXT(1, &OGLRef.fboFramebufferFlipID); glDeleteFramebuffersEXT(1, &OGLRef.fboRenderID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutMainID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutWorkingID); glDeleteTextures(1, &OGLRef.texCIColorID); glDeleteTextures(1, &OGLRef.texCIFogAttrID); glDeleteTextures(1, &OGLRef.texCIDepthStencilID); @@ -3073,8 +2937,9 @@ void OpenGLRenderer_1_2::DestroyFBOs() glDeleteTextures(1, &OGLRef.texGDepthStencilID); OGLRef.fboClearImageID = 0; - OGLRef.fboFramebufferFlipID = 0; OGLRef.fboRenderID = 0; + OGLRef.fboColorOutMainID = 0; + OGLRef.fboColorOutWorkingID = 0; OGLRef.texCIColorID = 0; OGLRef.texCIFogAttrID = 0; OGLRef.texCIDepthStencilID = 0; @@ -3994,10 +3859,6 @@ void OpenGLRenderer_1_2::_SetupGeometryShaders(const OGLGeometryFlags flags) if (!this->isShaderSupported) { - if (this->isFBOSupported) - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } return; } @@ -4006,11 +3867,6 @@ void OpenGLRenderer_1_2::_SetupGeometryShaders(const OGLGeometryFlags flags) glUniform1i(OGLRef.uniformTexDrawOpaque[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformDrawModeDepthEqualsTest[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformPolyDrawShadow[flags.value], GL_FALSE); - - if (this->isFBOSupported) - { - glDrawBuffers(4, GeometryDrawBuffersEnum[flags.DrawBuffersMode]); - } } Render3DError OpenGLRenderer_1_2::EnableVertexAttributes() @@ -4162,6 +4018,7 @@ Render3DError OpenGLRenderer_1_2::ZeroDstAlphaPass(const POLY *rawPolyList, cons // Restore OpenGL states back to normal. this->_geometryProgramFlags = oldGProgramFlags; this->_SetupGeometryShaders(this->_geometryProgramFlags); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); glClear(GL_STENCIL_BUFFER_BIT); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -4200,7 +4057,6 @@ void OpenGLRenderer_1_2::_ResolveWorkingBackFacing() glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboMSIntermediateRenderID); - glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); } void OpenGLRenderer_1_2::_ResolveGeometry() @@ -4245,9 +4101,8 @@ void OpenGLRenderer_1_2::_ResolveGeometry() // Blit the color buffer glBlitFramebufferEXT(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + // Reset framebuffer targets glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); } } @@ -4255,26 +4110,24 @@ Render3DError OpenGLRenderer_1_2::ReadBackPixels() { OGLRenderRef &OGLRef = *this->ref; + // Both flips and converts the framebuffer on the GPU. No additional postprocessing + // should be necessary at this point. if (this->willFlipAndConvertFramebufferOnGPU) { - // Both flips and converts the framebuffer on the GPU. No additional postprocessing - // should be necessary at this point. if (this->isFBOSupported) { if (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) { const GLuint convertProgramID = (this->_outputFormat == NDSColorFormat_BGR666_Rev) ? OGLRef.programFramebufferRGBA6665OutputID[1] : OGLRef.programFramebufferRGBA8888OutputID[1]; glUseProgram(convertProgramID); - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutWorkingID); this->_lastTextureDrawTarget = OGLTextureUnitID_FinalColor; } else { const GLuint convertProgramID = (this->_outputFormat == NDSColorFormat_BGR666_Rev) ? OGLRef.programFramebufferRGBA6665OutputID[0] : OGLRef.programFramebufferRGBA8888OutputID[0]; glUseProgram(convertProgramID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutMainID); this->_lastTextureDrawTarget = OGLTextureUnitID_GColor; } } @@ -4319,21 +4172,6 @@ Render3DError OpenGLRenderer_1_2::ReadBackPixels() glDisableVertexAttribArray(OGLVertexAttributeID_TexCoord0); } } - else if (this->willFlipOnlyFramebufferOnGPU) - { - // Just flips the framebuffer in Y to match the coordinates of OpenGL and the NDS hardware. - // Further colorspace conversion will need to be done in a later step. - - const GLenum flipTarget = (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) ? OGL_WORKING_ATTACHMENT_ID : OGL_COLOROUT_ATTACHMENT_ID; - - glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, OGLRef.fboFramebufferFlipID); - glDrawBuffer(flipTarget); - - glBlitFramebufferEXT(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); - - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboFramebufferFlipID); - glReadBuffer(flipTarget); - } if (this->isPBOSupported) { @@ -4568,28 +4406,37 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co glMatrixMode(GL_PROJECTION); glLoadIdentity(); - - if (this->isFBOSupported) - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } + glScalef(1.0f, -1.0f, 1.0f); } #endif } else { - if (this->isShaderSupported && this->isFBOSupported) - { - // Even with no polygons to draw, we always need to set these 3 flags so that - // glDrawBuffers() can reference the correct set of FBO attachments using - // OGLGeometryFlags.DrawBuffersMode. - this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; - this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; - this->_geometryProgramFlags.OpaqueDrawMode = 1; - } + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } + + if (this->isFBOSupported) + { + OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO); + + if (this->isShaderSupported) + { + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + } + else + { + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + } + + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); } - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -4663,6 +4510,10 @@ Render3DError OpenGLRenderer_1_2::RenderGeometry() glStencilMask(0xFF); this->_SetupGeometryShaders(this->_geometryProgramFlags); + if (this->isFBOSupported && this->isShaderSupported) + { + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + } } if (this->_clippedPolyOpaqueCount == 0) @@ -4813,6 +4664,11 @@ Render3DError OpenGLRenderer_1_2::PostprocessFramebuffer() glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA, GL_DST_ALPHA); glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); + + if (this->isFBOSupported) + { + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + } } if (this->isVAOSupported) @@ -4898,8 +4754,6 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf { glBlitFramebufferEXT(0, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GPU_FRAMEBUFFER_NATIVE_WIDTH, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); } if (this->isMultisampledFBOSupported) @@ -4949,10 +4803,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf { // Blit the color and depth buffers. glBlitFramebufferEXT(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); } } } @@ -5004,12 +4855,6 @@ Render3DError OpenGLRenderer_1_2::ClearUsingValues(const Color4u8 &clearColor666 } else { - if (this->isFBOSupported) - { - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } - glClearColor(divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]); glClearDepth((GLclampd)clearAttributes.depth / (GLclampd)0x00FFFFFF); glClearStencil(clearAttributes.opaquePolyID); @@ -5413,9 +5258,7 @@ Render3DError OpenGLRenderer_1_2::RenderPowerOff() if (this->isFBOSupported) { - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutMainID); } glClearColor(0.0f, 0.0f, 0.0f, 0.0f); @@ -5499,6 +5342,7 @@ Render3DError OpenGLRenderer_1_2::RenderFlush(bool willFlushBuffer32, bool willF Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) { Render3DError error = OGLERROR_NOERR; + OGLRenderRef &OGLRef = *this->ref; if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT) { @@ -5593,10 +5437,20 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) this->CreateEdgeMarkProgram(EdgeMarkVtxShader_100, EdgeMarkFragShader_100); } - this->CreateFramebufferOutput6665Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); - this->CreateFramebufferOutput6665Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); - this->CreateFramebufferOutput8888Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); - this->CreateFramebufferOutput8888Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); + if (OGLRef.readPixelsBestFormat == GL_BGRA) + { + this->CreateFramebufferOutput6665Program(0, FramebufferOutputVtxShader_100, FramebufferOutputBGRA6665FragShader_100); + this->CreateFramebufferOutput6665Program(1, FramebufferOutputVtxShader_100, FramebufferOutputBGRA6665FragShader_100); + this->CreateFramebufferOutput8888Program(0, FramebufferOutputVtxShader_100, FramebufferOutputBGRA8888FragShader_100); + this->CreateFramebufferOutput8888Program(1, FramebufferOutputVtxShader_100, FramebufferOutputBGRA8888FragShader_100); + } + else + { + this->CreateFramebufferOutput6665Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); + this->CreateFramebufferOutput6665Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); + this->CreateFramebufferOutput8888Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); + this->CreateFramebufferOutput8888Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); + } } if (oglrender_framebufferDidResizeCallback != NULL) @@ -5814,18 +5668,22 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D_State &renderState, co } else { - if (this->isFBOSupported) - { - // Even with no polygons to draw, we always need to set these 3 flags so that - // glDrawBuffers() can reference the correct set of FBO attachments using - // OGLGeometryFlags.DrawBuffersMode. - this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; - this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; - this->_geometryProgramFlags.OpaqueDrawMode = 1; - } + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } + + if (this->isFBOSupported) + { + OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); } - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index ca3283fdd..167ce044c 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -628,7 +628,8 @@ struct OGLRenderRef GLuint fboClearImageID; GLuint fboRenderID; - GLuint fboFramebufferFlipID; + GLuint fboColorOutMainID; + GLuint fboColorOutWorkingID; GLuint fboMSIntermediateRenderID; GLuint selectedRenderingFBO; @@ -799,9 +800,9 @@ private: unsigned int versionRevision; private: - Render3DError _FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, - Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, - bool doFramebufferFlip, bool doFramebufferConvert); + template Render3DError _FlushFramebufferConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, + Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, + bool doFramebufferConvert); protected: // OpenGL-specific References @@ -816,7 +817,6 @@ protected: bool isMultisampledFBOSupported; bool isShaderSupported; bool isVAOSupported; - bool willFlipOnlyFramebufferOnGPU; bool willFlipAndConvertFramebufferOnGPU; bool willUsePerSampleZeroDstPass; diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 6b43a376d..1c4c32790 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -695,7 +695,6 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() glActiveTexture(GL_TEXTURE0); // OpenGL v3.2 Core Profile should have all the necessary features to be able to flip and convert the framebuffer. - this->willFlipOnlyFramebufferOnGPU = true; this->willFlipAndConvertFramebufferOnGPU = true; this->_isSampleShadingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_sample_shading"); @@ -925,6 +924,40 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() // Set up FBOs glGenFramebuffers(1, &OGLRef.fboClearImageID); glGenFramebuffers(1, &OGLRef.fboRenderID); + glGenFramebuffers(1, &OGLRef.fboColorOutMainID); + glGenFramebuffers(1, &OGLRef.fboColorOutWorkingID); + + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutMainID); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGLRef.texGColorID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create FBOs!\n"); + this->DestroyFBOs(); + + return OGLERROR_FBO_CREATE_ERROR; + } + + // Assign the default read/draw buffers. + glDrawBuffer(GL_COLOR_ATTACHMENT0); + glReadBuffer(GL_COLOR_ATTACHMENT0); + + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutWorkingID); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, OGLRef.texGColorID, 0); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create FBOs!\n"); + this->DestroyFBOs(); + + return OGLERROR_FBO_CREATE_ERROR; + } + + // Assign the default read/draw buffers. + glDrawBuffer(GL_COLOR_ATTACHMENT0); + glReadBuffer(GL_COLOR_ATTACHMENT0); glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboClearImageID); glFramebufferTexture2D(GL_FRAMEBUFFER, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIColorID, 0); @@ -980,6 +1013,8 @@ void OpenGLRenderer_3_2::DestroyFBOs() glBindFramebuffer(GL_FRAMEBUFFER, 0); glDeleteFramebuffers(1, &OGLRef.fboClearImageID); glDeleteFramebuffers(1, &OGLRef.fboRenderID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutMainID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutWorkingID); glDeleteTextures(1, &OGLRef.texCIColorID); glDeleteTextures(1, &OGLRef.texCIFogAttrID); glDeleteTextures(1, &OGLRef.texCIDepthStencilID); @@ -990,6 +1025,8 @@ void OpenGLRenderer_3_2::DestroyFBOs() OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; + OGLRef.fboColorOutMainID = 0; + OGLRef.fboColorOutWorkingID = 0; OGLRef.texCIColorID = 0; OGLRef.texCIFogAttrID = 0; OGLRef.texCIDepthStencilID = 0; @@ -1993,8 +2030,6 @@ void OpenGLRenderer_3_2::_SetupGeometryShaders(const OGLGeometryFlags flags) glUniform1i(OGLRef.uniformTexDrawOpaque[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformDrawModeDepthEqualsTest[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformPolyDrawShadow[flags.value], GL_FALSE); - - glDrawBuffers(4, GeometryDrawBuffersEnum[flags.DrawBuffersMode]); } Render3DError OpenGLRenderer_3_2::EnableVertexAttributes() @@ -2071,6 +2106,7 @@ Render3DError OpenGLRenderer_3_2::ZeroDstAlphaPass(const POLY *rawPolyList, cons // Restore OpenGL states back to normal. this->_geometryProgramFlags = oldGProgramFlags; this->_SetupGeometryShaders(this->_geometryProgramFlags); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); glClearBufferfi(GL_DEPTH_STENCIL, 0, 0.0f, 0); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -2160,15 +2196,15 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels() { // Use the alternate program where the output color is not at index 0. glUseProgram(OGLRef.programFramebufferRGBA6665OutputID[1]); - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutWorkingID); + this->_lastTextureDrawTarget = OGLTextureUnitID_FinalColor; } else { // Use the program where the output color is from index 0. glUseProgram(OGLRef.programFramebufferRGBA6665OutputID[0]); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutMainID); + this->_lastTextureDrawTarget = OGLTextureUnitID_GColor; } glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); @@ -2180,44 +2216,29 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels() glBindVertexArray(OGLRef.vaoPostprocessStatesID); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glBindVertexArray(0); - - if (this->_mappedFramebuffer != NULL) - { - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - this->_mappedFramebuffer = NULL; - } - - glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); } else { // Just flips the framebuffer in Y to match the coordinates of OpenGL and the NDS hardware. - if (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) - { - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glBlitFramebuffer(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); - } - else - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); - glBlitFramebuffer(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } + const GLuint fboOut = (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) ? OGLRef.fboColorOutWorkingID : OGLRef.fboColorOutMainID; + glBindFramebuffer(GL_FRAMEBUFFER, fboOut); + glReadBuffer(GL_COLOR_ATTACHMENT1); + glBlitFramebuffer(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glReadBuffer(GL_COLOR_ATTACHMENT0); - // Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this - // format without a performance penalty. - if (this->_mappedFramebuffer != NULL) - { - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - this->_mappedFramebuffer = NULL; - } - - glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); + this->_lastTextureDrawTarget = (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) ? OGLTextureUnitID_FinalColor : OGLTextureUnitID_GColor; } + // Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this + // format without a performance penalty. + if (this->_mappedFramebuffer != NULL) + { + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + this->_mappedFramebuffer = NULL; + } + + glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); + this->_pixelReadNeedsFinish = true; return OGLERROR_NOERR; } @@ -2443,7 +2464,10 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D_State &renderState, co this->_geometryProgramFlags.OpaqueDrawMode = 1; } - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.selectedRenderingFBO); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + glReadBuffer(GL_COLOR_ATTACHMENT0); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -2911,9 +2935,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff() return OGLERROR_BEGINGL_FAILED; } - glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutMainID); glClearBufferfv(GL_COLOR, 0, oglColor); if (this->_mappedFramebuffer != NULL) diff --git a/desmume/src/OGLRender_ES3.cpp b/desmume/src/OGLRender_ES3.cpp index 83270710f..0721bd79e 100644 --- a/desmume/src/OGLRender_ES3.cpp +++ b/desmume/src/OGLRender_ES3.cpp @@ -321,7 +321,6 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions() glActiveTexture(GL_TEXTURE0); // OpenGL ES v3.0 should have all the necessary features to be able to flip and convert the framebuffer. - this->willFlipOnlyFramebufferOnGPU = true; this->willFlipAndConvertFramebufferOnGPU = true; this->_enableTextureSmoothing = CommonSettings.GFX3D_Renderer_TextureSmoothing;