From 0c7cb99d78f0dd190aa23e8b3191b6af79b69c1e Mon Sep 17 00:00:00 2001 From: rogerman Date: Thu, 18 Jul 2024 12:17:25 -0700 Subject: [PATCH 1/4] OpenGL Renderer: Greatly simplify the fog rendering pass. - There is no more need to switch to the working texture as the destination for the fog output. This change will be essential for future commits. - The dual-source blending method has been obsoleted and removed. - FBOs are no longer required for the fog feature, easing requirements for ancient GPUs. - Ancient GPUs may see a small performance benefit due to shader simplification. --- desmume/src/OGLRender.cpp | 75 +++++++++++--------- desmume/src/OGLRender.h | 57 ++++++++++++++- desmume/src/OGLRender_3_2.cpp | 128 ++++++++++------------------------ desmume/src/OGLRender_3_2.h | 1 - desmume/src/OGLRender_ES3.cpp | 7 -- 5 files changed, 133 insertions(+), 135 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index 0c68d4d0a..d5024b873 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -646,20 +646,17 @@ void main() \n\ static const char *FogFragShader_100 = {"\ varying vec2 texCoord;\n\ \n\ -uniform sampler2D texInFragColor;\n\ uniform sampler2D texInFragDepth;\n\ uniform sampler2D texInFogAttributes;\n\ uniform sampler1D texFogDensityTable;\n\ uniform bool stateEnableFogAlphaOnly;\n\ -uniform vec4 stateFogColor;\n\ \n\ void main()\n\ {\n\ - vec4 inFragColor = texture2D(texInFragColor, texCoord);\n\ float inFragDepth = texture2D(texInFragDepth, texCoord).r;\n\ vec4 inFogAttributes = texture2D(texInFogAttributes, texCoord);\n\ bool polyEnableFog = (inFogAttributes.r > 0.999);\n\ - vec4 newFoggedColor = inFragColor;\n\ + vec4 outFogWeight = vec4(0.0);\n\ \n\ float fogMixWeight = 0.0;\n\ if (FOG_STEP == 0)\n\ @@ -673,10 +670,10 @@ void main()\n\ \n\ if (polyEnableFog)\n\ {\n\ - newFoggedColor = mix(inFragColor, (stateEnableFogAlphaOnly) ? vec4(inFragColor.rgb, stateFogColor.a) : stateFogColor, fogMixWeight);\n\ + outFogWeight = (stateEnableFogAlphaOnly) ? vec4(vec3(0.0), fogMixWeight) : vec4(fogMixWeight);\n\ }\n\ \n\ - gl_FragColor = newFoggedColor;\n\ + gl_FragColor = outFogWeight;\n\ }\n\ "}; @@ -2753,7 +2750,7 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() this->willFlipAndConvertFramebufferOnGPU = this->isShaderSupported && this->isVBOSupported; this->willFlipOnlyFramebufferOnGPU = this->willFlipAndConvertFramebufferOnGPU || this->_isFBOBlitSupported; this->_deviceInfo.isEdgeMarkSupported = this->isShaderSupported && this->isVBOSupported && this->isFBOSupported; - this->_deviceInfo.isFogSupported = this->isShaderSupported && this->isVBOSupported && this->isFBOSupported; + this->_deviceInfo.isFogSupported = this->isShaderSupported && this->isVBOSupported; this->_deviceInfo.isTextureSmoothingSupported = this->isShaderSupported; this->_isDepthLEqualPolygonFacingSupported = this->isShaderSupported && this->isVBOSupported && this->isFBOSupported; @@ -3584,7 +3581,6 @@ Render3DError OpenGLRenderer_1_2::CreateFogProgram(const OGLFogProgramKey fogPro glUniform1i(uniformTexFogDensityTable, OGLTextureUnitID_LookupTable); OGLRef.uniformStateEnableFogAlphaOnly = glGetUniformLocation(shaderID.program, "stateEnableFogAlphaOnly"); - OGLRef.uniformStateFogColor = glGetUniformLocation(shaderID.program, "stateFogColor"); return OGLERROR_NOERR; } @@ -4698,32 +4694,30 @@ Render3DError OpenGLRenderer_1_2::PostprocessFramebuffer() return OGLERROR_NOERR; } + if ( !(this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) && + !(this->_enableFog && this->_deviceInfo.isFogSupported) ) + { + return OGLERROR_NOERR; + } + OGLRenderRef &OGLRef = *this->ref; - if ( (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) || - (this->_enableFog && this->_deviceInfo.isFogSupported) ) + // Set up the postprocessing states + glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); + glDisable(GL_DEPTH_TEST); + + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); + + if (this->isVAOSupported) { - // Set up the postprocessing states - glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); - glDisable(GL_DEPTH_TEST); - - glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); - - if (this->isVAOSupported) - { - glBindVertexArray(OGLRef.vaoPostprocessStatesID); - } - else - { - glEnableVertexAttribArray(OGLVertexAttributeID_Position); - glEnableVertexAttribArray(OGLVertexAttributeID_TexCoord0); - glVertexAttribPointer(OGLVertexAttributeID_Position, 2, GL_FLOAT, GL_FALSE, 0, 0); - glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, 0, (const GLvoid *)(sizeof(GLfloat) * 8)); - } + glBindVertexArray(OGLRef.vaoPostprocessStatesID); } else { - return OGLERROR_NOERR; + glEnableVertexAttribArray(OGLVertexAttributeID_Position); + glEnableVertexAttribArray(OGLVertexAttributeID_TexCoord0); + glVertexAttribPointer(OGLVertexAttributeID_Position, 2, GL_FLOAT, GL_FALSE, 0, 0); + glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, 0, (const GLvoid *)(sizeof(GLfloat) * 8)); } if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) @@ -4795,16 +4789,28 @@ Render3DError OpenGLRenderer_1_2::PostprocessFramebuffer() OGLFogShaderID shaderID = this->_fogProgramMap[this->_fogProgramKey.key]; - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); + if (this->isFBOSupported) + { + glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); + } + glUseProgram(shaderID.program); glUniform1i(OGLRef.uniformStateEnableFogAlphaOnly, this->_pendingRenderStates.enableFogAlphaOnly); - glUniform4fv(OGLRef.uniformStateFogColor, 1, (const GLfloat *)&this->_pendingRenderStates.fogColor); + glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, GL_CONSTANT_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); + glBlendColor( this->_pendingRenderStates.fogColor.r, + this->_pendingRenderStates.fogColor.g, + this->_pendingRenderStates.fogColor.b, + this->_pendingRenderStates.fogColor.a ); + + glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); - glDisable(GL_BLEND); + glEnable(GL_BLEND); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - this->_lastTextureDrawTarget = OGLTextureUnitID_FinalColor; + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA, GL_DST_ALPHA); + glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); } if (this->isVAOSupported) @@ -4826,6 +4832,11 @@ Render3DError OpenGLRenderer_1_2::EndRender() texCache.Evict(); this->ReadBackPixels(); + GLenum oglerror = glGetError(); + if (oglerror != GL_NO_ERROR) + { + INFO("OpenGL: error = %i\n", (int)oglerror); + } ENDGL(); diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index 2153e3960..bfa917086 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -28,7 +28,7 @@ #include "types.h" // OPENGL PLATFORM-SPECIFIC INCLUDES -#if defined(__ANGLE__) || defined(__ANDROID__) +#if defined(__ANGLE__) || defined(__ANDROID__) || defined(__linux__) #define OPENGL_VARIANT_ES #define _NO_SDL_TYPES #include @@ -331,7 +331,10 @@ EXTERNOGLEXT(PFNGLDELETERENDERBUFFERSEXTPROC, glDeleteRenderbuffersEXT) // modification. In other words, these are one-to-one drop-in replacements. typedef GLclampf GLclampd; #define glClearDepth(depth) glClearDepthf(depth) + +#ifndef OPENGL_VARIANT_ES #define glDrawBuffer(x) glDrawBuffers(1, ((GLenum[]){x})) +#endif // 1D textures may not exist for a particular OpenGL variant, so they will be promoted to // 2D textures instead. Implementations need to modify their GLSL shaders accordingly to @@ -627,8 +630,16 @@ struct OGLRenderRef GLuint fboClearImageID; GLuint fboRenderID; + GLuint fboRenderColor0ID[8]; + GLuint fboRenderWorking0ID[8]; + GLuint fboPolyID; + GLuint fboFogAttrID; GLuint fboFramebufferFlipID; + GLuint fboColorOutMainID; + GLuint fboColorOutWorkingID; GLuint fboMSIntermediateRenderID; + GLuint fboMSIntermediateRenderColor0ID[8]; + GLuint fboMSFogAttrID; GLuint selectedRenderingFBO; // Shader states @@ -658,7 +669,6 @@ struct OGLRenderRef GLint uniformStateEnableFogAlphaOnly; GLint uniformStateClearPolyID; GLint uniformStateClearDepth; - GLint uniformStateFogColor; GLint uniformStateAlphaTestRef[256]; GLint uniformPolyTexScale[256]; @@ -1030,4 +1040,47 @@ public: virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16); }; +#ifdef OPENGL_VARIANT_ES + +#define glDrawBuffer my_glDrawBuffer +/* +static inline void my_glDrawBuffer(GLenum attach) { + switch(attach) { + case GL_NONE: { + GLenum bufs[1] = {GL_NONE }; + glDrawBuffers(1, bufs); + break; + } + case GL_COLOR_ATTACHMENT0: { + GLenum bufs[1] = { attach }; + glDrawBuffers(1, bufs); + break; + } + case GL_COLOR_ATTACHMENT1: { + GLenum bufs[2] = {GL_NONE, attach }; + glDrawBuffers(2, bufs); + break; + } + case GL_COLOR_ATTACHMENT2: { + GLenum bufs[3] = {GL_NONE, GL_NONE, attach }; + glDrawBuffers(3, bufs); + break; + } + case GL_COLOR_ATTACHMENT3: { + GLenum bufs[4] = {GL_NONE, GL_NONE, GL_NONE, attach }; + glDrawBuffers(4, bufs); + break; + } + } +} +*/ + +static inline void my_glDrawBuffer(GLenum attach) +{ + GLenum bufs[] = {attach}; + glDrawBuffers(1, bufs); +} + +#endif + #endif // OGLRENDER_H diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 6ab93abb8..6b43a376d 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -559,45 +559,28 @@ uniform sampler2D texInFragDepth;\n\ uniform sampler2D texInFogAttributes;\n\ uniform sampler2D texFogDensityTable;\n\ \n\ -#if USE_DUAL_SOURCE_BLENDING\n\ -OUT_FOG_COLOR vec4 outFogColor;\n\ -OUT_FOG_WEIGHT vec4 outFogWeight;\n\ -#else\n\ -uniform sampler2D texInFragColor;\n\ -OUT_COLOR vec4 outFragColor;\n\ -#endif\n\ +OUT_COLOR vec4 outFogWeight;\n\ \n\ void main()\n\ {\n\ -#if USE_DUAL_SOURCE_BLENDING\n\ - outFogColor = state.fogColor;\n\ - outFogWeight = vec4(0.0);\n\ -#else\n\ - outFragColor = texture(texInFragColor, texCoord);\n\ -#endif\n\ - \n\ float inFragDepth = texture(texInFragDepth, texCoord).r;\n\ vec4 inFogAttributes = texture(texInFogAttributes, texCoord);\n\ bool polyEnableFog = (inFogAttributes.r > 0.999);\n\ - \n\ - float fogMixWeight = 0.0;\n\ - if (FOG_STEP == 0)\n\ - {\n\ - fogMixWeight = texture( texFogDensityTable, vec2( (inFragDepth <= FOG_OFFSETF) ? 0.0 : 1.0, 0.0 ) ).r;\n\ - }\n\ - else\n\ - {\n\ - fogMixWeight = texture( texFogDensityTable, vec2( (inFragDepth * (1024.0/float(FOG_STEP))) + (((-float(FOG_OFFSET)/float(FOG_STEP)) - 0.5) / 32.0), 0.0 ) ).r;\n\ - }\n\ + outFogWeight = vec4(0.0);\n\ \n\ if (polyEnableFog)\n\ {\n\ + float fogMixWeight = 0.0;\n\ + if (FOG_STEP == 0)\n\ + {\n\ + fogMixWeight = texture( texFogDensityTable, vec2( (inFragDepth <= FOG_OFFSETF) ? 0.0 : 1.0, 0.0 ) ).r;\n\ + }\n\ + else\n\ + {\n\ + fogMixWeight = texture( texFogDensityTable, vec2( (inFragDepth * (1024.0/float(FOG_STEP))) + (((-float(FOG_OFFSET)/float(FOG_STEP)) - 0.5) / 32.0), 0.0 ) ).r;\n\ + }\n\ \n\ -#if USE_DUAL_SOURCE_BLENDING\n\ outFogWeight = (state.enableFogAlphaOnly) ? vec4(vec3(0.0), fogMixWeight) : vec4(fogMixWeight);\n\ -#else\n\ - outFragColor = mix(outFragColor, (state.enableFogAlphaOnly) ? vec4(outFragColor.rgb, state.fogColor.a) : state.fogColor, fogMixWeight);\n\ -#endif\n\ }\n\ }\n\ "}; @@ -649,7 +632,6 @@ OpenGLRenderer_3_2::OpenGLRenderer_3_2() _is64kUBOSupported = false; _isTBOSupported = false; _isShaderFixedLocationSupported = false; - _isDualSourceBlendingSupported = false; _isSampleShadingSupported = false; _isConservativeDepthSupported = false; _isConservativeDepthAMDSupported = false; @@ -716,9 +698,6 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() this->willFlipOnlyFramebufferOnGPU = true; this->willFlipAndConvertFramebufferOnGPU = true; -#ifdef GL_VERSION_3_3 - this->_isDualSourceBlendingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_blend_func_extended"); -#endif this->_isSampleShadingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_sample_shading"); this->_isConservativeDepthSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_conservative_depth") && IsOpenGLDriverVersionSupported(4, 0, 0); this->_isConservativeDepthAMDSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_AMD_conservative_depth") && IsOpenGLDriverVersionSupported(4, 0, 0); @@ -1812,8 +1791,6 @@ Render3DError OpenGLRenderer_3_2::CreateFogProgram(const OGLFogProgramKey fogPro } std::stringstream fsHeader; - fsHeader << "#define USE_DUAL_SOURCE_BLENDING " << ((this->_isDualSourceBlendingSupported) ? 1 : 0) << "\n"; - fsHeader << "\n"; fsHeader << "#define FOG_OFFSET " << fogOffset << "\n"; fsHeader << "#define FOG_OFFSETF " << fogOffsetf << (((fogOffsetf == 0.0f) || (fogOffsetf == 1.0f)) ? ".0" : "") << "\n"; fsHeader << "#define FOG_STEP " << fogStep << "\n"; @@ -1821,14 +1798,10 @@ Render3DError OpenGLRenderer_3_2::CreateFogProgram(const OGLFogProgramKey fogPro if (this->_isShaderFixedLocationSupported) { - fsHeader << "#define OUT_FOG_COLOR layout (location = 0, index = 0) out\n"; - fsHeader << "#define OUT_FOG_WEIGHT layout (location = 0, index = 1) out\n"; fsHeader << "#define OUT_COLOR layout (location = 0) out\n"; } else { - fsHeader << "#define OUT_FOG_COLOR out\n"; - fsHeader << "#define OUT_FOG_WEIGHT out\n"; fsHeader << "#define OUT_COLOR out\n"; } @@ -1860,18 +1833,7 @@ Render3DError OpenGLRenderer_3_2::CreateFogProgram(const OGLFogProgramKey fogPro { glBindAttribLocation(shaderID.program, OGLVertexAttributeID_Position, "inPosition"); glBindAttribLocation(shaderID.program, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); - -#ifdef GL_VERSION_3_3 - if (this->_isDualSourceBlendingSupported) - { - glBindFragDataLocationIndexed(shaderID.program, 0, 0, "outFogColor"); - glBindFragDataLocationIndexed(shaderID.program, 0, 1, "outFogWeight"); - } - else -#endif - { - glBindFragDataLocation(shaderID.program, 0, "outFragColor"); - } + glBindFragDataLocation(shaderID.program, 0, "outFogWeight"); } #endif @@ -1897,12 +1859,6 @@ Render3DError OpenGLRenderer_3_2::CreateFogProgram(const OGLFogProgramKey fogPro glUniform1i(uniformTexGFog, OGLTextureUnitID_FogAttr); glUniform1i(uniformTexFogDensityTable, OGLTextureUnitID_LookupTable); - if (!this->_isDualSourceBlendingSupported) - { - const GLint uniformTexGColor = glGetUniformLocation(shaderID.program, "texInFragColor"); - glUniform1i(uniformTexGColor, OGLTextureUnitID_GColor); - } - return OGLERROR_NOERR; } @@ -2498,26 +2454,20 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D_State &renderState, co Render3DError OpenGLRenderer_3_2::PostprocessFramebuffer() { - if (this->_clippedPolyCount < 1) + if ( (this->_clippedPolyCount < 1) || + (!this->_enableEdgeMark && !this->_enableFog) ) { return OGLERROR_NOERR; } OGLRenderRef &OGLRef = *this->ref; - if (this->_enableEdgeMark || this->_enableFog) - { - // Set up the postprocessing states - glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); - glDisable(GL_DEPTH_TEST); - - glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); - glBindVertexArray(OGLRef.vaoPostprocessStatesID); - } - else - { - return OGLERROR_NOERR; - } + // Set up the postprocessing states + glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); + glDisable(GL_DEPTH_TEST); + + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); + glBindVertexArray(OGLRef.vaoPostprocessStatesID); if (this->_enableEdgeMark) { @@ -2571,31 +2521,23 @@ Render3DError OpenGLRenderer_3_2::PostprocessFramebuffer() } OGLFogShaderID shaderID = this->_fogProgramMap[this->_fogProgramKey.key]; + glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); glUseProgram(shaderID.program); - glDisable(GL_STENCIL_TEST); -#ifdef GL_VERSION_3_3 - if (this->_isDualSourceBlendingSupported) - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glEnable(GL_BLEND); - glBlendFunc(GL_SRC1_COLOR, GL_ONE_MINUS_SRC1_COLOR); - glBlendEquation(GL_FUNC_ADD); - - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA, GL_DST_ALPHA); - glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); - } - else -#endif - { - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glDisable(GL_BLEND); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - this->_lastTextureDrawTarget = OGLTextureUnitID_FinalColor; - } + glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, GL_CONSTANT_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); + glBlendColor( this->_pendingRenderStates.fogColor.r, + this->_pendingRenderStates.fogColor.g, + this->_pendingRenderStates.fogColor.b, + this->_pendingRenderStates.fogColor.a ); + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glEnable(GL_BLEND); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA, GL_DST_ALPHA); + glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); } glBindVertexArray(0); diff --git a/desmume/src/OGLRender_3_2.h b/desmume/src/OGLRender_3_2.h index 700b9497c..ee47a905a 100644 --- a/desmume/src/OGLRender_3_2.h +++ b/desmume/src/OGLRender_3_2.h @@ -64,7 +64,6 @@ protected: bool _is64kUBOSupported; bool _isTBOSupported; bool _isShaderFixedLocationSupported; - bool _isDualSourceBlendingSupported; bool _isSampleShadingSupported; bool _isConservativeDepthSupported; bool _isConservativeDepthAMDSupported; diff --git a/desmume/src/OGLRender_ES3.cpp b/desmume/src/OGLRender_ES3.cpp index 1004c5468..83270710f 100644 --- a/desmume/src/OGLRender_ES3.cpp +++ b/desmume/src/OGLRender_ES3.cpp @@ -778,14 +778,10 @@ Render3DError OpenGLESRenderer_3_0::CreateFogProgram(const OGLFogProgramKey fogP vsHeader << "#define IN_VTX_COLOR layout (location = " << OGLVertexAttributeID_Color << ") in\n"; std::stringstream fsHeader; - fsHeader << "#define USE_DUAL_SOURCE_BLENDING " << ((this->_isDualSourceBlendingSupported) ? 1 : 0) << "\n"; - fsHeader << "\n"; fsHeader << "#define FOG_OFFSET " << fogOffset << "\n"; fsHeader << "#define FOG_OFFSETF " << fogOffsetf << (((fogOffsetf == 0.0f) || (fogOffsetf == 1.0f)) ? ".0" : "") << "\n"; fsHeader << "#define FOG_STEP " << fogStep << "\n"; fsHeader << "\n"; - fsHeader << "#define OUT_FOG_COLOR layout (location = 0, index = 0) out\n"; - fsHeader << "#define OUT_FOG_WEIGHT layout (location = 0, index = 1) out\n"; fsHeader << "#define OUT_COLOR layout (location = 0) out\n"; std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vtxShaderCString); @@ -833,9 +829,6 @@ Render3DError OpenGLESRenderer_3_0::CreateFogProgram(const OGLFogProgramKey fogP glUniform1i(uniformTexGFog, OGLTextureUnitID_FogAttr); glUniform1i(uniformTexFogDensityTable, OGLTextureUnitID_LookupTable); - const GLint uniformTexGColor = glGetUniformLocation(shaderID.program, "texInFragColor"); - glUniform1i(uniformTexGColor, OGLTextureUnitID_GColor); - return OGLERROR_NOERR; } From 3e650f2f73be18ca7d7fdd7b0da6abf2234b1ae2 Mon Sep 17 00:00:00 2001 From: rogerman Date: Thu, 18 Jul 2024 12:26:53 -0700 Subject: [PATCH 2/4] OpenGL Renderer: Add entry point for glBlendColor, fixing compiling on Windows. (Regression from commit 0c7cb99.) --- desmume/src/OGLRender.cpp | 2 ++ desmume/src/OGLRender.h | 1 + 2 files changed, 3 insertions(+) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index d5024b873..3a1a4e459 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -177,6 +177,7 @@ OGLEXT(PFNGLACTIVETEXTUREPROC, glActiveTexture) // Core in v1.3 // Blending #if !defined(GLX_H) +OGLEXT(PFNGLBLENDCOLORPROC, glBlendColor) // Core in v1.2 OGLEXT(PFNGLBLENDEQUATIONPROC, glBlendEquation) // Core in v1.2 #endif OGLEXT(PFNGLBLENDFUNCSEPARATEPROC, glBlendFuncSeparate) // Core in v1.4 @@ -256,6 +257,7 @@ static void OGLLoadEntryPoints_Legacy() // Blending #if !defined(GLX_H) + INITOGLEXT(PFNGLBLENDCOLORPROC, glBlendColor) // Core in v1.2 INITOGLEXT(PFNGLBLENDEQUATIONPROC, glBlendEquation) // Core in v1.2 #endif INITOGLEXT(PFNGLBLENDFUNCSEPARATEPROC, glBlendFuncSeparate) // Core in v1.4 diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index bfa917086..b327a06b3 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -134,6 +134,7 @@ EXTERNOGLEXT(PFNGLACTIVETEXTUREPROC, glActiveTexture) // Core in v1.3 // Blending #if !defined(GLX_H) +EXTERNOGLEXT(PFNGLBLENDCOLORPROC, glBlendColor) // Core in v1.2 EXTERNOGLEXT(PFNGLBLENDEQUATIONPROC, glBlendEquation) // Core in v1.2 #endif EXTERNOGLEXT(PFNGLBLENDFUNCSEPARATEPROC, glBlendFuncSeparate) // Core in v1.4 From 811e1de45e7dd9bfba02707c23c4a828df4a353b Mon Sep 17 00:00:00 2001 From: rogerman Date: Thu, 18 Jul 2024 12:46:11 -0700 Subject: [PATCH 3/4] OpenGL Renderer: Oops! Roll back some internal test code that accidentally slipped into commit 0c7cb99. --- desmume/src/OGLRender.cpp | 5 ---- desmume/src/OGLRender.h | 58 ++------------------------------------- 2 files changed, 2 insertions(+), 61 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index 3a1a4e459..f1409f00b 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -4834,11 +4834,6 @@ Render3DError OpenGLRenderer_1_2::EndRender() texCache.Evict(); this->ReadBackPixels(); - GLenum oglerror = glGetError(); - if (oglerror != GL_NO_ERROR) - { - INFO("OpenGL: error = %i\n", (int)oglerror); - } ENDGL(); diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index b327a06b3..ca3283fdd 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -28,7 +28,7 @@ #include "types.h" // OPENGL PLATFORM-SPECIFIC INCLUDES -#if defined(__ANGLE__) || defined(__ANDROID__) || defined(__linux__) +#if defined(__ANGLE__) || defined(__ANDROID__) #define OPENGL_VARIANT_ES #define _NO_SDL_TYPES #include @@ -332,11 +332,8 @@ EXTERNOGLEXT(PFNGLDELETERENDERBUFFERSEXTPROC, glDeleteRenderbuffersEXT) // modification. In other words, these are one-to-one drop-in replacements. typedef GLclampf GLclampd; #define glClearDepth(depth) glClearDepthf(depth) - -#ifndef OPENGL_VARIANT_ES #define glDrawBuffer(x) glDrawBuffers(1, ((GLenum[]){x})) -#endif - + // 1D textures may not exist for a particular OpenGL variant, so they will be promoted to // 2D textures instead. Implementations need to modify their GLSL shaders accordingly to // treat any 1D textures as 2D textures instead. @@ -631,16 +628,8 @@ struct OGLRenderRef GLuint fboClearImageID; GLuint fboRenderID; - GLuint fboRenderColor0ID[8]; - GLuint fboRenderWorking0ID[8]; - GLuint fboPolyID; - GLuint fboFogAttrID; GLuint fboFramebufferFlipID; - GLuint fboColorOutMainID; - GLuint fboColorOutWorkingID; GLuint fboMSIntermediateRenderID; - GLuint fboMSIntermediateRenderColor0ID[8]; - GLuint fboMSFogAttrID; GLuint selectedRenderingFBO; // Shader states @@ -1041,47 +1030,4 @@ public: virtual Render3DError RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16); }; -#ifdef OPENGL_VARIANT_ES - -#define glDrawBuffer my_glDrawBuffer -/* -static inline void my_glDrawBuffer(GLenum attach) { - switch(attach) { - case GL_NONE: { - GLenum bufs[1] = {GL_NONE }; - glDrawBuffers(1, bufs); - break; - } - case GL_COLOR_ATTACHMENT0: { - GLenum bufs[1] = { attach }; - glDrawBuffers(1, bufs); - break; - } - case GL_COLOR_ATTACHMENT1: { - GLenum bufs[2] = {GL_NONE, attach }; - glDrawBuffers(2, bufs); - break; - } - case GL_COLOR_ATTACHMENT2: { - GLenum bufs[3] = {GL_NONE, GL_NONE, attach }; - glDrawBuffers(3, bufs); - break; - } - case GL_COLOR_ATTACHMENT3: { - GLenum bufs[4] = {GL_NONE, GL_NONE, GL_NONE, attach }; - glDrawBuffers(4, bufs); - break; - } - } -} -*/ - -static inline void my_glDrawBuffer(GLenum attach) -{ - GLenum bufs[] = {attach}; - glDrawBuffers(1, bufs); -} - -#endif - #endif // OGLRENDER_H From 60385bd09951f0770887bff8b18332d83536e0a4 Mon Sep 17 00:00:00 2001 From: rogerman Date: Thu, 18 Jul 2024 21:45:00 -0700 Subject: [PATCH 4/4] OpenGL Renderer: Rework how the output framebuffers work. - The output framebuffers now bind their own FBOs rather than changing draw targets with glDrawBuffer(). - Rework the general FBO management. - Legacy OpenGL now outputs native RGBA color if FBOs are supported. This should give a minor performance increase on older GPUs. - The fixed-function pipeline can now flip the framebuffer on GPU. This greatly reduces the CPU usage when doing the final color conversion and gives a significant performance increase on ancient GPUs. --- desmume/src/OGLRender.cpp | 592 +++++++++++++--------------------- desmume/src/OGLRender.h | 10 +- desmume/src/OGLRender_3_2.cpp | 106 +++--- desmume/src/OGLRender_ES3.cpp | 1 - 4 files changed, 294 insertions(+), 415 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index f1409f00b..830e8d580 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -692,8 +692,8 @@ void main()\n\ }\n\ "}; -// Fragment shader for the final RGBA6665 formatted framebuffer, GLSL 1.00 -static const char *FramebufferOutputRGBA6665FragShader_100 = {"\ +// Fragment shader for the final BGRA6665 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputBGRA6665FragShader_100 = {"\ varying vec2 texCoord;\n\ \n\ uniform sampler2D texInFragColor;\n\ @@ -711,8 +711,8 @@ void main()\n\ }\n\ "}; -// Fragment shader for the final RGBA8888 formatted framebuffer, GLSL 1.00 -static const char *FramebufferOutputRGBA8888FragShader_100 = {"\ +// Fragment shader for the final BGRA8888 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputBGRA8888FragShader_100 = {"\ varying vec2 texCoord;\n\ \n\ uniform sampler2D texInFragColor;\n\ @@ -725,6 +725,35 @@ void main()\n\ }\n\ "}; +// Fragment shader for the final RGBA6665 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputRGBA6665FragShader_100 = {"\ +varying vec2 texCoord;\n\ +\n\ +uniform sampler2D texInFragColor;\n\ +\n\ +void main()\n\ +{\n\ + vec4 colorRGBA6665 = texture2D(texInFragColor, texCoord);\n\ + colorRGBA6665 = floor((colorRGBA6665 * 255.0) + 0.5);\n\ + colorRGBA6665.rgb = floor(colorRGBA6665.rgb / 4.0);\n\ + colorRGBA6665.a = floor(colorRGBA6665.a / 8.0);\n\ + \n\ + gl_FragColor = (colorRGBA6665 / 255.0);\n\ +}\n\ +"}; + +// Fragment shader for the final RGBA8888 formatted framebuffer, GLSL 1.00 +static const char *FramebufferOutputRGBA8888FragShader_100 = {"\ +varying vec2 texCoord;\n\ +\n\ +uniform sampler2D texInFragColor;\n\ +\n\ +void main()\n\ +{\n\ + gl_FragColor = texture2D(texInFragColor, texCoord);\n\ +}\n\ +"}; + bool IsOpenGLDriverVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision) { bool result = false; @@ -1273,7 +1302,6 @@ OpenGLRenderer::OpenGLRenderer() isShaderSupported = false; isVAOSupported = false; _isDepthLEqualPolygonFacingSupported = false; - willFlipOnlyFramebufferOnGPU = false; willFlipAndConvertFramebufferOnGPU = false; willUsePerSampleZeroDstPass = false; @@ -1476,297 +1504,92 @@ bool OpenGLRenderer::IsVersionSupported(unsigned int checkVersionMajor, unsigned return result; } -Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, - Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, - bool doFramebufferFlip, bool doFramebufferConvert) +template +Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, + Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, + bool doFramebufferConvert) { if ( ((dstFramebufferMain == NULL) && (dstFramebuffer16 == NULL)) || (srcFramebuffer == NULL) ) { return RENDER3DERROR_NOERR; } - // Convert from 32-bit BGRA8888 format to 32-bit RGBA6665 reversed format. OpenGL - // stores pixels using a flipped Y-coordinate, so this needs to be flipped back - // to the DS Y-coordinate. - size_t i = 0; - if (!doFramebufferFlip) + if ( !doFramebufferConvert || (this->_outputFormat == NDSColorFormat_BGR888_Rev) ) { - if (!doFramebufferConvert) + if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { #ifdef ENABLE_SSE2 - const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); - for (; i < ssePixCount; i += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); - - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2(srcColorLo)); - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2(srcColorHi)); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } + const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); + for (; i < ssePixCount; i += 8) + { + const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); + const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); + _mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2(srcColorLo)); + _mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2(srcColorHi)); + _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); + } + #pragma LOOPVECTORIZE_DISABLE #endif - for (; i < this->_framebufferPixCount; i++) - { - dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); - dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) + for (; i < this->_framebufferPixCount; i++) { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); - this->_renderNeedsFlushMain = false; - } - else - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); - this->_renderNeedsFlush16 = false; + dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); + dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } + + this->_renderNeedsFlushMain = false; + this->_renderNeedsFlush16 = false; + } + else if (dstFramebufferMain != NULL) + { + ColorspaceCopyBuffer32((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); + this->_renderNeedsFlushMain = false; } else { - if (this->_outputFormat == NDSColorFormat_BGR666_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { -#ifdef ENABLE_SSE2 - const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); - for (; i < ssePixCount; i += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); - - _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 0), ColorspaceConvert8888To6665_SSE2(srcColorLo) ); - _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 4), ColorspaceConvert8888To6665_SSE2(srcColorHi) ); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; i < this->_framebufferPixCount; i++) - { - dstFramebufferMain[i].value = ColorspaceConvert8888To6665(srcFramebuffer[i]); - dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - ColorspaceConvertBuffer8888To6665((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); - this->_renderNeedsFlushMain = false; - } - else - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); - this->_renderNeedsFlush16 = false; - } - } - else if (this->_outputFormat == NDSColorFormat_BGR888_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { -#ifdef ENABLE_SSE2 - const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); - for (; i < ssePixCount; i += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); - - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 0), ColorspaceCopy32_SSE2(srcColorLo)); - _mm_store_si128((__m128i *)(dstFramebufferMain + i + 4), ColorspaceCopy32_SSE2(srcColorHi)); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; i < this->_framebufferPixCount; i++) - { - dstFramebufferMain[i].value = ColorspaceCopy32(srcFramebuffer[i]); - dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); - this->_renderNeedsFlushMain = false; - } - else - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); - this->_renderNeedsFlush16 = false; - } - } + ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); + this->_renderNeedsFlush16 = false; } } - else // In the case where OpenGL couldn't flip the framebuffer on the GPU, we'll instead need to flip the framebuffer during conversion. + else if (this->_outputFormat == NDSColorFormat_BGR666_Rev) { - const size_t pixCount = this->_framebufferWidth; - - if (!doFramebufferConvert) + if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - size_t x = 0; #ifdef ENABLE_SSE2 - const size_t ssePixCount = pixCount - (pixCount % 8); - for (; x < ssePixCount; x += 8, ir += 8, iw += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); - - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2(srcColorLo) ); - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2(srcColorHi) ); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - + const size_t ssePixCount = this->_framebufferPixCount - (this->_framebufferPixCount % 8); + for (; i < ssePixCount; i += 8) + { + const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + i + 0)); + const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + i + 4)); + + _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 0), ColorspaceConvert8888To6665_SSE2(srcColorLo) ); + _mm_store_si128( (__m128i *)(dstFramebufferMain + i + 4), ColorspaceConvert8888To6665_SSE2(srcColorHi) ); + _mm_store_si128( (__m128i *)(dstFramebuffer16 + i), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); + } + #pragma LOOPVECTORIZE_DISABLE #endif - for (; x < pixCount; x++, ir++, iw++) - { - dstFramebufferMain[iw].value = ColorspaceCopy32(srcFramebuffer[ir]); - dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); - } - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) + for (; i < this->_framebufferPixCount; i++) { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount); - } - - this->_renderNeedsFlushMain = false; - } - else - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount); - } - - this->_renderNeedsFlush16 = false; + dstFramebufferMain[i].value = ColorspaceConvert8888To6665(srcFramebuffer[i]); + dstFramebuffer16[i] = ColorspaceConvert8888To5551(srcFramebuffer[i]); } + + this->_renderNeedsFlushMain = false; + this->_renderNeedsFlush16 = false; + } + else if (dstFramebufferMain != NULL) + { + ColorspaceConvertBuffer8888To6665((u32 *)srcFramebuffer, (u32 *)dstFramebufferMain, this->_framebufferPixCount); + this->_renderNeedsFlushMain = false; } else { - if (this->_outputFormat == NDSColorFormat_BGR666_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - size_t x = 0; -#ifdef ENABLE_SSE2 - const size_t ssePixCount = pixCount - (pixCount % 8); - for (; x < ssePixCount; x += 8, ir += 8, iw += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); - - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 0), ColorspaceConvert8888To6665_SSE2(srcColorLo) ); - _mm_store_si128( (__m128i *)(dstFramebufferMain + iw + 4), ColorspaceConvert8888To6665_SSE2(srcColorHi) ); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; x < pixCount; x++, ir++, iw++) - { - dstFramebufferMain[iw].value = ColorspaceConvert8888To6665(srcFramebuffer[ir]); - dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); - } - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To6665((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount); - } - - this->_renderNeedsFlushMain = false; - } - else - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount); - } - - this->_renderNeedsFlush16 = false; - } - } - else if (this->_outputFormat == NDSColorFormat_BGR888_Rev) - { - if ( (dstFramebufferMain != NULL) && (dstFramebuffer16 != NULL) ) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - size_t x = 0; -#ifdef ENABLE_SSE2 - const size_t ssePixCount = pixCount - (pixCount % 8); - for (; x < ssePixCount; x += 8, ir += 8, iw += 8) - { - const __m128i srcColorLo = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 0)); - const __m128i srcColorHi = _mm_load_si128((__m128i *)(srcFramebuffer + ir + 4)); - - _mm_store_si128((__m128i *)(dstFramebufferMain + iw + 0), ColorspaceCopy32_SSE2(srcColorLo)); - _mm_store_si128((__m128i *)(dstFramebufferMain + iw + 4), ColorspaceCopy32_SSE2(srcColorHi)); - _mm_store_si128( (__m128i *)(dstFramebuffer16 + iw), ColorspaceConvert8888To5551_SSE2(srcColorLo, srcColorHi) ); - } - -#pragma LOOPVECTORIZE_DISABLE -#endif - for (; x < pixCount; x++, ir++, iw++) - { - dstFramebufferMain[iw].value = ColorspaceCopy32(srcFramebuffer[ir]); - dstFramebuffer16[iw] = ColorspaceConvert8888To5551(srcFramebuffer[ir]); - } - } - - this->_renderNeedsFlushMain = false; - this->_renderNeedsFlush16 = false; - } - else if (dstFramebufferMain != NULL) - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceCopyBuffer32((u32 *)srcFramebuffer + ir, (u32 *)dstFramebufferMain + iw, pixCount); - } - - this->_renderNeedsFlushMain = false; - } - else - { - for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, ir += this->_framebufferWidth, iw -= this->_framebufferWidth) - { - ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer + ir, dstFramebuffer16 + iw, pixCount); - } - - this->_renderNeedsFlush16 = false; - } - } + ColorspaceConvertBuffer8888To5551((u32 *)srcFramebuffer, dstFramebuffer16, this->_framebufferPixCount); + this->_renderNeedsFlush16 = false; } } @@ -1775,6 +1598,8 @@ Render3DError OpenGLRenderer::_FlushFramebufferFlipAndConvertOnCPU(const Color4u Render3DError OpenGLRenderer::FlushFramebuffer(const Color4u8 *__restrict srcFramebuffer, Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16) { + OGLRenderRef &OGLRef = *this->ref; + if (this->willFlipAndConvertFramebufferOnGPU && this->isPBOSupported) { this->_renderNeedsFlushMain = false; @@ -1782,9 +1607,18 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const Color4u8 *__restrict srcFra } else { - return this->_FlushFramebufferFlipAndConvertOnCPU(srcFramebuffer, - dstFramebufferMain, dstFramebuffer16, - !this->willFlipOnlyFramebufferOnGPU, !this->willFlipAndConvertFramebufferOnGPU); + if (OGLRef.readPixelsBestFormat == GL_BGRA) + { + return this->_FlushFramebufferConvertOnCPU(srcFramebuffer, + dstFramebufferMain, dstFramebuffer16, + !this->willFlipAndConvertFramebufferOnGPU); + } + else + { + return this->_FlushFramebufferConvertOnCPU(srcFramebuffer, + dstFramebufferMain, dstFramebuffer16, + !this->willFlipAndConvertFramebufferOnGPU); + } } return RENDER3DERROR_NOERR; @@ -2544,10 +2378,6 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyOGL); this->_deviceInfo.maxAnisotropy = maxAnisotropyOGL; - // This is traditionally the fastest format and data type for glReadPixels in legacy mode. - OGLRef.readPixelsBestFormat = GL_BGRA; - OGLRef.readPixelsBestDataType = GL_UNSIGNED_BYTE; - // Need to generate this texture first because FBO creation needs it. // This texture is only required by shaders, and so if shader creation // fails, then we can immediately delete this texture if an error occurs. @@ -2613,6 +2443,12 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() INFO("OpenGL: FBOs are unsupported. Some emulation features will be disabled.\n"); } + // The internal format of FBOs is GL_RGBA, so we will match that format for glReadPixels. + // But the traditional format before FBOs was GL_BGRA, which is also the fastest format + // for glReadPixels when using legacy back buffers. + OGLRef.readPixelsBestFormat = (this->isFBOSupported) ? GL_RGBA : GL_BGRA; + OGLRef.readPixelsBestDataType = GL_UNSIGNED_BYTE; + this->_isFBOBlitSupported = this->isFBOSupported && this->IsExtensionPresent(&oglExtensionSet, "GL_EXT_framebuffer_blit"); if (!this->_isFBOBlitSupported) @@ -2698,11 +2534,22 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() { INFO("OpenGL: Successfully created geometry shaders.\n"); - error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_100, - EdgeMarkFragShader_100, - FramebufferOutputVtxShader_100, - FramebufferOutputRGBA6665FragShader_100, - FramebufferOutputRGBA8888FragShader_100); + if (OGLRef.readPixelsBestFormat == GL_BGRA) + { + error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_100, + EdgeMarkFragShader_100, + FramebufferOutputVtxShader_100, + FramebufferOutputBGRA6665FragShader_100, + FramebufferOutputBGRA8888FragShader_100); + } + else + { + error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_100, + EdgeMarkFragShader_100, + FramebufferOutputVtxShader_100, + FramebufferOutputRGBA6665FragShader_100, + FramebufferOutputRGBA8888FragShader_100); + } } } @@ -2750,7 +2597,6 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() // Set rendering support flags based on driver features. this->willFlipAndConvertFramebufferOnGPU = this->isShaderSupported && this->isVBOSupported; - this->willFlipOnlyFramebufferOnGPU = this->willFlipAndConvertFramebufferOnGPU || this->_isFBOBlitSupported; this->_deviceInfo.isEdgeMarkSupported = this->isShaderSupported && this->isVBOSupported && this->isFBOSupported; this->_deviceInfo.isFogSupported = this->isShaderSupported && this->isVBOSupported; this->_deviceInfo.isTextureSmoothingSupported = this->isShaderSupported; @@ -2988,14 +2834,29 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() // Set up FBOs glGenFramebuffersEXT(1, &OGLRef.fboClearImageID); - glGenFramebuffersEXT(1, &OGLRef.fboFramebufferFlipID); glGenFramebuffersEXT(1, &OGLRef.fboRenderID); + glGenFramebuffersEXT(1, &OGLRef.fboColorOutMainID); + glGenFramebuffersEXT(1, &OGLRef.fboColorOutWorkingID); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboClearImageID); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIColorID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_FOGATTRIBUTES_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIFogAttrID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutMainID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, OGLRef.texGColorID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create FBOs!\n"); + this->DestroyFBOs(); + + return OGLERROR_FBO_CREATE_ERROR; + } + + // Assign the default read/draw buffers. + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutWorkingID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, OGLRef.texGColorID, 0); if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) { @@ -3006,12 +2867,14 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() } // Assign the default read/draw buffers. - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboFramebufferFlipID); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texGColorID, 0); - glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_WORKING_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboClearImageID); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIColorID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, OGL_FOGATTRIBUTES_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIFogAttrID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texCIDepthStencilID, 0); if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) { @@ -3062,8 +2925,9 @@ void OpenGLRenderer_1_2::DestroyFBOs() glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); glDeleteFramebuffersEXT(1, &OGLRef.fboClearImageID); - glDeleteFramebuffersEXT(1, &OGLRef.fboFramebufferFlipID); glDeleteFramebuffersEXT(1, &OGLRef.fboRenderID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutMainID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutWorkingID); glDeleteTextures(1, &OGLRef.texCIColorID); glDeleteTextures(1, &OGLRef.texCIFogAttrID); glDeleteTextures(1, &OGLRef.texCIDepthStencilID); @@ -3073,8 +2937,9 @@ void OpenGLRenderer_1_2::DestroyFBOs() glDeleteTextures(1, &OGLRef.texGDepthStencilID); OGLRef.fboClearImageID = 0; - OGLRef.fboFramebufferFlipID = 0; OGLRef.fboRenderID = 0; + OGLRef.fboColorOutMainID = 0; + OGLRef.fboColorOutWorkingID = 0; OGLRef.texCIColorID = 0; OGLRef.texCIFogAttrID = 0; OGLRef.texCIDepthStencilID = 0; @@ -3994,10 +3859,6 @@ void OpenGLRenderer_1_2::_SetupGeometryShaders(const OGLGeometryFlags flags) if (!this->isShaderSupported) { - if (this->isFBOSupported) - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } return; } @@ -4006,11 +3867,6 @@ void OpenGLRenderer_1_2::_SetupGeometryShaders(const OGLGeometryFlags flags) glUniform1i(OGLRef.uniformTexDrawOpaque[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformDrawModeDepthEqualsTest[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformPolyDrawShadow[flags.value], GL_FALSE); - - if (this->isFBOSupported) - { - glDrawBuffers(4, GeometryDrawBuffersEnum[flags.DrawBuffersMode]); - } } Render3DError OpenGLRenderer_1_2::EnableVertexAttributes() @@ -4162,6 +4018,7 @@ Render3DError OpenGLRenderer_1_2::ZeroDstAlphaPass(const POLY *rawPolyList, cons // Restore OpenGL states back to normal. this->_geometryProgramFlags = oldGProgramFlags; this->_SetupGeometryShaders(this->_geometryProgramFlags); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); glClear(GL_STENCIL_BUFFER_BIT); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -4200,7 +4057,6 @@ void OpenGLRenderer_1_2::_ResolveWorkingBackFacing() glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboMSIntermediateRenderID); - glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); } void OpenGLRenderer_1_2::_ResolveGeometry() @@ -4245,9 +4101,8 @@ void OpenGLRenderer_1_2::_ResolveGeometry() // Blit the color buffer glBlitFramebufferEXT(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + // Reset framebuffer targets glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); } } @@ -4255,26 +4110,24 @@ Render3DError OpenGLRenderer_1_2::ReadBackPixels() { OGLRenderRef &OGLRef = *this->ref; + // Both flips and converts the framebuffer on the GPU. No additional postprocessing + // should be necessary at this point. if (this->willFlipAndConvertFramebufferOnGPU) { - // Both flips and converts the framebuffer on the GPU. No additional postprocessing - // should be necessary at this point. if (this->isFBOSupported) { if (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) { const GLuint convertProgramID = (this->_outputFormat == NDSColorFormat_BGR666_Rev) ? OGLRef.programFramebufferRGBA6665OutputID[1] : OGLRef.programFramebufferRGBA8888OutputID[1]; glUseProgram(convertProgramID); - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutWorkingID); this->_lastTextureDrawTarget = OGLTextureUnitID_FinalColor; } else { const GLuint convertProgramID = (this->_outputFormat == NDSColorFormat_BGR666_Rev) ? OGLRef.programFramebufferRGBA6665OutputID[0] : OGLRef.programFramebufferRGBA8888OutputID[0]; glUseProgram(convertProgramID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutMainID); this->_lastTextureDrawTarget = OGLTextureUnitID_GColor; } } @@ -4319,21 +4172,6 @@ Render3DError OpenGLRenderer_1_2::ReadBackPixels() glDisableVertexAttribArray(OGLVertexAttributeID_TexCoord0); } } - else if (this->willFlipOnlyFramebufferOnGPU) - { - // Just flips the framebuffer in Y to match the coordinates of OpenGL and the NDS hardware. - // Further colorspace conversion will need to be done in a later step. - - const GLenum flipTarget = (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) ? OGL_WORKING_ATTACHMENT_ID : OGL_COLOROUT_ATTACHMENT_ID; - - glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, OGLRef.fboFramebufferFlipID); - glDrawBuffer(flipTarget); - - glBlitFramebufferEXT(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); - - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboFramebufferFlipID); - glReadBuffer(flipTarget); - } if (this->isPBOSupported) { @@ -4568,28 +4406,37 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co glMatrixMode(GL_PROJECTION); glLoadIdentity(); - - if (this->isFBOSupported) - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } + glScalef(1.0f, -1.0f, 1.0f); } #endif } else { - if (this->isShaderSupported && this->isFBOSupported) - { - // Even with no polygons to draw, we always need to set these 3 flags so that - // glDrawBuffers() can reference the correct set of FBO attachments using - // OGLGeometryFlags.DrawBuffersMode. - this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; - this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; - this->_geometryProgramFlags.OpaqueDrawMode = 1; - } + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } + + if (this->isFBOSupported) + { + OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO); + + if (this->isShaderSupported) + { + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + } + else + { + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + } + + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); } - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -4663,6 +4510,10 @@ Render3DError OpenGLRenderer_1_2::RenderGeometry() glStencilMask(0xFF); this->_SetupGeometryShaders(this->_geometryProgramFlags); + if (this->isFBOSupported && this->isShaderSupported) + { + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + } } if (this->_clippedPolyOpaqueCount == 0) @@ -4813,6 +4664,11 @@ Render3DError OpenGLRenderer_1_2::PostprocessFramebuffer() glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA, GL_DST_ALPHA); glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); + + if (this->isFBOSupported) + { + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + } } if (this->isVAOSupported) @@ -4898,8 +4754,6 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf { glBlitFramebufferEXT(0, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GPU_FRAMEBUFFER_NATIVE_WIDTH, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); } if (this->isMultisampledFBOSupported) @@ -4949,10 +4803,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf { // Blit the color and depth buffers. glBlitFramebufferEXT(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST); - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); } } } @@ -5004,12 +4855,6 @@ Render3DError OpenGLRenderer_1_2::ClearUsingValues(const Color4u8 &clearColor666 } else { - if (this->isFBOSupported) - { - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } - glClearColor(divide6bitBy63_LUT[clearColor6665.r], divide6bitBy63_LUT[clearColor6665.g], divide6bitBy63_LUT[clearColor6665.b], divide5bitBy31_LUT[clearColor6665.a]); glClearDepth((GLclampd)clearAttributes.depth / (GLclampd)0x00FFFFFF); glClearStencil(clearAttributes.opaquePolyID); @@ -5413,9 +5258,7 @@ Render3DError OpenGLRenderer_1_2::RenderPowerOff() if (this->isFBOSupported) { - glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboColorOutMainID); } glClearColor(0.0f, 0.0f, 0.0f, 0.0f); @@ -5499,6 +5342,7 @@ Render3DError OpenGLRenderer_1_2::RenderFlush(bool willFlushBuffer32, bool willF Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) { Render3DError error = OGLERROR_NOERR; + OGLRenderRef &OGLRef = *this->ref; if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT) { @@ -5593,10 +5437,20 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) this->CreateEdgeMarkProgram(EdgeMarkVtxShader_100, EdgeMarkFragShader_100); } - this->CreateFramebufferOutput6665Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); - this->CreateFramebufferOutput6665Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); - this->CreateFramebufferOutput8888Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); - this->CreateFramebufferOutput8888Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); + if (OGLRef.readPixelsBestFormat == GL_BGRA) + { + this->CreateFramebufferOutput6665Program(0, FramebufferOutputVtxShader_100, FramebufferOutputBGRA6665FragShader_100); + this->CreateFramebufferOutput6665Program(1, FramebufferOutputVtxShader_100, FramebufferOutputBGRA6665FragShader_100); + this->CreateFramebufferOutput8888Program(0, FramebufferOutputVtxShader_100, FramebufferOutputBGRA8888FragShader_100); + this->CreateFramebufferOutput8888Program(1, FramebufferOutputVtxShader_100, FramebufferOutputBGRA8888FragShader_100); + } + else + { + this->CreateFramebufferOutput6665Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); + this->CreateFramebufferOutput6665Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA6665FragShader_100); + this->CreateFramebufferOutput8888Program(0, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); + this->CreateFramebufferOutput8888Program(1, FramebufferOutputVtxShader_100, FramebufferOutputRGBA8888FragShader_100); + } } if (oglrender_framebufferDidResizeCallback != NULL) @@ -5814,18 +5668,22 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D_State &renderState, co } else { - if (this->isFBOSupported) - { - // Even with no polygons to draw, we always need to set these 3 flags so that - // glDrawBuffers() can reference the correct set of FBO attachments using - // OGLGeometryFlags.DrawBuffersMode. - this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; - this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; - this->_geometryProgramFlags.OpaqueDrawMode = 1; - } + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } + + if (this->isFBOSupported) + { + OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.selectedRenderingFBO); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); } - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index ca3283fdd..167ce044c 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -628,7 +628,8 @@ struct OGLRenderRef GLuint fboClearImageID; GLuint fboRenderID; - GLuint fboFramebufferFlipID; + GLuint fboColorOutMainID; + GLuint fboColorOutWorkingID; GLuint fboMSIntermediateRenderID; GLuint selectedRenderingFBO; @@ -799,9 +800,9 @@ private: unsigned int versionRevision; private: - Render3DError _FlushFramebufferFlipAndConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, - Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, - bool doFramebufferFlip, bool doFramebufferConvert); + template Render3DError _FlushFramebufferConvertOnCPU(const Color4u8 *__restrict srcFramebuffer, + Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16, + bool doFramebufferConvert); protected: // OpenGL-specific References @@ -816,7 +817,6 @@ protected: bool isMultisampledFBOSupported; bool isShaderSupported; bool isVAOSupported; - bool willFlipOnlyFramebufferOnGPU; bool willFlipAndConvertFramebufferOnGPU; bool willUsePerSampleZeroDstPass; diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 6b43a376d..1c4c32790 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -695,7 +695,6 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() glActiveTexture(GL_TEXTURE0); // OpenGL v3.2 Core Profile should have all the necessary features to be able to flip and convert the framebuffer. - this->willFlipOnlyFramebufferOnGPU = true; this->willFlipAndConvertFramebufferOnGPU = true; this->_isSampleShadingSupported = this->IsExtensionPresent(&oglExtensionSet, "GL_ARB_sample_shading"); @@ -925,6 +924,40 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() // Set up FBOs glGenFramebuffers(1, &OGLRef.fboClearImageID); glGenFramebuffers(1, &OGLRef.fboRenderID); + glGenFramebuffers(1, &OGLRef.fboColorOutMainID); + glGenFramebuffers(1, &OGLRef.fboColorOutWorkingID); + + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutMainID); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGLRef.texGColorID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create FBOs!\n"); + this->DestroyFBOs(); + + return OGLERROR_FBO_CREATE_ERROR; + } + + // Assign the default read/draw buffers. + glDrawBuffer(GL_COLOR_ATTACHMENT0); + glReadBuffer(GL_COLOR_ATTACHMENT0); + + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutWorkingID); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, OGLRef.texGColorID, 0); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create FBOs!\n"); + this->DestroyFBOs(); + + return OGLERROR_FBO_CREATE_ERROR; + } + + // Assign the default read/draw buffers. + glDrawBuffer(GL_COLOR_ATTACHMENT0); + glReadBuffer(GL_COLOR_ATTACHMENT0); glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboClearImageID); glFramebufferTexture2D(GL_FRAMEBUFFER, OGL_COLOROUT_ATTACHMENT_ID, GL_TEXTURE_2D, OGLRef.texCIColorID, 0); @@ -980,6 +1013,8 @@ void OpenGLRenderer_3_2::DestroyFBOs() glBindFramebuffer(GL_FRAMEBUFFER, 0); glDeleteFramebuffers(1, &OGLRef.fboClearImageID); glDeleteFramebuffers(1, &OGLRef.fboRenderID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutMainID); + glDeleteFramebuffers(1, &OGLRef.fboColorOutWorkingID); glDeleteTextures(1, &OGLRef.texCIColorID); glDeleteTextures(1, &OGLRef.texCIFogAttrID); glDeleteTextures(1, &OGLRef.texCIDepthStencilID); @@ -990,6 +1025,8 @@ void OpenGLRenderer_3_2::DestroyFBOs() OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; + OGLRef.fboColorOutMainID = 0; + OGLRef.fboColorOutWorkingID = 0; OGLRef.texCIColorID = 0; OGLRef.texCIFogAttrID = 0; OGLRef.texCIDepthStencilID = 0; @@ -1993,8 +2030,6 @@ void OpenGLRenderer_3_2::_SetupGeometryShaders(const OGLGeometryFlags flags) glUniform1i(OGLRef.uniformTexDrawOpaque[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformDrawModeDepthEqualsTest[flags.value], GL_FALSE); glUniform1i(OGLRef.uniformPolyDrawShadow[flags.value], GL_FALSE); - - glDrawBuffers(4, GeometryDrawBuffersEnum[flags.DrawBuffersMode]); } Render3DError OpenGLRenderer_3_2::EnableVertexAttributes() @@ -2071,6 +2106,7 @@ Render3DError OpenGLRenderer_3_2::ZeroDstAlphaPass(const POLY *rawPolyList, cons // Restore OpenGL states back to normal. this->_geometryProgramFlags = oldGProgramFlags; this->_SetupGeometryShaders(this->_geometryProgramFlags); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); glClearBufferfi(GL_DEPTH_STENCIL, 0, 0.0f, 0); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -2160,15 +2196,15 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels() { // Use the alternate program where the output color is not at index 0. glUseProgram(OGLRef.programFramebufferRGBA6665OutputID[1]); - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutWorkingID); + this->_lastTextureDrawTarget = OGLTextureUnitID_FinalColor; } else { // Use the program where the output color is from index 0. glUseProgram(OGLRef.programFramebufferRGBA6665OutputID[0]); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutMainID); + this->_lastTextureDrawTarget = OGLTextureUnitID_GColor; } glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); @@ -2180,44 +2216,29 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels() glBindVertexArray(OGLRef.vaoPostprocessStatesID); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glBindVertexArray(0); - - if (this->_mappedFramebuffer != NULL) - { - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - this->_mappedFramebuffer = NULL; - } - - glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); } else { // Just flips the framebuffer in Y to match the coordinates of OpenGL and the NDS hardware. - if (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) - { - glDrawBuffer(OGL_WORKING_ATTACHMENT_ID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glBlitFramebuffer(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); - } - else - { - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glReadBuffer(OGL_WORKING_ATTACHMENT_ID); - glBlitFramebuffer(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - } + const GLuint fboOut = (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) ? OGLRef.fboColorOutWorkingID : OGLRef.fboColorOutMainID; + glBindFramebuffer(GL_FRAMEBUFFER, fboOut); + glReadBuffer(GL_COLOR_ATTACHMENT1); + glBlitFramebuffer(0, (GLint)this->_framebufferHeight, (GLint)this->_framebufferWidth, 0, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + glReadBuffer(GL_COLOR_ATTACHMENT0); - // Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this - // format without a performance penalty. - if (this->_mappedFramebuffer != NULL) - { - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - this->_mappedFramebuffer = NULL; - } - - glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); + this->_lastTextureDrawTarget = (this->_lastTextureDrawTarget == OGLTextureUnitID_GColor) ? OGLTextureUnitID_FinalColor : OGLTextureUnitID_GColor; } + // Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this + // format without a performance penalty. + if (this->_mappedFramebuffer != NULL) + { + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + this->_mappedFramebuffer = NULL; + } + + glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); + this->_pixelReadNeedsFinish = true; return OGLERROR_NOERR; } @@ -2443,7 +2464,10 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D_State &renderState, co this->_geometryProgramFlags.OpaqueDrawMode = 1; } - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + OGLRef.selectedRenderingFBO = (this->_enableMultisampledRendering) ? OGLRef.fboMSIntermediateRenderID : OGLRef.fboRenderID; + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.selectedRenderingFBO); + glDrawBuffers(4, GeometryDrawBuffersEnum[this->_geometryProgramFlags.DrawBuffersMode]); + glReadBuffer(GL_COLOR_ATTACHMENT0); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); @@ -2911,9 +2935,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff() return OGLERROR_BEGINGL_FAILED; } - glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboRenderID); - glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboColorOutMainID); glClearBufferfv(GL_COLOR, 0, oglColor); if (this->_mappedFramebuffer != NULL) diff --git a/desmume/src/OGLRender_ES3.cpp b/desmume/src/OGLRender_ES3.cpp index 83270710f..0721bd79e 100644 --- a/desmume/src/OGLRender_ES3.cpp +++ b/desmume/src/OGLRender_ES3.cpp @@ -321,7 +321,6 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions() glActiveTexture(GL_TEXTURE0); // OpenGL ES v3.0 should have all the necessary features to be able to flip and convert the framebuffer. - this->willFlipOnlyFramebufferOnGPU = true; this->willFlipAndConvertFramebufferOnGPU = true; this->_enableTextureSmoothing = CommonSettings.GFX3D_Renderer_TextureSmoothing;