From e2a25e2412f306a314678c9625711c825fd036c4 Mon Sep 17 00:00:00 2001 From: rogerman Date: Tue, 23 Jul 2024 21:28:15 -0700 Subject: [PATCH] OpenGL Renderer: Fix various rendering bugs. - Fix a potential unaligned access crashing bug in ES when clear images are to be rendered. - Fix an ES bug where clear images would fail to render when MSAA is enabled. - Fix a legacy OpenGL bug where toon table colors were not ignoring their alpha bit, according to GBATEK. --- desmume/src/OGLRender.cpp | 57 ++++---- desmume/src/OGLRender.h | 48 ++++++- desmume/src/OGLRender_3_2.cpp | 248 +++++++++++++++++++++++++++++++--- desmume/src/OGLRender_3_2.h | 4 + desmume/src/OGLRender_ES3.cpp | 88 +++++++++++- desmume/src/OGLRender_ES3.h | 1 + 6 files changed, 392 insertions(+), 54 deletions(-) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index 8bb53eda1..aa3b92202 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -38,27 +38,6 @@ #include "./utils/colorspacehandler/colorspacehandler_SSE2.h" #endif -#if defined(OPENGL_VARIANT_STANDARD) - #if MSB_FIRST - #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_SHORT_1_5_5_5_REV - #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_INT_8_8_8_8_REV - #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_INT_8_8_8_8 - #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_SHORT_1_5_5_5_REV - #else - #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_SHORT_1_5_5_5_REV - #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_INT_8_8_8_8_REV - #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_INT_8_8_8_8_REV - #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_SHORT_1_5_5_5_REV - #endif -#elif defined(OPENGL_VARIANT_ES) - #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_BYTE - #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_BYTE - #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_BYTE - #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_BYTE -#else - #error Unknown OpenGL variant. 
-#endif - typedef struct { unsigned int major; @@ -1229,7 +1208,7 @@ GPU3DInterface gpu3Dgl_3_2 = { // OpenGL ES 3.0 (this is the only version of ES that is supported right now) GPU3DInterface gpu3Dgl_ES_3_0 = { "OpenGL ES 3.0", - OpenGLRendererCreate, + OpenGLRendererCreate, OpenGLRendererDestroy }; @@ -1270,7 +1249,7 @@ OpenGLRenderer::OpenGLRenderer() _emulateDepthLEqualPolygonFacing = false; // Init OpenGL rendering states - ref = (OGLRenderRef *)malloc(sizeof(OGLRenderRef)); + ref = (OGLRenderRef *)malloc_alignedPage(sizeof(OGLRenderRef)); memset(ref, 0, sizeof(OGLRenderRef)); _mappedFramebuffer = NULL; @@ -1294,7 +1273,7 @@ OpenGLRenderer::~OpenGLRenderer() free_aligned(this->_workingTextureUnpackBuffer); // Destroy OpenGL rendering states - free(this->ref); + free_aligned(this->ref); this->ref = NULL; } @@ -2771,11 +2750,10 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glActiveTexture(GL_TEXTURE0); - CACHE_ALIGN GLint tempClearImageBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; memset(tempClearImageBuffer, 0, sizeof(tempClearImageBuffer)); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIColor); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIColorID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -2783,6 +2761,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, OGL_TEXTURE_SRC_CI_COLOR, tempClearImageBuffer); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIDepth); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID); 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -2791,6 +2770,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8_EXT, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, tempClearImageBuffer); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIFogAttr); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -2813,7 +2793,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texGDepthStencilID, 0); glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_TEXTURE_2D, OGLRef.texGDepthStencilID, 0); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE) + if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) { INFO("OpenGL: Failed to create FBOs!\n"); this->DestroyFBOs(); @@ -2931,6 +2911,7 @@ void OpenGLRenderer_1_2::DestroyFBOs() OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; + OGLRef.fboRenderMutableID = 0; OGLRef.fboColorOutMainID = 0; OGLRef.fboColorOutWorkingID = 0; OGLRef.texCIColorID = 0; @@ -3309,6 +3290,18 @@ void OpenGLRenderer_1_2::DestroyGeometryZeroDstAlphaProgram() OGLRef.fragShaderGeometryZeroDstAlphaID = 0; } +Render3DError OpenGLRenderer_1_2::CreateClearImageProgram(const char *vsCString, const char *fsCString) +{ + Render3DError error = OGLERROR_NOERR; + // TODO: Add support for ancient GPUs that support shaders but not GL_EXT_framebuffer_blit. + return error; +} + +void OpenGLRenderer_1_2::DestroyClearImageProgram() +{ + // Do nothing for now. 
+} + Render3DError OpenGLRenderer_1_2::CreateEdgeMarkProgram(const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString) { Render3DError error = OGLERROR_NOERR; @@ -3825,11 +3818,10 @@ Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBu const bool didDepthStencilChange = (memcmp(OGLRef.workingCIDepthStencilBuffer[this->_clearImageIndex], OGLRef.workingCIDepthStencilBuffer[this->_clearImageIndex ^ 0x01], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(GLuint)) != 0); const bool didFogAttributesChange = this->_enableFog && this->_deviceInfo.isFogSupported && (memcmp(OGLRef.workingCIFogAttributesBuffer[this->_clearImageIndex], OGLRef.workingCIFogAttributesBuffer[this->_clearImageIndex ^ 0x01], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(GLuint)) != 0); - glActiveTexture(GL_TEXTURE0); - if (didColorChange) { memcpy(OGLRef.workingCIColorBuffer16, colorBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIColor); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIColorID); if (OGL_TEXTURE_SRC_CI_COLOR == GL_UNSIGNED_BYTE) @@ -3845,16 +3837,19 @@ Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBu if (didDepthStencilChange) { + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIDepth); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, OGLRef.workingCIDepthStencilBuffer[this->_clearImageIndex]); } if (didFogAttributesChange) { + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIFogAttr); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, OGL_TEXTURE_SRC_CI_FOG, 
OGLRef.workingCIFogAttributesBuffer[this->_clearImageIndex]); } + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); return OGLERROR_NOERR; @@ -4419,7 +4414,7 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co if (OGL_TEXTURE_SRC_TOON_TABLE == GL_UNSIGNED_BYTE) { - ColorspaceConvertBuffer5551To8888(renderState.toonTable16, OGLRef.toonTable32, 32); + ColorspaceConvertBuffer555xTo8888Opaque(renderState.toonTable16, OGLRef.toonTable32, 32); glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, OGLRef.toonTable32); } else @@ -5693,7 +5688,7 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D_State &renderState, co if (OGL_TEXTURE_SRC_TOON_TABLE == GL_UNSIGNED_BYTE) { - ColorspaceConvertBuffer5551To8888(renderState.toonTable16, OGLRef.toonTable32, 32); + ColorspaceConvertBuffer555xTo8888Opaque(renderState.toonTable16, OGLRef.toonTable32, 32); glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, OGLRef.toonTable32); } else diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index e5214c918..cef30c48c 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -385,6 +385,27 @@ EXTERNOGLEXT(PFNGLDELETERENDERBUFFERSEXTPROC, glDeleteRenderbuffersEXT) #define OGL_CI_FOGATTRIBUTES_ATTACHMENT_ID GL_COLOR_ATTACHMENT1_EXT #endif +#if defined(OPENGL_VARIANT_STANDARD) + #if MSB_FIRST + #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_SHORT_1_5_5_5_REV + #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_INT_8_8_8_8_REV + #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_INT_8_8_8_8 + #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_SHORT_1_5_5_5_REV + #else + #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_SHORT_1_5_5_5_REV + #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_INT_8_8_8_8_REV + #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_INT_8_8_8_8_REV + #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_SHORT_1_5_5_5_REV + #endif +#elif defined(OPENGL_VARIANT_ES) + #define 
OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_BYTE + #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_BYTE + #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_BYTE + #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_BYTE +#else + #error Unknown OpenGL variant. +#endif + enum OpenGLVariantID { OpenGLVariantID_Unknown = 0, @@ -394,7 +415,18 @@ enum OpenGLVariantID OpenGLVariantID_Legacy_2_1 = 0x1021, OpenGLVariantID_CoreProfile_3_2 = 0x2032, OpenGLVariantID_StandardAuto = 0x3000, - OpenGLVariantID_ES_3_0 = 0x4030 + OpenGLVariantID_ES3_Auto = 0x4000, + OpenGLVariantID_ES3_3_0 = 0x4030, + OpenGLVariantID_ES_Auto = 0x6000 +}; + +enum OpenGLVariantFamily +{ + OpenGLVariantFamily_Standard = (3 << 12), + OpenGLVariantFamily_Legacy = (1 << 12), + OpenGLVariantFamily_CoreProfile = (1 << 13), + OpenGLVariantFamily_ES = (3 << 14), + OpenGLVariantFamily_ES3 = (1 << 14) }; enum OGLVertexAttributeID @@ -413,7 +445,10 @@ enum OGLTextureUnitID OGLTextureUnitID_GPolyID, OGLTextureUnitID_FogAttr, OGLTextureUnitID_PolyStates, - OGLTextureUnitID_LookupTable + OGLTextureUnitID_LookupTable, + OGLTextureUnitID_CIColor, + OGLTextureUnitID_CIDepth, + OGLTextureUnitID_CIFogAttr }; enum OGLBindingPointID @@ -640,6 +675,7 @@ struct OGLRenderRef GLuint fboRenderMutableID; GLuint fboColorOutMainID; GLuint fboColorOutWorkingID; + GLuint fboMSClearImageID; GLuint fboMSIntermediateColorOutMainID; GLuint fboMSIntermediateRenderID; GLuint fboMSIntermediateRenderMutableID; @@ -655,6 +691,10 @@ struct OGLRenderRef GLuint fragShaderGeometryZeroDstAlphaID; GLuint programGeometryZeroDstAlphaID; + GLuint vsClearImageID; + GLuint fsClearImageID; + GLuint pgClearImageID; + GLuint vtxShaderMSGeometryZeroDstAlphaID; GLuint fragShaderMSGeometryZeroDstAlphaID; GLuint programMSGeometryZeroDstAlphaID; @@ -896,6 +936,8 @@ protected: virtual Render3DError CreateGeometryPrograms() = 0; virtual void DestroyGeometryPrograms() = 0; + virtual Render3DError CreateClearImageProgram(const char *vsCString, const char *fsCString) = 0; + virtual 
void DestroyClearImageProgram() = 0; virtual Render3DError CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString) = 0; virtual void DestroyGeometryZeroDstAlphaProgram() = 0; virtual Render3DError CreateEdgeMarkProgram(const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString) = 0; @@ -972,6 +1014,8 @@ protected: virtual Render3DError CreateGeometryPrograms(); virtual void DestroyGeometryPrograms(); + virtual Render3DError CreateClearImageProgram(const char *vsCString, const char *fsCString); + virtual void DestroyClearImageProgram(); virtual Render3DError CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString); virtual void DestroyGeometryZeroDstAlphaProgram(); virtual Render3DError CreateEdgeMarkProgram(const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString); diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index ff88937ad..cd9275422 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -393,6 +393,38 @@ void main()\n\ }\n\ "}; +// Vertex shader for drawing the NDS Clear Image, GLSL 1.50 +const char *ClearImageVtxShader_150 = {"\ +IN_VTX_POSITION vec2 inPosition;\n\ +IN_VTX_TEXCOORD0 vec2 inTexCoord0;\n\ +out vec2 texCoord;\n\ +\n\ +void main()\n\ +{\n\ + texCoord = vec2(inTexCoord0.x, 1.0 - inTexCoord0.y);\n\ + gl_Position = vec4(inPosition, 0.0, 1.0);\n\ +}\n\ +"}; + +// Fragment shader for drawing the NDS Clear Image, GLSL 1.50 +const char *ClearImageFragShader_150 = {"\ +in vec2 texCoord;\n\ +\n\ +uniform sampler2D texCIColor;\n\ +uniform sampler2D texCIFogAttr;\n\ +uniform sampler2D texCIDepth;\n\ +\n\ +OUT_FOGATTR vec4 outGFogAttr;\n\ +OUT_COLOR vec4 outGColor;\n\ +\n\ +void main()\n\ +{\n\ + outGFogAttr = texture(texCIFogAttr, texCoord);\n\ + outGColor = texture(texCIColor, texCoord);\n\ + gl_FragDepth = texture(texCIDepth, texCoord).r;\n\ +}\n\ +"}; + // Vertex shader 
for applying edge marking, GLSL 1.50 const char *EdgeMarkVtxShader_150 = {"\ IN_VTX_POSITION vec2 inPosition;\n\ @@ -823,11 +855,22 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() return error; } + error = this->CreateClearImageProgram(ClearImageVtxShader_150, ClearImageFragShader_150); + if (error != OGLERROR_NOERR) + { + glUseProgram(0); + this->DestroyGeometryPrograms(); + this->isShaderSupported = false; + + return error; + } + error = this->CreateGeometryZeroDstAlphaProgram(GeometryZeroDstAlphaPixelMaskVtxShader_150, GeometryZeroDstAlphaPixelMaskFragShader_150); if (error != OGLERROR_NOERR) { glUseProgram(0); this->DestroyGeometryPrograms(); + this->DestroyClearImageProgram(); this->isShaderSupported = false; return error; @@ -852,6 +895,7 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() { glUseProgram(0); this->DestroyGeometryPrograms(); + this->DestroyClearImageProgram(); this->DestroyGeometryZeroDstAlphaProgram(); this->DestroyMSGeometryZeroDstAlphaProgram(); this->isShaderSupported = false; @@ -985,18 +1029,18 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glActiveTexture(GL_TEXTURE0); - CACHE_ALIGN GLint tempClearImageBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; memset(tempClearImageBuffer, 0, sizeof(tempClearImageBuffer)); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIColor); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIColorID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 
GL_RGBA, GL_UNSIGNED_BYTE, tempClearImageBuffer); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, OGL_TEXTURE_SRC_CI_COLOR, tempClearImageBuffer); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIDepth); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -1005,13 +1049,15 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, tempClearImageBuffer); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_CIFogAttr); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, tempClearImageBuffer); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, OGL_TEXTURE_SRC_CI_FOG, tempClearImageBuffer); + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); // Set up FBOs @@ -1211,12 +1257,39 @@ Render3DError OpenGLRenderer_3_2::CreateMultisampledFBO(GLsizei numSamples) } // Set up multisampled rendering FBO + glGenFramebuffers(1, &OGLRef.fboMSClearImageID); glGenFramebuffers(1, &OGLRef.fboMSIntermediateColorOutMainID); glGenFramebuffers(1, &OGLRef.fboMSIntermediateRenderID); glGenFramebuffers(1, &OGLRef.fboMSIntermediateRenderMutableID); - glBindFramebuffer(GL_FRAMEBUFFER, 
OGLRef.fboMSIntermediateColorOutMainID); + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboMSClearImageID); +#ifdef GL_VERSION_3_2 + if (this->_isSampleShadingSupported) + { + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, OGLRef.texMSGColorID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D_MULTISAMPLE, OGLRef.texMSGFogAttrID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, OGLRef.texMSGDepthStencilID, 0); + } + else +#endif + { + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, OGLRef.rboMSGColorID); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_RENDERBUFFER, OGLRef.rboMSGFogAttrID); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, OGLRef.rboMSGDepthStencilID); + } + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + INFO("OpenGL: Failed to create multisampled FBO. 
Multisample antialiasing will be disabled.\n"); + this->DestroyMultisampledFBO(); + + return OGLERROR_FBO_CREATE_ERROR; + } + const GLenum ciDrawBuffers[] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1}; + glDrawBuffers(2, ciDrawBuffers); + glReadBuffer(GL_COLOR_ATTACHMENT0); + + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboMSIntermediateColorOutMainID); #ifdef GL_VERSION_3_2 if (this->_isSampleShadingSupported) { @@ -1239,12 +1312,10 @@ Render3DError OpenGLRenderer_3_2::CreateMultisampledFBO(GLsizei numSamples) return OGLERROR_FBO_CREATE_ERROR; } - glDrawBuffer(GL_COLOR_ATTACHMENT0); glReadBuffer(GL_COLOR_ATTACHMENT0); glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboMSIntermediateRenderID); - #ifdef GL_VERSION_3_2 if (this->_isSampleShadingSupported) { @@ -1264,7 +1335,6 @@ Render3DError OpenGLRenderer_3_2::CreateMultisampledFBO(GLsizei numSamples) glFramebufferRenderbuffer(GL_FRAMEBUFFER, OGL_FOGATTRIBUTES_ATTACHMENT_ID, GL_RENDERBUFFER, OGLRef.rboMSGFogAttrID); } - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { INFO("OpenGL: Failed to create multisampled FBO. 
Multisample antialiasing will be disabled.\n"); @@ -1272,12 +1342,10 @@ Render3DError OpenGLRenderer_3_2::CreateMultisampledFBO(GLsizei numSamples) return OGLERROR_FBO_CREATE_ERROR; } - glDrawBuffer(GL_COLOR_ATTACHMENT0); glReadBuffer(GL_COLOR_ATTACHMENT0); glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboMSIntermediateRenderMutableID); - #ifdef GL_VERSION_3_2 if (this->_isSampleShadingSupported) { @@ -1304,7 +1372,6 @@ Render3DError OpenGLRenderer_3_2::CreateMultisampledFBO(GLsizei numSamples) return OGLERROR_FBO_CREATE_ERROR; } - glDrawBuffer(GL_COLOR_ATTACHMENT0); glReadBuffer(GL_COLOR_ATTACHMENT0); @@ -1723,6 +1790,120 @@ void OpenGLRenderer_3_2::DestroyGeometryPrograms() OpenGLRenderer_2_1::DestroyGeometryPrograms(); } +Render3DError OpenGLRenderer_3_2::CreateClearImageProgram(const char *vsCString, const char *fsCString) +{ + Render3DError error = OGLERROR_NOERR; + OGLRenderRef &OGLRef = *this->ref; + + std::stringstream shaderHeader; + if (this->_isShaderFixedLocationSupported) + { + shaderHeader << "#version 330\n"; + } + else + { + shaderHeader << "#version 150\n"; + } + shaderHeader << "\n"; + + std::stringstream vsHeader; + if (this->_isShaderFixedLocationSupported) + { + vsHeader << "#define IN_VTX_POSITION layout (location = " << OGLVertexAttributeID_Position << ") in\n"; + vsHeader << "#define IN_VTX_TEXCOORD0 layout (location = " << OGLVertexAttributeID_TexCoord0 << ") in\n"; + } + else + { + vsHeader << "#define IN_VTX_POSITION in\n"; + vsHeader << "#define IN_VTX_TEXCOORD0 in\n"; + } + vsHeader << "\n"; + + std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vsCString); + std::stringstream fsHeader; + if (this->_isShaderFixedLocationSupported) + { + fsHeader << "#define OUT_COLOR layout (location = 0) out\n"; + fsHeader << "#define OUT_FOGATTR layout (location = 1) out\n"; + } + else + { + fsHeader << "#define OUT_COLOR out\n"; + fsHeader << "#define OUT_FOGATTR out\n"; + } + fsHeader << "\n"; + + std::string 
fragShaderCodeFogColor = shaderHeader.str() + fsHeader.str() + std::string(fsCString); + error = this->ShaderProgramCreate(OGLRef.vsClearImageID, + OGLRef.fsClearImageID, + OGLRef.pgClearImageID, + vtxShaderCode.c_str(), + fragShaderCodeFogColor.c_str()); + if (error != OGLERROR_NOERR) + { + INFO("OpenGL: Failed to create the CLEAR_IMAGE shader program.\n"); + glUseProgram(0); + this->DestroyClearImageProgram(); + return error; + } + +#if defined(GL_VERSION_3_0) + if (!this->_isShaderFixedLocationSupported) + { + glBindAttribLocation(OGLRef.pgClearImageID, OGLVertexAttributeID_Position, "inPosition"); + glBindAttribLocation(OGLRef.pgClearImageID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); + glBindFragDataLocation(OGLRef.pgClearImageID, 0, "outGColor"); + glBindFragDataLocation(OGLRef.pgClearImageID, 1, "outGFogAttr"); + } +#endif + + glLinkProgram(OGLRef.pgClearImageID); + if (!this->ValidateShaderProgramLink(OGLRef.pgClearImageID)) + { + INFO("OpenGL: Failed to link the CLEAR_IMAGE shader color/fog program.\n"); + glUseProgram(0); + this->DestroyClearImageProgram(); + return OGLERROR_SHADER_CREATE_ERROR; + } + + glValidateProgram(OGLRef.pgClearImageID); + glUseProgram(OGLRef.pgClearImageID); + + const GLint uniformTexCIColor = glGetUniformLocation(OGLRef.pgClearImageID, "texCIColor"); + const GLint uniformTexCIFogAttr = glGetUniformLocation(OGLRef.pgClearImageID, "texCIFogAttr"); + const GLint uniformTexCIDepthCF = glGetUniformLocation(OGLRef.pgClearImageID, "texCIDepth"); + glUniform1i(uniformTexCIColor, OGLTextureUnitID_CIColor); + glUniform1i(uniformTexCIFogAttr, OGLTextureUnitID_CIFogAttr); + glUniform1i(uniformTexCIDepthCF, OGLTextureUnitID_CIDepth); + + return error; +} + +void OpenGLRenderer_3_2::DestroyClearImageProgram() +{ + if (!this->isShaderSupported) + { + return; + } + + OGLRenderRef &OGLRef = *this->ref; + + if (OGLRef.vsClearImageID == 0) + { + return; + } + + glDetachShader(OGLRef.pgClearImageID, OGLRef.vsClearImageID); + 
glDetachShader(OGLRef.pgClearImageID, OGLRef.fsClearImageID); + glDeleteShader(OGLRef.vsClearImageID); + glDeleteShader(OGLRef.fsClearImageID); + glDeleteProgram(OGLRef.pgClearImageID); + + OGLRef.vsClearImageID = 0; + OGLRef.fsClearImageID = 0; + OGLRef.pgClearImageID = 0; +} + Render3DError OpenGLRenderer_3_2::CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString) { Render3DError error = OGLERROR_NOERR; @@ -2905,17 +3086,44 @@ Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuf glClearBufferfv(GL_COLOR, GeometryAttachmentPolyID[this->_geometryProgramFlags.DrawBuffersMode], oglPolyID); } - if (this->_enableFog) + if (this->_variantID & OpenGLVariantFamily_Standard) { - glReadBuffer(OGL_FOGATTRIBUTES_ATTACHMENT_ID); - glDrawBuffer(OGL_FOGATTRIBUTES_ATTACHMENT_ID); - glBlitFramebuffer(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + // Standard OpenGL supports blitting from non-multisampled to multisampled + // framebuffers, so do that for best performance. + if (this->_enableFog) + { + glReadBuffer(OGL_FOGATTRIBUTES_ATTACHMENT_ID); + glBlitFramebuffer(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + } + + // Blit the color and depth/stencil buffers. Do this last so that color attachment 0 is set to the read FBO. 
+ glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OGLRef.fboMSIntermediateColorOutMainID); + glBlitFramebuffer(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST); + } + else + { + // Other OpenGL variants may support blitting from multisampled to non-multisampled + // framebuffers, but NOT from non-multisampled to multisampled. So instead, we'll use + // the alternative method of copying the clear image data via a rendered quad. + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboMSClearImageID); + glStencilMask(0xFF); + glClearBufferfi(GL_DEPTH_STENCIL, 0, 1.0f, opaquePolyID); + + glUseProgram(OGLRef.pgClearImageID); + glViewport(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_ALWAYS); + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); + glBindVertexArray(OGLRef.vaoPostprocessStatesID); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + glBindVertexArray(0); + + glUseProgram(OGLRef.programGeometryID[this->_geometryProgramFlags.value]); + glDepthFunc(GL_LESS); } - - // Blit the color and depth/stencil buffers. Do this last so that color attachment 0 is set to the read FBO. 
- glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glBlitFramebuffer(0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, 0, 0, (GLint)this->_framebufferWidth, (GLint)this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST); } glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.selectedRenderingFBO); diff --git a/desmume/src/OGLRender_3_2.h b/desmume/src/OGLRender_3_2.h index 8b36f6c08..478a77342 100644 --- a/desmume/src/OGLRender_3_2.h +++ b/desmume/src/OGLRender_3_2.h @@ -29,6 +29,8 @@ extern const char *GeometryFragShader_150; extern const char *GeometryZeroDstAlphaPixelMaskVtxShader_150; extern const char *GeometryZeroDstAlphaPixelMaskFragShader_150; extern const char *MSGeometryZeroDstAlphaPixelMaskFragShader_150; +extern const char *ClearImageVtxShader_150; +extern const char *ClearImageFragShader_150; extern const char *EdgeMarkVtxShader_150; extern const char *EdgeMarkFragShader_150; extern const char *FogVtxShader_150; @@ -81,6 +83,8 @@ protected: virtual Render3DError CreateGeometryPrograms(); virtual void DestroyGeometryPrograms(); + virtual Render3DError CreateClearImageProgram(const char *vsCString, const char *fsCString); + virtual void DestroyClearImageProgram(); virtual Render3DError CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString); virtual Render3DError CreateMSGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString); virtual void DestroyMSGeometryZeroDstAlphaProgram(); diff --git a/desmume/src/OGLRender_ES3.cpp b/desmume/src/OGLRender_ES3.cpp index 63d802e1a..16d6a0a78 100644 --- a/desmume/src/OGLRender_ES3.cpp +++ b/desmume/src/OGLRender_ES3.cpp @@ -258,7 +258,7 @@ void OGLCreateRenderer_ES_3_0(OpenGLRenderer **rendererPtr) OpenGLESRenderer_3_0::OpenGLESRenderer_3_0() { - _variantID = OpenGLVariantID_ES_3_0; + _variantID = OpenGLVariantID_ES3_3_0; } Render3DError 
OpenGLESRenderer_3_0::InitExtensions() @@ -342,11 +342,22 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions() return error; } + error = this->CreateClearImageProgram(ClearImageVtxShader_150, ClearImageFragShader_150); + if (error != OGLERROR_NOERR) + { + glUseProgram(0); + this->DestroyGeometryPrograms(); + this->isShaderSupported = false; + + return error; + } + error = this->CreateGeometryZeroDstAlphaProgram(GeometryZeroDstAlphaPixelMaskVtxShader_150, GeometryZeroDstAlphaPixelMaskFragShader_150); if (error != OGLERROR_NOERR) { glUseProgram(0); this->DestroyGeometryPrograms(); + this->DestroyClearImageProgram(); this->isShaderSupported = false; return error; @@ -362,6 +373,7 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions() { glUseProgram(0); this->DestroyGeometryPrograms(); + this->DestroyClearImageProgram(); this->DestroyGeometryZeroDstAlphaProgram(); this->isShaderSupported = false; @@ -622,6 +634,80 @@ Render3DError OpenGLESRenderer_3_0::CreateGeometryPrograms() return error; } +Render3DError OpenGLESRenderer_3_0::CreateClearImageProgram(const char *vsCString, const char *fsCString) +{ + Render3DError error = OGLERROR_NOERR; + OGLRenderRef &OGLRef = *this->ref; + + std::stringstream shaderHeader; + shaderHeader << "#version 300 es\n"; + shaderHeader << "precision highp float;\n"; + shaderHeader << "precision highp int;\n"; + shaderHeader << "\n"; + + std::stringstream vsHeader; + if (this->_isShaderFixedLocationSupported) + { + vsHeader << "#define IN_VTX_POSITION layout (location = " << OGLVertexAttributeID_Position << ") in\n"; + vsHeader << "#define IN_VTX_TEXCOORD0 layout (location = " << OGLVertexAttributeID_TexCoord0 << ") in\n"; + } + else + { + vsHeader << "#define IN_VTX_POSITION in\n"; + vsHeader << "#define IN_VTX_TEXCOORD0 in\n"; + } + vsHeader << "\n"; + + std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vsCString); + std::stringstream fsHeader; + if (this->_isShaderFixedLocationSupported) + { + fsHeader << 
"#define OUT_COLOR layout (location = 0) out\n"; + fsHeader << "#define OUT_FOGATTR layout (location = 1) out\n"; + } + else + { + fsHeader << "#define OUT_COLOR out\n"; + fsHeader << "#define OUT_FOGATTR out\n"; + } + fsHeader << "\n"; + + std::string fragShaderCodeFogColor = shaderHeader.str() + fsHeader.str() + std::string(fsCString); + error = this->ShaderProgramCreate(OGLRef.vsClearImageID, + OGLRef.fsClearImageID, + OGLRef.pgClearImageID, + vtxShaderCode.c_str(), + fragShaderCodeFogColor.c_str()); + if (error != OGLERROR_NOERR) + { + INFO("OpenGL ES: Failed to create the CLEAR_IMAGE shader program.\n"); + glUseProgram(0); + this->DestroyClearImageProgram(); + return error; + } + + glLinkProgram(OGLRef.pgClearImageID); + if (!this->ValidateShaderProgramLink(OGLRef.pgClearImageID)) + { + INFO("OpenGL ES: Failed to link the CLEAR_IMAGE shader color/fog program.\n"); + glUseProgram(0); + this->DestroyClearImageProgram(); + return OGLERROR_SHADER_CREATE_ERROR; + } + + glValidateProgram(OGLRef.pgClearImageID); + glUseProgram(OGLRef.pgClearImageID); + + const GLint uniformTexCIColor = glGetUniformLocation(OGLRef.pgClearImageID, "texCIColor"); + const GLint uniformTexCIFogAttr = glGetUniformLocation(OGLRef.pgClearImageID, "texCIFogAttr"); + const GLint uniformTexCIDepthCF = glGetUniformLocation(OGLRef.pgClearImageID, "texCIDepth"); + glUniform1i(uniformTexCIColor, OGLTextureUnitID_CIColor); + glUniform1i(uniformTexCIFogAttr, OGLTextureUnitID_CIFogAttr); + glUniform1i(uniformTexCIDepthCF, OGLTextureUnitID_CIDepth); + + return error; +} + Render3DError OpenGLESRenderer_3_0::CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString) { Render3DError error = OGLERROR_NOERR; diff --git a/desmume/src/OGLRender_ES3.h b/desmume/src/OGLRender_ES3.h index 4a492b4f8..eff863b32 100644 --- a/desmume/src/OGLRender_ES3.h +++ b/desmume/src/OGLRender_ES3.h @@ -44,6 +44,7 @@ class OpenGLESRenderer_3_0 : public OpenGLRenderer_3_2 { protected: 
virtual Render3DError CreateGeometryPrograms(); + virtual Render3DError CreateClearImageProgram(const char *vsCString, const char *fsCString); virtual Render3DError CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString); virtual Render3DError CreateEdgeMarkProgram(const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString); virtual Render3DError CreateFogProgram(const OGLFogProgramKey fogProgramKey, const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString);