From d6ae36e068c1aa43dbb0e3bff04e352adca0af54 Mon Sep 17 00:00:00 2001 From: rogerman Date: Wed, 17 Feb 2016 10:33:44 +0000 Subject: [PATCH] OpenGL Renderer: - Perform the RGBA6665 color space conversion of the 3D framebuffer on the GPU before pixel read back, and then read that 3D framebuffer directly. --- desmume/src/GPU.cpp | 8 +- desmume/src/OGLRender.cpp | 795 +++++++++++++++++++++++----------- desmume/src/OGLRender.h | 59 ++- desmume/src/OGLRender_3_2.cpp | 162 ++++++- desmume/src/OGLRender_3_2.h | 5 +- desmume/src/rasterize.cpp | 25 +- desmume/src/render3D.cpp | 52 +-- desmume/src/render3D.h | 8 +- 8 files changed, 791 insertions(+), 323 deletions(-) diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index 59f3e86a9..7878fb7fd 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -4220,9 +4220,15 @@ void GPUEngineA::_RenderLine_Layer(const u16 l, u16 *dstColorLine, const size_t // will be rendered as garbage pixels unless the DISPCNT.BG0_Enable flag is ignored. if (layerID == GPULayerID_BG0 && (this->_IORegisterMap->DISPCNT.BG0_3D != 0)) { + const FragmentColor *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer(); + if (framebuffer3D == NULL) + { + continue; + } + const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); const float customWidthScale = (float)dispInfo.customWidth / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH; - const FragmentColor *__restrict srcLine = this->_3DFramebufferRGBA6665 + (dstLineIndex * dispInfo.customWidth); + const FragmentColor *__restrict srcLine = framebuffer3D + (dstLineIndex * dispInfo.customWidth); u16 *__restrict dstColorLinePtr = dstColorLine; u8 *__restrict layerIDLine = this->_dstLayerID; diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index e0b695671..8e31d0d8d 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -483,6 +483,38 @@ static const char *FogFragShader_100 = {"\ }\n\ "}; +// Vertex shader for the final framebuffer, GLSL 1.00 +static const char *FramebufferOutputVtxShader_100 = {"\ + attribute vec2 inPosition;\n\ + attribute vec2 inTexCoord0;\n\ + varying vec2 texCoord;\n\ + \n\ + void main()\n\ + {\n\ + texCoord = inTexCoord0;\n\ + gl_Position = vec4(inPosition, 0.0, 1.0);\n\ + }\n\ +"}; + +// Fragment shader for the final framebuffer, GLSL 1.00 +static const char *FramebufferOutputFragShader_100 = {"\ + varying vec2 texCoord;\n\ + \n\ + uniform sampler2D texInFragColor;\n\ + \n\ + void main()\n\ + {\n\ + // Note that we swap B and R since pixel readbacks are done in BGRA format for fastest\n\ + // performance. The final color is still in RGBA format.\n\ + vec4 colorRGBA6665 = texture2D(texInFragColor, texCoord).bgra;\n\ + colorRGBA6665 = floor((colorRGBA6665 * 255.0) + 0.5);\n\ + colorRGBA6665.rgb = floor(colorRGBA6665.rgb / 4.0);\n\ + colorRGBA6665.a = floor(colorRGBA6665.a / 8.0);\n\ + \n\ + gl_FragData[0] = (colorRGBA6665 / 255.0);\n\ + }\n\ +"}; + FORCEINLINE u32 BGRA8888_32_To_RGBA6665_32(const u32 srcPix) { const u32 dstPix = (srcPix >> 2) & 0x3F3F3F3F; @@ -797,6 +829,7 @@ OpenGLRenderer::OpenGLRenderer() isMultisampledFBOSupported = false; isShaderSupported = false; isVAOSupported = false; + willConvertFramebufferOnGPU = false; // Init OpenGL rendering states ref = new OGLRenderRef; @@ -806,6 +839,7 @@ OpenGLRenderer::OpenGLRenderer() ref->selectedRenderingFBO = 0; currTexture = NULL; + _mappedFramebuffer = NULL; _pixelReadNeedsFinish = false; _currentPolyIndex = 0; } @@ -895,9 +929,9 @@ void OpenGLRenderer::SetVersion(unsigned int major, unsigned int minor, unsigned this->versionRevision = revision; } -Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcRGBA8888, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) +Render3DError OpenGLRenderer::_FlushFramebufferConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) { - if ( srcRGBA8888 == NULL || ((dstRGBA6665 == NULL) && (dstRGBA5551 == NULL)) ) + if ( ((dstRGBA6665 == NULL) && (dstRGBA5551 == NULL)) || (srcFramebuffer == NULL) ) { return RENDER3DERROR_NOERR; } @@ -918,44 +952,44 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s #if defined(ENABLE_SSSE3) && defined(LOCAL_LE) for (; x < ssePixCount; x += 4, ir += 4, iw += 4) { - __m128i color; + const __m128i srcColor = _mm_load_si128((__m128i *)(srcFramebuffer + ir)); // Convert to RGBA6665 - color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir)); - color = _mm_srli_epi32(color, 2); + __m128i color6665 = srcColor; + color6665 = _mm_srli_epi32(color6665, 2); - __m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha + __m128i a = _mm_srli_epi32(color6665, 1); // Special handling for 5-bit alpha a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000)); - color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F)); - color = _mm_or_si128(color, a); - color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA - _mm_store_si128((__m128i *)(dstRGBA6665 + iw), color); + color6665 = _mm_and_si128(color6665, _mm_set1_epi32(0x003F3F3F)); + color6665 = _mm_or_si128(color6665, a); + color6665 = _mm_shuffle_epi8(color6665, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA + _mm_store_si128((__m128i *)(dstRGBA6665 + iw), color6665); // Convert to RGBA5551 - color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir)); + __m128 color5551 = srcColor; - __m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R - b = _mm_slli_epi32(b, 7); // Shift to B + __m128i b = _mm_and_si128(color5551, _mm_set1_epi32(0x000000F8)); // Read from R + b = _mm_slli_epi32(b, 7); // Shift to B - __m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G - g = _mm_srli_epi32(g, 6); // Shift in G + __m128i g = _mm_and_si128(color5551, _mm_set1_epi32(0x0000F800)); // Read from G + g = _mm_srli_epi32(g, 6); // Shift in G - __m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B - r = _mm_srli_epi32(r, 19); // Shift to R + __m128i r = _mm_and_si128(color5551, _mm_set1_epi32(0x00F80000)); // Read from B + r = _mm_srli_epi32(r, 19); // Shift to R - a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A - a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A - a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A + a = _mm_and_si128(color5551, _mm_set1_epi32(0xFF000000)); // Read from A + a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A + a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A - color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a); + color5551 = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a); // All the colors are currently placed on 32 bit boundaries, so we need to swizzle them // to the lower 64 bits of our vector before we store them back to memory. // Note: Do not attempt to use packssdw here since packing with the 0x8000 bit set will // result in values of 0x7FFF, which are incorrect values in this case. - color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0)); - _mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color); + color5551 = _mm_shuffle_epi8(color5551, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0)); + _mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color5551); } #endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE) @@ -964,17 +998,17 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s // Use the correct endian format since OpenGL uses the native endian of // the architecture it is running on. #ifdef LOCAL_BE - dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcRGBA8888[ir].color); - dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, - (srcRGBA8888[ir].g >> 3) & 0x1F, - (srcRGBA8888[ir].r >> 3) & 0x1F) | - ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000); + dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcFramebuffer[ir].color); + dstRGBA5551[iw] = R5G5B5TORGB15( (srcFramebuffer[ir].b >> 3) & 0x1F, + (srcFramebuffer[ir].g >> 3) & 0x1F, + (srcFramebuffer[ir].r >> 3) & 0x1F) | + ((srcFramebuffer[ir].a == 0) ? 0x0000 : 0x8000); #else - dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color); - dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, - (srcRGBA8888[ir].g >> 3) & 0x1F, - (srcRGBA8888[ir].r >> 3) & 0x1F) | - ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000); + dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcFramebuffer[ir].color); + dstRGBA5551[iw] = R5G5B5TORGB15( (srcFramebuffer[ir].b >> 3) & 0x1F, + (srcFramebuffer[ir].g >> 3) & 0x1F, + (srcFramebuffer[ir].r >> 3) & 0x1F) | + ((srcFramebuffer[ir].a == 0) ? 0x0000 : 0x8000); #endif } } @@ -988,25 +1022,25 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s #if defined(ENABLE_SSSE3) && defined(LOCAL_LE) for (; x < ssePixCount; x += 4, ir += 4, iw += 4) { - __m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir)); - color = _mm_srli_epi32(color, 2); + __m128i color6665 = _mm_load_si128((__m128i *)(srcFramebuffer + ir)); + color6665 = _mm_srli_epi32(color6665, 2); - __m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha + __m128i a = _mm_srli_epi32(color6665, 1); // Special handling for 5-bit alpha a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000)); - color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F)); - color = _mm_or_si128(color, a); - color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA - _mm_store_si128((__m128i *)(dstRGBA6665 + iw), color); + color6665 = _mm_and_si128(color6665, _mm_set1_epi32(0x003F3F3F)); + color6665 = _mm_or_si128(color6665, a); + color6665 = _mm_shuffle_epi8(color6665, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA + _mm_store_si128((__m128i *)(dstRGBA6665 + iw), color6665); } #endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE) for (; x < pixCount; x++, ir++, iw++) { #ifdef LOCAL_BE - dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcRGBA8888[ir].color); + dstRGBA6665[iw].color = BGRA8888_32_To_RGBA6665_32(srcFramebuffer[ir].color); #else - dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcRGBA8888[ir].color); + dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(srcFramebuffer[ir].color); #endif } } @@ -1020,39 +1054,39 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s #if defined(ENABLE_SSSE3) && defined(LOCAL_LE) for (; x < ssePixCount; x += 4, ir += 4, iw += 4) { - __m128i color = _mm_load_si128((__m128i *)(srcRGBA8888 + ir)); + __m128i color5551 = _mm_load_si128((__m128i *)(srcFramebuffer + ir)); - __m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R - b = _mm_slli_epi32(b, 7); // Shift to B + __m128i b = _mm_and_si128(color5551, _mm_set1_epi32(0x000000F8)); // Read from R + b = _mm_slli_epi32(b, 7); // Shift to B - __m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G - g = _mm_srli_epi32(g, 6); // Shift in G + __m128i g = _mm_and_si128(color5551, _mm_set1_epi32(0x0000F800)); // Read from G + g = _mm_srli_epi32(g, 6); // Shift in G - __m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B - r = _mm_srli_epi32(r, 19); // Shift to R + __m128i r = _mm_and_si128(color5551, _mm_set1_epi32(0x00F80000)); // Read from B + r = _mm_srli_epi32(r, 19); // Shift to R - __m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A - a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A - a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A + __m128i a = _mm_and_si128(color5551, _mm_set1_epi32(0xFF000000)); // Read from A + a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A + a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Mask to A - color = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a); - color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0)); - _mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color); + color5551 = _mm_or_si128(_mm_or_si128(_mm_or_si128(b, g), r), a); + color5551 = _mm_shuffle_epi8(color5551, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0)); + _mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color5551); } #endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE) for (; x < pixCount; x++, ir++, iw++) { #ifdef LOCAL_BE - dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, - (srcRGBA8888[ir].g >> 3) & 0x1F, - (srcRGBA8888[ir].r >> 3) & 0x1F) | - ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000); + dstRGBA5551[iw] = R5G5B5TORGB15( (srcFramebuffer[ir].b >> 3) & 0x1F, + (srcFramebuffer[ir].g >> 3) & 0x1F, + (srcFramebuffer[ir].r >> 3) & 0x1F) | + ((srcFramebuffer[ir].a == 0) ? 0x0000 : 0x8000); #else - dstRGBA5551[iw] = R5G5B5TORGB15( (srcRGBA8888[ir].b >> 3) & 0x1F, - (srcRGBA8888[ir].g >> 3) & 0x1F, - (srcRGBA8888[ir].r >> 3) & 0x1F) | - ((srcRGBA8888[ir].a == 0) ? 0x0000 : 0x8000); + dstRGBA5551[iw] = R5G5B5TORGB15( (srcFramebuffer[ir].b >> 3) & 0x1F, + (srcFramebuffer[ir].g >> 3) & 0x1F, + (srcFramebuffer[ir].r >> 3) & 0x1F) | + ((srcFramebuffer[ir].a == 0) ? 0x0000 : 0x8000); #endif } } @@ -1061,9 +1095,27 @@ Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict s return RENDER3DERROR_NOERR; } -Render3DError OpenGLRenderer::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) +Render3DError OpenGLRenderer::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) { - return this->FlushFramebuffer(this->_framebufferColor, dstRGBA6665, dstRGBA5551); + if (this->willConvertFramebufferOnGPU) + { +#ifdef ENABLE_SSE2 + return Render3D_SSE2::FlushFramebuffer(srcFramebuffer, NULL, dstRGBA5551); +#else + return Render3D::FlushFramebuffer(srcFramebuffer, NULL, dstRGBA5551); +#endif + } + else + { + return this->_FlushFramebufferConvertOnCPU(srcFramebuffer, dstRGBA6665, dstRGBA5551); + } + + return RENDER3DERROR_NOERR; +} + +FragmentColor* OpenGLRenderer::GetFramebuffer() +{ + return (this->_mappedFramebuffer != NULL) ? this->_mappedFramebuffer : GPU->GetEngineMain()->Get3DFramebufferRGBA6665(); } OpenGLRenderer_1_2::~OpenGLRenderer_1_2() @@ -1129,7 +1181,14 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() std::string edgeMarkFragShaderString = std::string(EdgeMarkFragShader_100); std::string fogVtxShaderString = std::string(FogVtxShader_100); std::string fogFragShaderString = std::string(FogFragShader_100); - error = this->InitPostprocessingPrograms(edgeMarkVtxShaderString, edgeMarkFragShaderString, fogVtxShaderString, fogFragShaderString); + std::string framebufferOutputVtxShaderString = std::string(FramebufferOutputVtxShader_100); + std::string framebufferOutputFragShaderString = std::string(FramebufferOutputFragShader_100); + error = this->InitPostprocessingPrograms(edgeMarkVtxShaderString, + edgeMarkFragShaderString, + fogVtxShaderString, + fogFragShaderString, + framebufferOutputVtxShaderString, + framebufferOutputFragShaderString); if (error != OGLERROR_NOERR) { INFO("OpenGL: Edge mark and fog require OpenGL v2.0 or later. These features will be disabled.\n"); @@ -1179,6 +1238,8 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() this->IsExtensionPresent(&oglExtensionSet, "GL_EXT_packed_depth_stencil"); if (this->isFBOSupported) { + this->willConvertFramebufferOnGPU = (this->isShaderSupported && this->isVAOSupported && this->isPBOSupported && this->isFBOSupported); + error = this->CreateFBOs(); if (error != OGLERROR_NOERR) { @@ -1206,6 +1267,8 @@ Render3DError OpenGLRenderer_1_2::InitExtensions() INFO("OpenGL: Multisampled FBOs are unsupported. Multisample antialiasing will be disabled.\n"); } + this->willConvertFramebufferOnGPU = (this->isShaderSupported && this->isVAOSupported && this->isPBOSupported && this->isFBOSupported); + this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last @@ -1307,7 +1370,8 @@ Render3DError OpenGLRenderer_1_2::InitGeometryProgramShaderLocations() { OGLRenderRef &OGLRef = *this->ref; - // Set up shader uniforms + glUseProgram(OGLRef.programGeometryID); + const GLint uniformTexRenderObject = glGetUniformLocation(OGLRef.programGeometryID, "texRenderObject"); const GLint uniformTexToonTable = glGetUniformLocation(OGLRef.programGeometryID, "texToonTable"); glUniform1i(uniformTexRenderObject, 0); @@ -1400,7 +1464,6 @@ Render3DError OpenGLRenderer_1_2::InitGeometryProgram(const std::string &vertexS } glValidateProgram(OGLRef.programGeometryID); - glUseProgram(OGLRef.programGeometryID); this->InitGeometryProgramShaderLocations(); @@ -1495,6 +1558,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glGenTextures(1, &OGLRef.texCIPolyID); glGenTextures(1, &OGLRef.texCIDepthStencilID); + glGenTextures(1, &OGLRef.texFinalColorID); glGenTextures(1, &OGLRef.texGColorID); glGenTextures(1, &OGLRef.texGDepthID); glGenTextures(1, &OGLRef.texGFogAttrID); @@ -1502,6 +1566,17 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glGenTextures(1, &OGLRef.texGDepthStencilID); glGenTextures(1, &OGLRef.texPostprocessFogID); + if (this->willConvertFramebufferOnGPU) + { + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor); + glBindTexture(GL_TEXTURE_2D, OGLRef.texFinalColorID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, this->_framebufferWidth, this->_framebufferHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + } + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor); glBindTexture(GL_TEXTURE_2D, OGLRef.texGDepthStencilID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -1589,6 +1664,14 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glBindTexture(GL_TEXTURE_2D, 0); + // Set up RBOs + if (this->willConvertFramebufferOnGPU) + { + glGenRenderbuffersEXT(1, &OGLRef.rboFramebufferRGBA6665ID); + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, OGLRef.rboFramebufferRGBA6665ID); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA, this->_framebufferWidth, this->_framebufferHeight); + } + // Set up FBOs glGenFramebuffersEXT(1, &OGLRef.fboClearImageID); glGenFramebuffersEXT(1, &OGLRef.fboRenderID); @@ -1621,6 +1704,8 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffersEXT(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -1660,6 +1745,8 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffersEXT(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -1675,6 +1762,12 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID); glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, OGLRef.texPostprocessFogID, 0); + if (this->willConvertFramebufferOnGPU) + { + glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT2_EXT, GL_RENDERBUFFER_EXT, OGLRef.rboFramebufferRGBA6665ID); + } + if (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) != GL_FRAMEBUFFER_COMPLETE_EXT) { INFO("OpenGL: Failed to created FBOs. Some emulation features will be disabled.\n"); @@ -1694,6 +1787,8 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffersEXT(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -1737,6 +1832,8 @@ void OpenGLRenderer_1_2::DestroyFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffersEXT(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -1901,6 +1998,21 @@ Render3DError OpenGLRenderer_1_2::InitTables() return OGLERROR_NOERR; } +Render3DError OpenGLRenderer_1_2::InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, + const std::string &edgeMarkFragShader, + const std::string &fogVtxShader, + const std::string &fogFragShader, + const std::string &framebufferOutputVtxShader, + const std::string &framebufferOutputFragShader) +{ + return OGLERROR_FEATURE_UNSUPPORTED; +} + +Render3DError OpenGLRenderer_1_2::DestroyPostprocessingPrograms() +{ + return OGLERROR_FEATURE_UNSUPPORTED; +} + Render3DError OpenGLRenderer_1_2::InitEdgeMarkProgramBindings() { return OGLERROR_FEATURE_UNSUPPORTED; @@ -1911,11 +2023,6 @@ Render3DError OpenGLRenderer_1_2::InitEdgeMarkProgramShaderLocations() return OGLERROR_FEATURE_UNSUPPORTED; } -Render3DError OpenGLRenderer_1_2::InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, const std::string &edgeMarkFragShader, const std::string &fogVtxShader, const std::string &fogFragShader) -{ - return OGLERROR_FEATURE_UNSUPPORTED; -} - Render3DError OpenGLRenderer_1_2::InitFogProgramBindings() { return OGLERROR_FEATURE_UNSUPPORTED; @@ -1926,7 +2033,12 @@ Render3DError OpenGLRenderer_1_2::InitFogProgramShaderLocations() return OGLERROR_FEATURE_UNSUPPORTED; } -Render3DError OpenGLRenderer_1_2::DestroyPostprocessingPrograms() +Render3DError OpenGLRenderer_1_2::InitFramebufferOutputProgramBindings() +{ + return OGLERROR_FEATURE_UNSUPPORTED; +} + +Render3DError OpenGLRenderer_1_2::InitFramebufferOutputShaderLocations() { return OGLERROR_FEATURE_UNSUPPORTED; } @@ -2153,13 +2265,63 @@ Render3DError OpenGLRenderer_1_2::DownsampleFBO() Render3DError OpenGLRenderer_1_2::ReadBackPixels() { - if (this->isPBOSupported) + if (!this->isPBOSupported) { + this->_pixelReadNeedsFinish = true; + return OGLERROR_NOERR; + } + + if (this->_mappedFramebuffer != NULL) + { + glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB); + this->_mappedFramebuffer = NULL; + } + + if (this->willConvertFramebufferOnGPU) + { + OGLRenderRef &OGLRef = *this->ref; + + // Perform the RGBA6665 color space conversion while we're still on the GPU so + // that we can avoid having to do it on the CPU. + glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID); + glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT); + glBlitFramebufferEXT(0, this->_framebufferHeight, this->_framebufferWidth, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID); + glDrawBuffer(GL_COLOR_ATTACHMENT2_EXT); + + glUseProgram(OGLRef.programFramebufferOutputID); + + glViewport(0, 0, this->_framebufferWidth, this->_framebufferHeight); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glDisable(GL_CULL_FACE); + + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboPostprocessIndexID); + glBindVertexArray(OGLRef.vaoPostprocessStatesID); + + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0); + + glBindVertexArray(0); + + // Read back the pixels. + glReadBuffer(GL_COLOR_ATTACHMENT2_EXT); + glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); + + // Set the read and draw target buffers back to color attachment 0, which is always the default. + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + } + else + { + // Otherwise, we'll just have to do the color space conversion on the CPU if there + // isn't proper support. glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); } this->_pixelReadNeedsFinish = true; - return OGLERROR_NOERR; } @@ -2834,12 +2996,8 @@ Render3DError OpenGLRenderer_1_2::RenderFinish() if (this->isPBOSupported) { - const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); - if (mappedBufferPtr != NULL) - { - this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551); - glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB); - } + this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBufferARB(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY_ARB); + this->FlushFramebuffer(this->_mappedFramebuffer, framebufferRGBA6665, framebufferRGBA5551); } else { @@ -2870,6 +3028,15 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) return OGLERROR_BEGINGL_FAILED; } + if (this->isPBOSupported) + { + if (this->_mappedFramebuffer != NULL) + { + glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB); + this->_mappedFramebuffer = NULL; + } + } + if (this->isFBOSupported) { glActiveTextureARB(GL_TEXTURE0_ARB + OGLTextureUnitID_GColor); @@ -2909,32 +3076,40 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h) glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, maxSamples, GL_DEPTH24_STENCIL8_EXT, w, h); } - const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); - FragmentColor *oldFramebufferColor = this->_framebufferColor; - FragmentColor *newFramebufferColor = NULL; + if (this->willConvertFramebufferOnGPU) + { + glActiveTextureARB(GL_TEXTURE0_ARB + OGLTextureUnitID_FinalColor); + glBindTexture(GL_TEXTURE_2D, OGLRef.texFinalColorID); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, OGLRef.rboFramebufferRGBA6665ID); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA, w, h); + } - if (this->isPBOSupported) - { - glBufferData(GL_PIXEL_PACK_BUFFER_ARB, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ); - } - else - { - newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); - memset(newFramebufferColor, 0, newFramebufferColorSizeBytes); - } + const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); this->_framebufferWidth = w; this->_framebufferHeight = h; this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; - this->_framebufferColor = newFramebufferColor; + + if (this->isPBOSupported) + { + glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ_ARB); + this->_framebufferColor = NULL; + } + else + { + FragmentColor *oldFramebufferColor = this->_framebufferColor; + FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); + this->_framebufferColor = newFramebufferColor; + free_aligned(oldFramebufferColor); + } if (oglrender_framebufferDidResizeCallback != NULL) { oglrender_framebufferDidResizeCallback(w, h); } - free_aligned(oldFramebufferColor); - ENDGL(); return OGLERROR_NOERR; @@ -3030,6 +3205,15 @@ Render3DError OpenGLRenderer_1_3::SetFramebufferSize(size_t w, size_t h) return OGLERROR_BEGINGL_FAILED; } + if (this->isPBOSupported) + { + if (this->_mappedFramebuffer != NULL) + { + glUnmapBufferARB(GL_PIXEL_PACK_BUFFER_ARB); + this->_mappedFramebuffer = NULL; + } + } + if (this->isFBOSupported) { glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor); @@ -3069,32 +3253,40 @@ Render3DError OpenGLRenderer_1_3::SetFramebufferSize(size_t w, size_t h) glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, maxSamples, GL_DEPTH24_STENCIL8_EXT, w, h); } - const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); - FragmentColor *oldFramebufferColor = this->_framebufferColor; - FragmentColor *newFramebufferColor = NULL; + if (this->willConvertFramebufferOnGPU) + { + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor); + glBindTexture(GL_TEXTURE_2D, OGLRef.texFinalColorID); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + + glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, OGLRef.rboFramebufferRGBA6665ID); + glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA, w, h); + } - if (this->isPBOSupported) - { - glBufferData(GL_PIXEL_PACK_BUFFER_ARB, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ); - } - else - { - newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); - memset(newFramebufferColor, 0, newFramebufferColorSizeBytes); - } + const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor); this->_framebufferWidth = w; this->_framebufferHeight = h; this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; - this->_framebufferColor = newFramebufferColor; + + if (this->isPBOSupported) + { + glBufferDataARB(GL_PIXEL_PACK_BUFFER_ARB, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ_ARB); + this->_framebufferColor = NULL; + } + else + { + FragmentColor *oldFramebufferColor = this->_framebufferColor; + FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes); + this->_framebufferColor = newFramebufferColor; + free_aligned(oldFramebufferColor); + } if (oglrender_framebufferDidResizeCallback != NULL) { oglrender_framebufferDidResizeCallback(w, h); } - free_aligned(oldFramebufferColor); - ENDGL(); return OGLERROR_NOERR; @@ -3189,30 +3381,6 @@ void OpenGLRenderer_1_5::DestroyVBOs() this->isVBOSupported = false; } -Render3DError OpenGLRenderer_1_5::CreatePBOs() -{ - OGLRenderRef &OGLRef = *this->ref; - - glGenBuffers(1, &OGLRef.pboRenderDataID); - glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, OGLRef.pboRenderDataID); - glBufferData(GL_PIXEL_PACK_BUFFER_ARB, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ); - - return OGLERROR_NOERR; -} - -void OpenGLRenderer_1_5::DestroyPBOs() -{ - if (!this->isPBOSupported) - { - return; - } - - glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); - glDeleteBuffers(1, &this->ref->pboRenderDataID); - - this->isPBOSupported = false; -} - Render3DError OpenGLRenderer_1_5::CreateVAOs() { OGLRenderRef &OGLRef = *this->ref; @@ -3385,61 +3553,6 @@ Render3DError OpenGLRenderer_1_5::BeginRender(const GFX3D &engine) return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_1_5::ReadBackPixels() -{ - if (this->isPBOSupported) - { - glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); - } - - this->_pixelReadNeedsFinish = true; - - return OGLERROR_NOERR; -} - -Render3DError OpenGLRenderer_1_5::RenderFinish() -{ - if (!this->_pixelReadNeedsFinish) - { - GPU->GetEventHandler()->DidRender3DEnd(); - return OGLERROR_NOERR; - } - - FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL; - u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL; - - if ( (framebufferRGBA6665 != NULL) || (framebufferRGBA5551 != NULL) ) - { - if(!BEGINGL()) - { - GPU->GetEventHandler()->DidRender3DEnd(); - return OGLERROR_BEGINGL_FAILED; - } - - if (this->isPBOSupported) - { - const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY); - if (mappedBufferPtr != NULL) - { - this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551); - glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB); - } - } - else - { - glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, this->_framebufferColor); - this->FlushFramebuffer(this->_framebufferColor, framebufferRGBA6665, framebufferRGBA5551); - } - - ENDGL(); - } - - this->_pixelReadNeedsFinish = false; - - GPU->GetEventHandler()->DidRender3DEnd(); - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_2_0::InitExtensions() { Render3DError error = OGLERROR_NOERR; @@ -3474,7 +3587,14 @@ Render3DError OpenGLRenderer_2_0::InitExtensions() std::string edgeMarkFragShaderString = std::string(EdgeMarkFragShader_100); std::string fogVtxShaderString = std::string(FogVtxShader_100); std::string fogFragShaderString = std::string(FogFragShader_100); - error = this->InitPostprocessingPrograms(edgeMarkVtxShaderString, edgeMarkFragShaderString, fogVtxShaderString, fogFragShaderString); + std::string framebufferOutputVtxShaderString = std::string(FramebufferOutputVtxShader_100); + std::string framebufferOutputFragShaderString = std::string(FramebufferOutputFragShader_100); + error = this->InitPostprocessingPrograms(edgeMarkVtxShaderString, + edgeMarkFragShaderString, + fogVtxShaderString, + fogFragShaderString, + framebufferOutputVtxShaderString, + framebufferOutputFragShaderString); if (error != OGLERROR_NOERR) { this->DestroyGeometryProgram(); @@ -3507,6 +3627,8 @@ Render3DError OpenGLRenderer_2_0::InitExtensions() this->IsExtensionPresent(&oglExtensionSet, "GL_EXT_packed_depth_stencil"); if (this->isFBOSupported) { + this->willConvertFramebufferOnGPU = (this->isShaderSupported && this->isVAOSupported && this->isPBOSupported && this->isFBOSupported); + error = this->CreateFBOs(); if (error != OGLERROR_NOERR) { @@ -3534,6 +3656,8 @@ Render3DError OpenGLRenderer_2_0::InitExtensions() INFO("OpenGL: Multisampled FBOs are unsupported. Multisample antialiasing will be disabled.\n"); } + this->willConvertFramebufferOnGPU = (this->isShaderSupported && this->isVAOSupported && this->isPBOSupported && this->isFBOSupported); + this->InitTextures(); this->InitFinalRenderStates(&oglExtensionSet); // This must be done last @@ -3563,31 +3687,12 @@ Render3DError OpenGLRenderer_2_0::InitFinalRenderStates(const std::setref; - glBindAttribLocation(OGLRef.programEdgeMarkID, OGLVertexAttributeID_Position, "inPosition"); - glBindAttribLocation(OGLRef.programEdgeMarkID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); - - return OGLERROR_NOERR; -} - -Render3DError OpenGLRenderer_2_0::InitEdgeMarkProgramShaderLocations() -{ - OGLRenderRef &OGLRef = *this->ref; - - const GLint uniformTexGDepth = glGetUniformLocation(OGLRef.programEdgeMarkID, "texInFragDepth"); - const GLint uniformTexGPolyID = glGetUniformLocation(OGLRef.programEdgeMarkID, "texInPolyID"); - glUniform1i(uniformTexGDepth, OGLTextureUnitID_GDepth); - glUniform1i(uniformTexGPolyID, OGLTextureUnitID_GPolyID); - - OGLRef.uniformFramebufferSize = glGetUniformLocation(OGLRef.programEdgeMarkID, "framebufferSize"); - OGLRef.uniformStateEdgeColor = glGetUniformLocation(OGLRef.programEdgeMarkID, "stateEdgeColor"); - - return OGLERROR_NOERR; -} - -Render3DError OpenGLRenderer_2_0::InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, const std::string &edgeMarkFragShader, const std::string &fogVtxShader, const std::string &fogFragShader) +Render3DError OpenGLRenderer_2_0::InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, + const std::string &edgeMarkFragShader, + const std::string &fogVtxShader, + const std::string &fogFragShader, + const std::string &framebufferOutputVtxShader, + const std::string &framebufferOutputFragShader) { Render3DError error = OGLERROR_NOERR; OGLRenderRef &OGLRef = *this->ref; @@ -3665,8 +3770,6 @@ Render3DError OpenGLRenderer_2_0::InitPostprocessingPrograms(const std::string & } glValidateProgram(OGLRef.programEdgeMarkID); - glUseProgram(OGLRef.programEdgeMarkID); - this->InitEdgeMarkProgramShaderLocations(); // ------------------------------------------ @@ -3744,45 +3847,93 @@ Render3DError OpenGLRenderer_2_0::InitPostprocessingPrograms(const std::string & } glValidateProgram(OGLRef.programFogID); - glUseProgram(OGLRef.programFogID); - this->InitFogProgramShaderLocations(); + // ------------------------------------------ + + OGLRef.vertexFramebufferOutputShaderID = glCreateShader(GL_VERTEX_SHADER); + if(!OGLRef.vertexFramebufferOutputShaderID) + { + INFO("OpenGL: Failed to create the framebuffer output vertex shader.\n"); + return OGLERROR_SHADER_CREATE_ERROR; + } + + const char *framebufferOutputVtxShaderCStr = framebufferOutputVtxShader.c_str(); + glShaderSource(OGLRef.vertexFramebufferOutputShaderID, 1, (const GLchar **)&framebufferOutputVtxShaderCStr, NULL); + glCompileShader(OGLRef.vertexFramebufferOutputShaderID); + if (!this->ValidateShaderCompile(OGLRef.vertexFramebufferOutputShaderID)) + { + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + INFO("OpenGL: Failed to compile the framebuffer output vertex shader.\n"); + return OGLERROR_SHADER_CREATE_ERROR; + } + + OGLRef.fragmentFramebufferOutputShaderID = glCreateShader(GL_FRAGMENT_SHADER); + if(!OGLRef.fragmentFramebufferOutputShaderID) + { + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + INFO("OpenGL: Failed to create the framebuffer output fragment shader.\n"); + return OGLERROR_SHADER_CREATE_ERROR; + } + + const char *framebufferOutputFragShaderCStr = framebufferOutputFragShader.c_str(); + glShaderSource(OGLRef.fragmentFramebufferOutputShaderID, 1, (const GLchar **)&framebufferOutputFragShaderCStr, NULL); + glCompileShader(OGLRef.fragmentFramebufferOutputShaderID); + if (!this->ValidateShaderCompile(OGLRef.fragmentFramebufferOutputShaderID)) + { + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + glDeleteShader(OGLRef.fragmentFramebufferOutputShaderID); + INFO("OpenGL: Failed to compile the framebuffer output fragment shader.\n"); + return OGLERROR_SHADER_CREATE_ERROR; + } + + OGLRef.programFramebufferOutputID = glCreateProgram(); + if(!OGLRef.programFramebufferOutputID) + { + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + glDeleteShader(OGLRef.fragmentFramebufferOutputShaderID); + INFO("OpenGL: Failed to create the framebuffer output shader program.\n"); + return OGLERROR_SHADER_CREATE_ERROR; + } + + glAttachShader(OGLRef.programFramebufferOutputID, OGLRef.vertexFramebufferOutputShaderID); + glAttachShader(OGLRef.programFramebufferOutputID, OGLRef.fragmentFramebufferOutputShaderID); + + error = this->InitFramebufferOutputProgramBindings(); + if (error != OGLERROR_NOERR) + { + glDetachShader(OGLRef.programFramebufferOutputID, OGLRef.vertexFramebufferOutputShaderID); + glDetachShader(OGLRef.programFramebufferOutputID, OGLRef.fragmentFramebufferOutputShaderID); + glDeleteProgram(OGLRef.programFramebufferOutputID); + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + glDeleteShader(OGLRef.fragmentFramebufferOutputShaderID); + INFO("OpenGL: Failed to make the framebuffer output shader bindings.\n"); + return error; + } + + glLinkProgram(OGLRef.programFramebufferOutputID); + if (!this->ValidateShaderProgramLink(OGLRef.programFramebufferOutputID)) + { + glDetachShader(OGLRef.programFramebufferOutputID, OGLRef.vertexFramebufferOutputShaderID); + glDetachShader(OGLRef.programFramebufferOutputID, OGLRef.fragmentFramebufferOutputShaderID); + glDeleteProgram(OGLRef.programFramebufferOutputID); + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + glDeleteShader(OGLRef.fragmentFramebufferOutputShaderID); + INFO("OpenGL: Failed to link the framebuffer output shader program.\n"); + return OGLERROR_SHADER_CREATE_ERROR; + } + + glValidateProgram(OGLRef.programFramebufferOutputID); + this->InitFramebufferOutputShaderLocations(); + + // ------------------------------------------ + glUseProgram(OGLRef.programGeometryID); INFO("OpenGL: Successfully created postprocess shaders.\n"); return OGLERROR_NOERR; } -Render3DError OpenGLRenderer_2_0::InitFogProgramBindings() -{ - OGLRenderRef &OGLRef = *this->ref; - glBindAttribLocation(OGLRef.programFogID, OGLVertexAttributeID_Position, "inPosition"); - glBindAttribLocation(OGLRef.programFogID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); - - return OGLERROR_NOERR; -} - -Render3DError OpenGLRenderer_2_0::InitFogProgramShaderLocations() -{ - OGLRenderRef &OGLRef = *this->ref; - - const GLint uniformTexGColor = glGetUniformLocation(OGLRef.programFogID, "texInFragColor"); - const GLint uniformTexGDepth = glGetUniformLocation(OGLRef.programFogID, "texInFragDepth"); - const GLint uniformTexGFog = glGetUniformLocation(OGLRef.programFogID, "texInFogAttributes"); - glUniform1i(uniformTexGColor, OGLTextureUnitID_GColor); - glUniform1i(uniformTexGDepth, OGLTextureUnitID_GDepth); - glUniform1i(uniformTexGFog, OGLTextureUnitID_FogAttr); - - OGLRef.uniformStateEnableFogAlphaOnly = glGetUniformLocation(OGLRef.programFogID, "stateEnableFogAlphaOnly"); - OGLRef.uniformStateFogColor = glGetUniformLocation(OGLRef.programFogID, "stateFogColor"); - OGLRef.uniformStateFogDensity = glGetUniformLocation(OGLRef.programFogID, "stateFogDensity"); - OGLRef.uniformStateFogOffset = glGetUniformLocation(OGLRef.programFogID, "stateFogOffset"); - OGLRef.uniformStateFogStep = glGetUniformLocation(OGLRef.programFogID, "stateFogStep"); - - return OGLERROR_NOERR; -} - Render3DError OpenGLRenderer_2_0::DestroyPostprocessingPrograms() { OGLRenderRef &OGLRef = *this->ref; @@ -3801,6 +3952,90 @@ Render3DError OpenGLRenderer_2_0::DestroyPostprocessingPrograms() glDeleteShader(OGLRef.vertexFogShaderID); glDeleteShader(OGLRef.fragmentFogShaderID); + glDetachShader(OGLRef.programFramebufferOutputID, OGLRef.vertexFramebufferOutputShaderID); + glDetachShader(OGLRef.programFramebufferOutputID, OGLRef.fragmentFramebufferOutputShaderID); + glDeleteProgram(OGLRef.programFramebufferOutputID); + glDeleteShader(OGLRef.vertexFramebufferOutputShaderID); + glDeleteShader(OGLRef.fragmentFramebufferOutputShaderID); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_2_0::InitEdgeMarkProgramBindings() +{ + OGLRenderRef &OGLRef = *this->ref; + glBindAttribLocation(OGLRef.programEdgeMarkID, OGLVertexAttributeID_Position, "inPosition"); + glBindAttribLocation(OGLRef.programEdgeMarkID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_2_0::InitEdgeMarkProgramShaderLocations() +{ + OGLRenderRef &OGLRef = *this->ref; + + glUseProgram(OGLRef.programEdgeMarkID); + + const GLint uniformTexGDepth = glGetUniformLocation(OGLRef.programEdgeMarkID, "texInFragDepth"); + const GLint uniformTexGPolyID = glGetUniformLocation(OGLRef.programEdgeMarkID, "texInPolyID"); + glUniform1i(uniformTexGDepth, OGLTextureUnitID_GDepth); + glUniform1i(uniformTexGPolyID, OGLTextureUnitID_GPolyID); + + OGLRef.uniformFramebufferSize = glGetUniformLocation(OGLRef.programEdgeMarkID, "framebufferSize"); + OGLRef.uniformStateEdgeColor = glGetUniformLocation(OGLRef.programEdgeMarkID, "stateEdgeColor"); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_2_0::InitFogProgramBindings() +{ + OGLRenderRef &OGLRef = *this->ref; + glBindAttribLocation(OGLRef.programFogID, OGLVertexAttributeID_Position, "inPosition"); + glBindAttribLocation(OGLRef.programFogID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_2_0::InitFogProgramShaderLocations() +{ + OGLRenderRef &OGLRef = *this->ref; + + glUseProgram(OGLRef.programFogID); + + const GLint uniformTexGColor = glGetUniformLocation(OGLRef.programFogID, "texInFragColor"); + const GLint uniformTexGDepth = glGetUniformLocation(OGLRef.programFogID, "texInFragDepth"); + const GLint uniformTexGFog = glGetUniformLocation(OGLRef.programFogID, "texInFogAttributes"); + glUniform1i(uniformTexGColor, OGLTextureUnitID_GColor); + glUniform1i(uniformTexGDepth, OGLTextureUnitID_GDepth); + glUniform1i(uniformTexGFog, OGLTextureUnitID_FogAttr); + + OGLRef.uniformStateEnableFogAlphaOnly = glGetUniformLocation(OGLRef.programFogID, "stateEnableFogAlphaOnly"); + OGLRef.uniformStateFogColor = glGetUniformLocation(OGLRef.programFogID, "stateFogColor"); + OGLRef.uniformStateFogDensity = glGetUniformLocation(OGLRef.programFogID, "stateFogDensity"); + OGLRef.uniformStateFogOffset = glGetUniformLocation(OGLRef.programFogID, "stateFogOffset"); + OGLRef.uniformStateFogStep = glGetUniformLocation(OGLRef.programFogID, "stateFogStep"); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_2_0::InitFramebufferOutputProgramBindings() +{ + OGLRenderRef &OGLRef = *this->ref; + glBindAttribLocation(OGLRef.programFramebufferOutputID, OGLVertexAttributeID_Position, "inPosition"); + glBindAttribLocation(OGLRef.programFramebufferOutputID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_2_0::InitFramebufferOutputShaderLocations() +{ + OGLRenderRef &OGLRef = *this->ref; + + glUseProgram(OGLRef.programFramebufferOutputID); + + const GLint uniformTexFinalColor = glGetUniformLocation(OGLRef.programFramebufferOutputID, "texInFragColor"); + glUniform1i(uniformTexFinalColor, OGLTextureUnitID_FinalColor); + return OGLERROR_NOERR; } @@ -4190,9 +4425,57 @@ Render3DError OpenGLRenderer_2_0::SetupTexture(const POLY &thePoly, bool enableT Render3DError OpenGLRenderer_2_1::ReadBackPixels() { - glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); - this->_pixelReadNeedsFinish = true; + if (this->_mappedFramebuffer != NULL) + { + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + this->_mappedFramebuffer = NULL; + } + if (this->willConvertFramebufferOnGPU) + { + // Perform the RGBA6665 color space conversion while we're still on the GPU so + // that we can avoid having to do it on the CPU. + OGLRenderRef &OGLRef = *this->ref; + + glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID); + glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT); + glBlitFramebufferEXT(0, this->_framebufferHeight, this->_framebufferWidth, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + + glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboPostprocessID); + glDrawBuffer(GL_COLOR_ATTACHMENT2_EXT); + + glUseProgram(OGLRef.programFramebufferOutputID); + + glViewport(0, 0, this->_framebufferWidth, this->_framebufferHeight); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glDisable(GL_CULL_FACE); + + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboPostprocessIndexID); + glBindVertexArray(OGLRef.vaoPostprocessStatesID); + + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0); + + glBindVertexArray(0); + + // Read back the pixels. + glReadBuffer(GL_COLOR_ATTACHMENT2_EXT); + glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); + + // Set the read and draw target buffers back to color attachment 0, which is always the default. + glReadBuffer(GL_COLOR_ATTACHMENT0_EXT); + glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + } + else + { + // Otherwise, we'll just have to do the color space conversion on the CPU if there + // isn't proper support. + glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); + } + + this->_pixelReadNeedsFinish = true; return OGLERROR_NOERR; } @@ -4215,12 +4498,8 @@ Render3DError OpenGLRenderer_2_1::RenderFinish() return OGLERROR_BEGINGL_FAILED; } - const FragmentColor *__restrict mappedBufferPtr = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - if (mappedBufferPtr != NULL) - { - this->FlushFramebuffer(mappedBufferPtr, framebufferRGBA6665, framebufferRGBA5551); - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - } + this->_mappedFramebuffer = (FragmentColor *__restrict)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + this->FlushFramebuffer(this->_mappedFramebuffer, framebufferRGBA6665, framebufferRGBA5551); ENDGL(); } diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index 386f7fed5..d9b2365c8 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -292,7 +292,8 @@ enum OGLVertexAttributeID enum OGLTextureUnitID { // Main textures will always be on texture unit 0. - OGLTextureUnitID_ToonTable = 1, + OGLTextureUnitID_FinalColor = 1, + OGLTextureUnitID_ToonTable, OGLTextureUnitID_GColor, OGLTextureUnitID_GDepth, OGLTextureUnitID_GPolyID, @@ -425,13 +426,14 @@ struct OGLRenderRef GLuint texGPolyID; GLuint texGDepthStencilID; GLuint texPostprocessFogID; + GLuint texFinalColorID; GLuint rboMSGColorID; GLuint rboMSGDepthID; GLuint rboMSGPolyID; GLuint rboMSGFogAttrID; GLuint rboMSGDepthStencilID; - GLuint rboMSPostprocessID; + GLuint rboFramebufferRGBA6665ID; GLuint fboClearImageID; GLuint fboRenderID; @@ -446,10 +448,13 @@ struct OGLRenderRef GLuint vertexEdgeMarkShaderID; GLuint vertexFogShaderID; + GLuint vertexFramebufferOutputShaderID; GLuint fragmentEdgeMarkShaderID; GLuint fragmentFogShaderID; + GLuint fragmentFramebufferOutputShaderID; GLuint programEdgeMarkID; GLuint programFogID; + GLuint programFramebufferOutputID; GLint uniformFramebufferSize; GLint uniformStateToonShadingMode; @@ -566,6 +571,9 @@ private: unsigned int versionMinor; unsigned int versionRevision; +private: + Render3DError _FlushFramebufferConvertOnCPU(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); + protected: // OpenGL-specific References OGLRenderRef *ref; @@ -577,15 +585,16 @@ protected: bool isMultisampledFBOSupported; bool isShaderSupported; bool isVAOSupported; + bool willConvertFramebufferOnGPU; // Textures TexCacheItem *currTexture; + FragmentColor *_mappedFramebuffer; bool _pixelReadNeedsFinish; size_t _currentPolyIndex; - Render3DError FlushFramebuffer(const FragmentColor *__restrict srcRGBA8888, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); - Render3DError FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); + Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); // OpenGL-specific methods virtual Render3DError CreateVBOs() = 0; @@ -603,12 +612,19 @@ protected: virtual Render3DError InitTextures() = 0; virtual Render3DError InitFinalRenderStates(const std::set *oglExtensionSet) = 0; virtual Render3DError InitTables() = 0; + virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, + const std::string &edgeMarkFragShader, + const std::string &fogVtxShader, + const std::string &fogFragShader, + const std::string &framebufferOutputVtxShader, + const std::string &framebufferOutputFragShader) = 0; + virtual Render3DError DestroyPostprocessingPrograms() = 0; virtual Render3DError InitEdgeMarkProgramBindings() = 0; virtual Render3DError InitEdgeMarkProgramShaderLocations() = 0; - virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, const std::string &edgeMarkFragShader, const std::string &fogVtxShader, const std::string &fogFragShader) = 0; virtual Render3DError InitFogProgramBindings() = 0; virtual Render3DError InitFogProgramShaderLocations() = 0; - virtual Render3DError DestroyPostprocessingPrograms() = 0; + virtual Render3DError InitFramebufferOutputProgramBindings() = 0; + virtual Render3DError InitFramebufferOutputShaderLocations() = 0; virtual Render3DError LoadGeometryShaders(std::string &outVertexShaderProgram, std::string &outFragmentShaderProgram) = 0; virtual Render3DError InitGeometryProgramBindings() = 0; @@ -638,6 +654,8 @@ public: bool ValidateShaderProgramLink(GLuint theProgram) const; void GetVersion(unsigned int *major, unsigned int *minor, unsigned int *revision) const; void SetVersion(unsigned int major, unsigned int minor, unsigned int revision); + + virtual FragmentColor* GetFramebuffer(); }; class OpenGLRenderer_1_2 : public OpenGLRenderer @@ -663,12 +681,19 @@ protected: virtual Render3DError InitGeometryProgramBindings(); virtual Render3DError InitGeometryProgramShaderLocations(); virtual void DestroyGeometryProgram(); + virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, + const std::string &edgeMarkFragShader, + const std::string &fogVtxShader, + const std::string &fogFragShader, + const std::string &framebufferOutputVtxShader, + const std::string &framebufferOutputFragShader); + virtual Render3DError DestroyPostprocessingPrograms(); virtual Render3DError InitEdgeMarkProgramBindings(); virtual Render3DError InitEdgeMarkProgramShaderLocations(); - virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, const std::string &edgeMarkFragShader, const std::string &fogVtxShader, const std::string &fogFragShader); virtual Render3DError InitFogProgramBindings(); virtual Render3DError InitFogProgramShaderLocations(); - virtual Render3DError DestroyPostprocessingPrograms(); + virtual Render3DError InitFramebufferOutputProgramBindings(); + virtual Render3DError InitFramebufferOutputShaderLocations(); virtual Render3DError CreateToonTable(); virtual Render3DError DestroyToonTable(); @@ -728,19 +753,14 @@ class OpenGLRenderer_1_5 : public OpenGLRenderer_1_4 protected: virtual Render3DError CreateVBOs(); virtual void DestroyVBOs(); - virtual Render3DError CreatePBOs(); - virtual void DestroyPBOs(); virtual Render3DError CreateVAOs(); virtual Render3DError EnableVertexAttributes(); virtual Render3DError DisableVertexAttributes(); virtual Render3DError BeginRender(const GFX3D &engine); - virtual Render3DError ReadBackPixels(); - + public: ~OpenGLRenderer_1_5(); - - virtual Render3DError RenderFinish(); }; class OpenGLRenderer_2_0 : public OpenGLRenderer_1_5 @@ -748,12 +768,19 @@ class OpenGLRenderer_2_0 : public OpenGLRenderer_1_5 protected: virtual Render3DError InitExtensions(); virtual Render3DError InitFinalRenderStates(const std::set *oglExtensionSet); + virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, + const std::string &edgeMarkFragShader, + const std::string &fogVtxShader, + const std::string &fogFragShader, + const std::string &framebufferOutputVtxShader, + const std::string &framebufferOutputFragShader); + virtual Render3DError DestroyPostprocessingPrograms(); virtual Render3DError InitEdgeMarkProgramBindings(); virtual Render3DError InitEdgeMarkProgramShaderLocations(); - virtual Render3DError InitPostprocessingPrograms(const std::string &edgeMarkVtxShader, const std::string &edgeMarkFragShader, const std::string &fogVtxShader, const std::string &fogFragShader); virtual Render3DError InitFogProgramBindings(); virtual Render3DError InitFogProgramShaderLocations(); - virtual Render3DError DestroyPostprocessingPrograms(); + virtual Render3DError InitFramebufferOutputProgramBindings(); + virtual Render3DError InitFramebufferOutputShaderLocations(); virtual Render3DError EnableVertexAttributes(); virtual Render3DError DisableVertexAttributes(); diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index 0c725a192..f908deb64 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -439,6 +439,44 @@ static const char *FogFragShader_150 = {"\ }\n\ "}; +// Vertex shader for the final framebuffer, GLSL 1.50 +static const char *FramebufferOutputVtxShader_150 = {"\ + #version 150\n\ + \n\ + in vec2 inPosition;\n\ + in vec2 inTexCoord0;\n\ + out vec2 texCoord;\n\ + \n\ + void main()\n\ + {\n\ + texCoord = inTexCoord0;\n\ + gl_Position = vec4(inPosition, 0.0, 1.0);\n\ + }\n\ +"}; + +// Fragment shader for the final framebuffer, GLSL 1.50 +static const char *FramebufferOutputFragShader_150 = {"\ + #version 150\n\ + \n\ + in vec2 texCoord;\n\ + \n\ + uniform sampler2D texInFragColor;\n\ + \n\ + out vec4 outFragColor;\n\ + \n\ + void main()\n\ + {\n\ + // Note that we swap B and R since pixel readbacks are done in BGRA format for fastest\n\ + // performance. The final color is still in RGBA format.\n\ + vec4 colorRGBA6665 = texture(texInFragColor, texCoord).bgra;\n\ + colorRGBA6665 = floor((colorRGBA6665 * 255.0) + 0.5);\n\ + colorRGBA6665.rgb = floor(colorRGBA6665.rgb / 4.0);\n\ + colorRGBA6665.a = floor(colorRGBA6665.a / 8.0);\n\ + \n\ + outFragColor = (colorRGBA6665 / 255.0);\n\ + }\n\ +"}; + void OGLCreateRenderer_3_2(OpenGLRenderer **rendererPtr) { if (IsVersionSupported(3, 2, 0)) @@ -471,6 +509,9 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() // Load and create shaders. Return on any error, since v3.2 Core Profile makes shaders mandatory. this->isShaderSupported = true; + // OpenGL v3.2 Core Profile should have all the necessary features to be able to convert the framebuffer. + this->willConvertFramebufferOnGPU = true; + std::string vertexShaderProgram; std::string fragmentShaderProgram; error = this->LoadGeometryShaders(vertexShaderProgram, fragmentShaderProgram); @@ -491,7 +532,14 @@ Render3DError OpenGLRenderer_3_2::InitExtensions() std::string edgeMarkFragShaderString = std::string(EdgeMarkFragShader_150); std::string fogVtxShaderString = std::string(FogVtxShader_150); std::string fogFragShaderString = std::string(FogFragShader_150); - error = this->InitPostprocessingPrograms(edgeMarkVtxShaderString, edgeMarkFragShaderString, fogVtxShaderString, fogFragShaderString); + std::string framebufferOutputVtxShaderString = std::string(FramebufferOutputVtxShader_150); + std::string framebufferOutputFragShaderString = std::string(FramebufferOutputFragShader_150); + error = this->InitPostprocessingPrograms(edgeMarkVtxShaderString, + edgeMarkFragShaderString, + fogVtxShaderString, + fogFragShaderString, + framebufferOutputVtxShaderString, + framebufferOutputFragShaderString); if (error != OGLERROR_NOERR) { this->DestroyGeometryProgram(); @@ -551,6 +599,8 @@ Render3DError OpenGLRenderer_3_2::InitEdgeMarkProgramShaderLocations() { OGLRenderRef &OGLRef = *this->ref; + glUseProgram(OGLRef.programEdgeMarkID); + const GLuint uniformBlockRenderStates = glGetUniformBlockIndex(OGLRef.programEdgeMarkID, "RenderStates"); glUniformBlockBinding(OGLRef.programEdgeMarkID, uniformBlockRenderStates, OGLBindingPointID_RenderStates); @@ -576,6 +626,8 @@ Render3DError OpenGLRenderer_3_2::InitFogProgramShaderLocations() { OGLRenderRef &OGLRef = *this->ref; + glUseProgram(OGLRef.programFogID); + const GLuint uniformBlockRenderStates = glGetUniformBlockIndex(OGLRef.programFogID, "RenderStates"); glUniformBlockBinding(OGLRef.programFogID, uniformBlockRenderStates, OGLBindingPointID_RenderStates); @@ -589,6 +641,28 @@ Render3DError OpenGLRenderer_3_2::InitFogProgramShaderLocations() return OGLERROR_NOERR; } +Render3DError OpenGLRenderer_3_2::InitFramebufferOutputProgramBindings() +{ + OGLRenderRef &OGLRef = *this->ref; + glBindAttribLocation(OGLRef.programFramebufferOutputID, OGLVertexAttributeID_Position, "inPosition"); + glBindAttribLocation(OGLRef.programFramebufferOutputID, OGLVertexAttributeID_TexCoord0, "inTexCoord0"); + glBindFragDataLocation(OGLRef.programFramebufferOutputID, 0, "outFragColor"); + + return OGLERROR_NOERR; +} + +Render3DError OpenGLRenderer_3_2::InitFramebufferOutputShaderLocations() +{ + OGLRenderRef &OGLRef = *this->ref; + + glUseProgram(OGLRef.programFramebufferOutputID); + + const GLint uniformTexFinalColor = glGetUniformLocation(OGLRef.programFramebufferOutputID, "texInFragColor"); + glUniform1i(uniformTexFinalColor, OGLTextureUnitID_FinalColor); + + return OGLERROR_NOERR; +} + Render3DError OpenGLRenderer_3_2::CreateFBOs() { OGLRenderRef &OGLRef = *this->ref; @@ -600,6 +674,7 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glGenTextures(1, &OGLRef.texCIPolyID); glGenTextures(1, &OGLRef.texCIDepthStencilID); + glGenTextures(1, &OGLRef.texFinalColorID); glGenTextures(1, &OGLRef.texGColorID); glGenTextures(1, &OGLRef.texGDepthID); glGenTextures(1, &OGLRef.texGFogAttrID); @@ -607,6 +682,14 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glGenTextures(1, &OGLRef.texGDepthStencilID); glGenTextures(1, &OGLRef.texPostprocessFogID); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor); + glBindTexture(GL_TEXTURE_2D, OGLRef.texFinalColorID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, this->_framebufferWidth, this->_framebufferHeight, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor); glBindTexture(GL_TEXTURE_2D, OGLRef.texGDepthStencilID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -694,6 +777,11 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glBindTexture(GL_TEXTURE_2D, 0); + // Set up RBOs + glGenRenderbuffers(1, &OGLRef.rboFramebufferRGBA6665ID); + glBindRenderbuffer(GL_RENDERBUFFER, OGLRef.rboFramebufferRGBA6665ID); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, this->_framebufferWidth, this->_framebufferHeight); + // Set up FBOs glGenFramebuffers(1, &OGLRef.fboClearImageID); glGenFramebuffers(1, &OGLRef.fboRenderID); @@ -725,6 +813,8 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffers(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -762,6 +852,8 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffers(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -775,6 +867,8 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboPostprocessID); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGLRef.texPostprocessFogID, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, OGLRef.texFinalColorID, 0); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_RENDERBUFFER, OGLRef.rboFramebufferRGBA6665ID); if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { @@ -795,6 +889,8 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffers(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -837,6 +933,8 @@ void OpenGLRenderer_3_2::DestroyFBOs() glDeleteTextures(1, &OGLRef.texGFogAttrID); glDeleteTextures(1, &OGLRef.texGDepthStencilID); glDeleteTextures(1, &OGLRef.texPostprocessFogID); + glDeleteTextures(1, &OGLRef.texFinalColorID); + glDeleteRenderbuffers(1, &OGLRef.rboFramebufferRGBA6665ID); OGLRef.fboClearImageID = 0; OGLRef.fboRenderID = 0; @@ -1018,6 +1116,8 @@ Render3DError OpenGLRenderer_3_2::InitGeometryProgramShaderLocations() { OGLRenderRef &OGLRef = *this->ref; + glUseProgram(OGLRef.programGeometryID); + // Set up render states UBO const GLuint uniformBlockRenderStates = glGetUniformBlockIndex(OGLRef.programGeometryID, "RenderStates"); glUniformBlockBinding(OGLRef.programGeometryID, uniformBlockRenderStates, OGLBindingPointID_RenderStates); @@ -1144,6 +1244,53 @@ Render3DError OpenGLRenderer_3_2::DownsampleFBO() return OGLERROR_NOERR; } +Render3DError OpenGLRenderer_3_2::ReadBackPixels() +{ + OGLRenderRef &OGLRef = *this->ref; + + if (this->_mappedFramebuffer != NULL) + { + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + this->_mappedFramebuffer = NULL; + } + + // Perform the RGBA6665 color space conversion while we're still on the GPU so + // that we can avoid having to do it on the CPU. + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OGLRef.fboPostprocessID); + glDrawBuffer(GL_COLOR_ATTACHMENT1); + glBlitFramebuffer(0, this->_framebufferHeight, this->_framebufferWidth, 0, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST); + + glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboPostprocessID); + glDrawBuffer(GL_COLOR_ATTACHMENT2); + + glUseProgram(OGLRef.programFramebufferOutputID); + + glViewport(0, 0, this->_framebufferWidth, this->_framebufferHeight); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glDisable(GL_CULL_FACE); + + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboPostprocessVtxID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboPostprocessIndexID); + glBindVertexArray(OGLRef.vaoPostprocessStatesID); + + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, 0); + + glBindVertexArray(0); + + // Read back the pixels. + glReadBuffer(GL_COLOR_ATTACHMENT2); + glReadPixels(0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0); + + // Set the read and draw target buffers back to color attachment 0, which is always the default. + glReadBuffer(GL_COLOR_ATTACHMENT0); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + + this->_pixelReadNeedsFinish = true; + return OGLERROR_NOERR; +} + Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D &engine) { OGLRenderRef &OGLRef = *this->ref; @@ -1565,6 +1712,16 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h) return OGLERROR_BEGINGL_FAILED; } + if (this->_mappedFramebuffer != NULL) + { + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + this->_mappedFramebuffer = NULL; + } + + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor); + glBindTexture(GL_TEXTURE_2D, OGLRef.texFinalColorID); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor); glBindTexture(GL_TEXTURE_2D, OGLRef.texGDepthStencilID); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, w, h, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); @@ -1584,6 +1741,9 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h) glBindTexture(GL_TEXTURE_2D, OGLRef.texPostprocessFogID); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + glBindRenderbuffer(GL_RENDERBUFFER, OGLRef.rboFramebufferRGBA6665ID); + glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, w, h); + if (this->isMultisampledFBOSupported) { GLint maxSamples = 0; diff --git a/desmume/src/OGLRender_3_2.h b/desmume/src/OGLRender_3_2.h index e8eff75a8..8d69b4e21 100644 --- a/desmume/src/OGLRender_3_2.h +++ b/desmume/src/OGLRender_3_2.h @@ -1,7 +1,7 @@ /* Copyright (C) 2006 yopyop Copyright (C) 2006-2007 shash - Copyright (C) 2008-2015 DeSmuME team + Copyright (C) 2008-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -65,6 +65,8 @@ protected: virtual Render3DError InitEdgeMarkProgramShaderLocations(); virtual Render3DError InitFogProgramBindings(); virtual Render3DError InitFogProgramShaderLocations(); + virtual Render3DError InitFramebufferOutputProgramBindings(); + virtual Render3DError InitFramebufferOutputShaderLocations(); virtual Render3DError CreateFBOs(); virtual void DestroyFBOs(); virtual Render3DError CreateMultisampledFBO(); @@ -81,6 +83,7 @@ protected: virtual Render3DError EnableVertexAttributes(); virtual Render3DError DisableVertexAttributes(); virtual Render3DError DownsampleFBO(); + virtual Render3DError ReadBackPixels(); virtual Render3DError BeginRender(const GFX3D &engine); virtual Render3DError RenderEdgeMarking(const u16 *colorTable, const bool useAntialias); virtual Render3DError RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly); diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 53b6bbab7..eb296f244 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2009-2015 DeSmuME team + Copyright (C) 2009-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1961,9 +1961,8 @@ Render3DError SoftRasterizerRenderer::EndRender(const u64 frameCount) this->RenderEdgeMarkingAndFog(this->postprocessParam[0]); } - FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL; u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL; - this->FlushFramebuffer(framebufferRGBA6665, framebufferRGBA5551); + this->FlushFramebuffer(this->_framebufferColor, NULL, framebufferRGBA5551); } return RENDER3DERROR_NOERR; @@ -2007,9 +2006,8 @@ Render3DError SoftRasterizerRenderer::RenderFinish() } } - FragmentColor *framebufferRGBA6665 = (this->_willFlushFramebufferRGBA6665) ? GPU->GetEngineMain()->Get3DFramebufferRGBA6665() : NULL; u16 *framebufferRGBA5551 = (this->_willFlushFramebufferRGBA5551) ? GPU->GetEngineMain()->Get3DFramebufferRGBA5551() : NULL; - this->FlushFramebuffer(framebufferRGBA6665, framebufferRGBA5551); + this->FlushFramebuffer(this->_framebufferColor, NULL, framebufferRGBA5551); GPU->GetEventHandler()->DidRender3DEnd(); return RENDER3DERROR_NOERR; @@ -2017,19 +2015,14 @@ Render3DError SoftRasterizerRenderer::RenderFinish() Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h) { - if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT) + Render3DError error = Render3D::SetFramebufferSize(w, h); + if (error != RENDER3DERROR_NOERR) { return RENDER3DERROR_NOERR; } - FragmentAttributesBuffer *oldFramebufferAttributes = this->_framebufferAttributes; - FragmentAttributesBuffer *newFramebufferAttributes = new FragmentAttributesBuffer(w * h); - - this->_framebufferWidth = w; - this->_framebufferHeight = h; - this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); - this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665(); - this->_framebufferAttributes = newFramebufferAttributes; + delete this->_framebufferAttributes; + this->_framebufferAttributes = new FragmentAttributesBuffer(w * h); if (rasterizerCores == 0 || rasterizerCores == 1) { @@ -2046,9 +2039,7 @@ Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h) postprocessParam[i].endLine = (i < rasterizerCores - 1) ? (i + 1) * linesPerThread : h; } } - - delete oldFramebufferAttributes; - + return RENDER3DERROR_NOERR; } diff --git a/desmume/src/render3D.cpp b/desmume/src/render3D.cpp index f0659d13b..f6c613f60 100644 --- a/desmume/src/render3D.cpp +++ b/desmume/src/render3D.cpp @@ -1,6 +1,6 @@ /* Copyright (C) 2006-2007 shash - Copyright (C) 2008-2015 DeSmuME team + Copyright (C) 2008-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -274,7 +274,7 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h) this->_framebufferWidth = w; this->_framebufferHeight = h; this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor); - this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665(); + this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferRGBA6665(); // Just use the buffer that is already present on the main GPU engine return RENDER3DERROR_NOERR; } @@ -316,13 +316,18 @@ Render3DError Render3D::EndRender(const u64 frameCount) return RENDER3DERROR_NOERR; } -Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) +Render3DError Render3D::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) { + if ( (dstRGBA6665 == NULL) && (dstRGBA5551 == NULL) ) + { + return RENDER3DERROR_NOERR; + } + if (dstRGBA5551 != NULL) { for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++) { - dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000); + dstRGBA5551[i] = R6G6B6TORGB15(srcFramebuffer[i].r, srcFramebuffer[i].g, srcFramebuffer[i].b) | ((srcFramebuffer[i].a == 0) ? 0x0000 : 0x8000); } } @@ -454,7 +459,6 @@ Render3DError Render3D::Reset() if (this->_framebufferColor != NULL) { memset(this->_framebufferColor, 0, this->_framebufferColorSizeBytes); - this->FlushFramebuffer(GPU->GetEngineMain()->Get3DFramebufferRGBA6665(), GPU->GetEngineMain()->Get3DFramebufferRGBA5551()); } memset(this->clearImageColor16Buffer, 0, sizeof(this->clearImageColor16Buffer)); @@ -515,15 +519,13 @@ Render3DError Render3D::VramReconfigureSignal() #ifdef ENABLE_SSE2 -Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) +Render3DError Render3D_SSE2::FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551) { if ( (dstRGBA6665 == NULL) && (dstRGBA5551 == NULL) ) { return RENDER3DERROR_NOERR; } - const __m128i zero_vec128 = _mm_setzero_si128(); - size_t i = 0; const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight; const size_t ssePixCount = pixCount - (pixCount % 4); @@ -533,18 +535,18 @@ Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6 for (; i < ssePixCount; i += 4) { // Convert to RGBA5551 - __m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + i)); - __m128i r = _mm_and_si128(color, _mm_set1_epi32(0x0000003E)); // Read from R - r = _mm_srli_epi32(r, 1); // Shift to R + __m128i color5551 = _mm_load_si128((__m128i *)(srcFramebuffer + i)); + __m128i r = _mm_and_si128(color5551, _mm_set1_epi32(0x0000003E)); // Read from R + r = _mm_srli_epi32(r, 1); // Shift to R - __m128i g = _mm_and_si128(color, _mm_set1_epi32(0x00003E00)); // Read from G - g = _mm_srli_epi32(g, 4); // Shift in G + __m128i g = _mm_and_si128(color5551, _mm_set1_epi32(0x00003E00)); // Read from G + g = _mm_srli_epi32(g, 4); // Shift in G - __m128i b = _mm_and_si128(color, _mm_set1_epi32(0x003E0000)); // Read from B - b = _mm_srli_epi32(b, 7); // Shift to B + __m128i b = _mm_and_si128(color5551, _mm_set1_epi32(0x003E0000)); // Read from B + b = _mm_srli_epi32(b, 7); // Shift to B - __m128i a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A - a = _mm_cmpeq_epi32(a, zero_vec128); // Determine A + __m128i a = _mm_and_si128(color5551, _mm_set1_epi32(0xFF000000)); // Read from A + a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // Determine A // From here on, we're going to do an SSE2 trick to pack 32-bit down to unsigned // 16-bit. Since SSE2 only has packssdw (signed saturated 16-bit pack), using @@ -558,21 +560,21 @@ Render3DError Render3D_SSE2::FlushFramebuffer(FragmentColor *__restrict dstRGBA6 // packssdw, then shift the bit back to its original position. Then we por the // alpha vector with the post-packed color vector to get the final color. - a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A - a = _mm_packs_epi32(a, zero_vec128); // Pack 32-bit down to 16-bit - a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be + a = _mm_andnot_si128(a, _mm_set1_epi32(0x00004000)); // Mask out the bit before A + a = _mm_packs_epi32(a, _mm_setzero_si128()); // Pack 32-bit down to 16-bit + a = _mm_slli_epi16(a, 1); // Shift the A bit back to where it needs to be // Assemble the RGB colors, pack the 32-bit color into a signed 16-bit color, then por the alpha bit back in. - color = _mm_or_si128(_mm_or_si128(r, g), b); - color = _mm_packs_epi32(color, zero_vec128); - color = _mm_or_si128(color, a); + color5551 = _mm_or_si128(_mm_or_si128(r, g), b); + color5551 = _mm_packs_epi32(color5551, _mm_setzero_si128()); + color5551 = _mm_or_si128(color5551, a); - _mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color); + _mm_storel_epi64((__m128i *)(dstRGBA5551 + i), color5551); } for (; i < pixCount; i++) { - dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000); + dstRGBA5551[i] = R6G6B6TORGB15(srcFramebuffer[i].r, srcFramebuffer[i].g, srcFramebuffer[i].b) | ((srcFramebuffer[i].a == 0) ? 0x0000 : 0x8000); } } diff --git a/desmume/src/render3D.h b/desmume/src/render3D.h index bc9ecf55a..3081ac052 100644 --- a/desmume/src/render3D.h +++ b/desmume/src/render3D.h @@ -1,6 +1,6 @@ /* Copyright (C) 2006-2007 shash - Copyright (C) 2007-2015 DeSmuME team + Copyright (C) 2007-2016 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -122,7 +122,7 @@ protected: virtual Render3DError RenderEdgeMarking(const u16 *colorTable, const bool useAntialias); virtual Render3DError RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly); virtual Render3DError EndRender(const u64 frameCount); - virtual Render3DError FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); + virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer); virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const; @@ -140,7 +140,6 @@ public: RendererID GetRenderID(); std::string GetName(); - FragmentColor* GetFramebuffer(); size_t GetFramebufferWidth(); size_t GetFramebufferHeight(); @@ -159,6 +158,7 @@ public: virtual Render3DError SetFramebufferSize(size_t w, size_t h); // Called whenever the output framebuffer size changes. + virtual FragmentColor* GetFramebuffer(); virtual void GetFramebufferFlushStates(bool &willFlushRGBA6665, bool &willFlushRGBA5551); virtual void SetFramebufferFlushStates(bool willFlushRGBA6665, bool willFlushRGBA5551); }; @@ -168,7 +168,7 @@ public: class Render3D_SSE2 : public Render3D { protected: - virtual Render3DError FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); + virtual Render3DError FlushFramebuffer(const FragmentColor *__restrict srcFramebuffer, FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551); public: virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);