OpenGL ES Renderer: Fix a major performance bug on many ARM-based mobile devices with integrated GPUs.

This commit is contained in:
rogerman 2024-08-10 15:06:30 -07:00
parent ea648f7110
commit 04f97d5755
2 changed files with 72 additions and 28 deletions

View File

@ -2525,6 +2525,8 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
glReadBuffer(OGL_WORKING_ATTACHMENT_ID); glReadBuffer(OGL_WORKING_ATTACHMENT_ID);
} }
if (this->isPBOSupported)
{
// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this // Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
// format without a performance penalty. // format without a performance penalty.
if (this->_mappedFramebuffer != NULL) if (this->_mappedFramebuffer != NULL)
@ -2534,6 +2536,7 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
} }
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
}
this->_pixelReadNeedsFinish = true; this->_pixelReadNeedsFinish = true;
return OGLERROR_NOERR; return OGLERROR_NOERR;
@ -3172,13 +3175,16 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
glFinish(); glFinish();
const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
if (this->isPBOSupported)
{
if (this->_mappedFramebuffer != NULL) if (this->_mappedFramebuffer != NULL)
{ {
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glFinish(); glFinish();
} }
const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ); glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);
if (this->_mappedFramebuffer != NULL) if (this->_mappedFramebuffer != NULL)
@ -3186,6 +3192,7 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
glFinish(); glFinish();
} }
}
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
@ -3208,7 +3215,18 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
this->_framebufferHeight = h; this->_framebufferHeight = h;
this->_framebufferPixCount = w * h; this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes; this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.
if (this->isPBOSupported)
{
this->_framebufferColor = NULL;
}
else
{
Color4u8 *oldFramebufferColor = this->_framebufferColor;
Color4u8 *newFramebufferColor = (Color4u8 *)malloc_alignedPage(newFramebufferColorSizeBytes);
this->_framebufferColor = newFramebufferColor;
free_aligned(oldFramebufferColor);
}
// Recreate shaders that use the framebuffer size. // Recreate shaders that use the framebuffer size.
glUseProgram(0); glUseProgram(0);
@ -3261,6 +3279,8 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
Render3DError OpenGLRenderer_3_2::RenderFinish() Render3DError OpenGLRenderer_3_2::RenderFinish()
{ {
OGLRenderRef &OGLRef = *this->ref;
if (!this->_renderNeedsFinish) if (!this->_renderNeedsFinish)
{ {
return OGLERROR_NOERR; return OGLERROR_NOERR;
@ -3270,11 +3290,20 @@ Render3DError OpenGLRenderer_3_2::RenderFinish()
{ {
this->_pixelReadNeedsFinish = false; this->_pixelReadNeedsFinish = false;
if(!BEGINGL()) if (!BEGINGL())
{ {
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
if (this->isPBOSupported)
{
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT); this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
}
else
{
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, this->_framebufferColor);
}
ENDGL(); ENDGL();
} }
@ -3298,7 +3327,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
memset(GPU->GetEngineMain()->Get3DFramebufferMain(), 0, this->_framebufferColorSizeBytes); memset(GPU->GetEngineMain()->Get3DFramebufferMain(), 0, this->_framebufferColorSizeBytes);
memset(GPU->GetEngineMain()->Get3DFramebuffer16(), 0, this->_framebufferPixCount * sizeof(u16)); memset(GPU->GetEngineMain()->Get3DFramebuffer16(), 0, this->_framebufferPixCount * sizeof(u16));
if(!BEGINGL()) if (!BEGINGL())
{ {
return OGLERROR_BEGINGL_FAILED; return OGLERROR_BEGINGL_FAILED;
} }
@ -3308,6 +3337,8 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID); glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID);
glClearBufferfv(GL_COLOR, 0, oglColor); glClearBufferfv(GL_COLOR, 0, oglColor);
if (this->isPBOSupported)
{
if (this->_mappedFramebuffer != NULL) if (this->_mappedFramebuffer != NULL)
{ {
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
@ -3315,6 +3346,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
} }
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0); glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
}
ENDGL(); ENDGL();

View File

@ -408,8 +408,20 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
this->isVBOSupported = true; this->isVBOSupported = true;
this->CreateVBOs(); this->CreateVBOs();
this->isPBOSupported = true; // PBOs are only used when reading back the rendered framebuffer for the emulated
this->CreatePBOs(); // BG0 layer. For desktop-class GPUs, doing an asynchronous glReadPixels() call
// is always advantageous since such devices are expected to have their GPUs
// connected over a dedicated data bus.
//
// However, many ARM-based mobile devices use integrated GPUs whose memory latency
// and implementation quality vary widely. This means that the performance
// of an asynchronous glReadPixels() call is NOT guaranteed on such devices.
//
// In fact, many ARM-based devices suffer devastating performance drops when trying
// to do asynchronous framebuffer reads. Therefore, since most OpenGL ES users will
// be running an ARM-based iGPU, we will disable PBOs for OpenGL ES and stick with
// a traditional synchronous glReadPixels() call instead.
this->isPBOSupported = false;
this->isVAOSupported = true; this->isVAOSupported = true;
this->CreateVAOs(); this->CreateVAOs();