OpenGL ES Renderer: Fix a major performance bug on many ARM-based mobile devices with integrated GPUs.
This commit is contained in:
parent
ea648f7110
commit
04f97d5755
|
@ -2525,15 +2525,18 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
|
||||||
glReadBuffer(OGL_WORKING_ATTACHMENT_ID);
|
glReadBuffer(OGL_WORKING_ATTACHMENT_ID);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
|
if (this->isPBOSupported)
|
||||||
// format without a performance penalty.
|
|
||||||
if (this->_mappedFramebuffer != NULL)
|
|
||||||
{
|
{
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
|
||||||
this->_mappedFramebuffer = NULL;
|
// format without a performance penalty.
|
||||||
|
if (this->_mappedFramebuffer != NULL)
|
||||||
|
{
|
||||||
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
|
this->_mappedFramebuffer = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
|
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = true;
|
this->_pixelReadNeedsFinish = true;
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
|
@ -3172,19 +3175,23 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
||||||
|
|
||||||
glFinish();
|
glFinish();
|
||||||
|
|
||||||
if (this->_mappedFramebuffer != NULL)
|
|
||||||
{
|
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
|
||||||
glFinish();
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
|
const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
|
||||||
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);
|
|
||||||
|
|
||||||
if (this->_mappedFramebuffer != NULL)
|
if (this->isPBOSupported)
|
||||||
{
|
{
|
||||||
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
|
if (this->_mappedFramebuffer != NULL)
|
||||||
glFinish();
|
{
|
||||||
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
|
glFinish();
|
||||||
|
}
|
||||||
|
|
||||||
|
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);
|
||||||
|
|
||||||
|
if (this->_mappedFramebuffer != NULL)
|
||||||
|
{
|
||||||
|
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
|
||||||
|
glFinish();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor);
|
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor);
|
||||||
|
@ -3208,7 +3215,18 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
||||||
this->_framebufferHeight = h;
|
this->_framebufferHeight = h;
|
||||||
this->_framebufferPixCount = w * h;
|
this->_framebufferPixCount = w * h;
|
||||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||||
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.
|
|
||||||
|
if (this->isPBOSupported)
|
||||||
|
{
|
||||||
|
this->_framebufferColor = NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Color4u8 *oldFramebufferColor = this->_framebufferColor;
|
||||||
|
Color4u8 *newFramebufferColor = (Color4u8 *)malloc_alignedPage(newFramebufferColorSizeBytes);
|
||||||
|
this->_framebufferColor = newFramebufferColor;
|
||||||
|
free_aligned(oldFramebufferColor);
|
||||||
|
}
|
||||||
|
|
||||||
// Recreate shaders that use the framebuffer size.
|
// Recreate shaders that use the framebuffer size.
|
||||||
glUseProgram(0);
|
glUseProgram(0);
|
||||||
|
@ -3261,6 +3279,8 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
||||||
|
|
||||||
Render3DError OpenGLRenderer_3_2::RenderFinish()
|
Render3DError OpenGLRenderer_3_2::RenderFinish()
|
||||||
{
|
{
|
||||||
|
OGLRenderRef &OGLRef = *this->ref;
|
||||||
|
|
||||||
if (!this->_renderNeedsFinish)
|
if (!this->_renderNeedsFinish)
|
||||||
{
|
{
|
||||||
return OGLERROR_NOERR;
|
return OGLERROR_NOERR;
|
||||||
|
@ -3270,11 +3290,20 @@ Render3DError OpenGLRenderer_3_2::RenderFinish()
|
||||||
{
|
{
|
||||||
this->_pixelReadNeedsFinish = false;
|
this->_pixelReadNeedsFinish = false;
|
||||||
|
|
||||||
if(!BEGINGL())
|
if (!BEGINGL())
|
||||||
{
|
{
|
||||||
return OGLERROR_BEGINGL_FAILED;
|
return OGLERROR_BEGINGL_FAILED;
|
||||||
}
|
}
|
||||||
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
|
|
||||||
|
if (this->isPBOSupported)
|
||||||
|
{
|
||||||
|
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, this->_framebufferColor);
|
||||||
|
}
|
||||||
|
|
||||||
ENDGL();
|
ENDGL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3298,7 +3327,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
|
||||||
memset(GPU->GetEngineMain()->Get3DFramebufferMain(), 0, this->_framebufferColorSizeBytes);
|
memset(GPU->GetEngineMain()->Get3DFramebufferMain(), 0, this->_framebufferColorSizeBytes);
|
||||||
memset(GPU->GetEngineMain()->Get3DFramebuffer16(), 0, this->_framebufferPixCount * sizeof(u16));
|
memset(GPU->GetEngineMain()->Get3DFramebuffer16(), 0, this->_framebufferPixCount * sizeof(u16));
|
||||||
|
|
||||||
if(!BEGINGL())
|
if (!BEGINGL())
|
||||||
{
|
{
|
||||||
return OGLERROR_BEGINGL_FAILED;
|
return OGLERROR_BEGINGL_FAILED;
|
||||||
}
|
}
|
||||||
|
@ -3308,14 +3337,17 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
|
||||||
glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID);
|
glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID);
|
||||||
glClearBufferfv(GL_COLOR, 0, oglColor);
|
glClearBufferfv(GL_COLOR, 0, oglColor);
|
||||||
|
|
||||||
if (this->_mappedFramebuffer != NULL)
|
if (this->isPBOSupported)
|
||||||
{
|
{
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
if (this->_mappedFramebuffer != NULL)
|
||||||
this->_mappedFramebuffer = NULL;
|
{
|
||||||
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
|
this->_mappedFramebuffer = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
|
|
||||||
|
|
||||||
ENDGL();
|
ENDGL();
|
||||||
|
|
||||||
this->_pixelReadNeedsFinish = true;
|
this->_pixelReadNeedsFinish = true;
|
||||||
|
|
|
@ -408,8 +408,20 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
|
||||||
this->isVBOSupported = true;
|
this->isVBOSupported = true;
|
||||||
this->CreateVBOs();
|
this->CreateVBOs();
|
||||||
|
|
||||||
this->isPBOSupported = true;
|
// PBOs are only used when reading back the rendered framebuffer for the emulated
|
||||||
this->CreatePBOs();
|
// BG0 layer. For desktop-class GPUs, doing an asynchronous glReadPixels() call
|
||||||
|
// is always advantageous since such devices are expected to have their GPUs
|
||||||
|
// connected to a data bus.
|
||||||
|
//
|
||||||
|
// However, many ARM-based mobile devices use integrated GPUs of varying degrees
|
||||||
|
// of memory latency and implementation quality. This means that the performance
|
||||||
|
// of an asynchronous glReadPixels() call is NOT guaranteed on such devices.
|
||||||
|
//
|
||||||
|
// In fact, many ARM-based devices suffer devastating performance drops when trying
|
||||||
|
// to do asynchronous framebuffer reads. Therefore, since most OpenGL ES users will
|
||||||
|
// be running an ARM-based iGPU, we will disable PBOs for OpenGL ES and stick with
|
||||||
|
// a traditional synchronous glReadPixels() call instead.
|
||||||
|
this->isPBOSupported = false;
|
||||||
|
|
||||||
this->isVAOSupported = true;
|
this->isVAOSupported = true;
|
||||||
this->CreateVAOs();
|
this->CreateVAOs();
|
||||||
|
|
Loading…
Reference in New Issue