OpenGL ES Renderer: Fix a major performance bug on many ARM-based mobile devices with integrated GPUs.
This commit is contained in:
parent
ea648f7110
commit
04f97d5755
|
@ -2525,6 +2525,8 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
|
|||
glReadBuffer(OGL_WORKING_ATTACHMENT_ID);
|
||||
}
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
// Read back the pixels in RGBA format, since an OpenGL 3.2 device should be able to read back this
|
||||
// format without a performance penalty.
|
||||
if (this->_mappedFramebuffer != NULL)
|
||||
|
@ -2534,6 +2536,7 @@ Render3DError OpenGLRenderer_3_2::ReadBackPixels()
|
|||
}
|
||||
|
||||
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
|
||||
}
|
||||
|
||||
this->_pixelReadNeedsFinish = true;
|
||||
return OGLERROR_NOERR;
|
||||
|
@ -3172,13 +3175,16 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
|||
|
||||
glFinish();
|
||||
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
if (this->_mappedFramebuffer != NULL)
|
||||
{
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glFinish();
|
||||
}
|
||||
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(Color4u8);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, NULL, GL_STREAM_READ);
|
||||
|
||||
if (this->_mappedFramebuffer != NULL)
|
||||
|
@ -3186,6 +3192,7 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
|||
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, newFramebufferColorSizeBytes, GL_MAP_READ_BIT);
|
||||
glFinish();
|
||||
}
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
|
||||
|
@ -3208,7 +3215,18 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
|||
this->_framebufferHeight = h;
|
||||
this->_framebufferPixCount = w * h;
|
||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||
this->_framebufferColor = NULL; // Don't need to make a client-side buffer since we will be reading directly from the PBO.
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
this->_framebufferColor = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
Color4u8 *oldFramebufferColor = this->_framebufferColor;
|
||||
Color4u8 *newFramebufferColor = (Color4u8 *)malloc_alignedPage(newFramebufferColorSizeBytes);
|
||||
this->_framebufferColor = newFramebufferColor;
|
||||
free_aligned(oldFramebufferColor);
|
||||
}
|
||||
|
||||
// Recreate shaders that use the framebuffer size.
|
||||
glUseProgram(0);
|
||||
|
@ -3261,6 +3279,8 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
|||
|
||||
Render3DError OpenGLRenderer_3_2::RenderFinish()
|
||||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
if (!this->_renderNeedsFinish)
|
||||
{
|
||||
return OGLERROR_NOERR;
|
||||
|
@ -3270,11 +3290,20 @@ Render3DError OpenGLRenderer_3_2::RenderFinish()
|
|||
{
|
||||
this->_pixelReadNeedsFinish = false;
|
||||
|
||||
if(!BEGINGL())
|
||||
if (!BEGINGL())
|
||||
{
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
this->_mappedFramebuffer = (Color4u8 *__restrict)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, this->_framebufferColorSizeBytes, GL_MAP_READ_BIT);
|
||||
}
|
||||
else
|
||||
{
|
||||
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, this->_framebufferColor);
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
}
|
||||
|
||||
|
@ -3298,7 +3327,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
|
|||
memset(GPU->GetEngineMain()->Get3DFramebufferMain(), 0, this->_framebufferColorSizeBytes);
|
||||
memset(GPU->GetEngineMain()->Get3DFramebuffer16(), 0, this->_framebufferPixCount * sizeof(u16));
|
||||
|
||||
if(!BEGINGL())
|
||||
if (!BEGINGL())
|
||||
{
|
||||
return OGLERROR_BEGINGL_FAILED;
|
||||
}
|
||||
|
@ -3308,6 +3337,8 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
|
|||
glDrawBuffer(OGL_COLOROUT_ATTACHMENT_ID);
|
||||
glClearBufferfv(GL_COLOR, 0, oglColor);
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
if (this->_mappedFramebuffer != NULL)
|
||||
{
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
|
@ -3315,6 +3346,7 @@ Render3DError OpenGLRenderer_3_2::RenderPowerOff()
|
|||
}
|
||||
|
||||
glReadPixels(0, 0, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, OGLRef.readPixelsBestFormat, OGLRef.readPixelsBestDataType, 0);
|
||||
}
|
||||
|
||||
ENDGL();
|
||||
|
||||
|
|
|
@ -408,8 +408,20 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
|
|||
this->isVBOSupported = true;
|
||||
this->CreateVBOs();
|
||||
|
||||
this->isPBOSupported = true;
|
||||
this->CreatePBOs();
|
||||
// PBOs are only used when reading back the rendered framebuffer for the emulated
|
||||
// BG0 layer. For desktop-class GPUs, doing an asynchronous glReadPixels() call
|
||||
// is always advantageous since such devices are expected to have their GPUs
|
||||
// connected to a data bus.
|
||||
//
|
||||
// However, many ARM-based mobile devices use integrated GPUs with varying degrees
|
||||
// of memory latency and implementation quality. This means that the performance
|
||||
// of an asynchronous glReadPixels() call is NOT guaranteed on such devices.
|
||||
//
|
||||
// In fact, many ARM-based devices suffer devastating performance drops when trying
|
||||
// to do asynchronous framebuffer reads. Therefore, since most OpenGL ES users will
|
||||
// be running an ARM-based iGPU, we will disable PBOs for OpenGL ES and stick with
|
||||
// a traditional synchronous glReadPixels() call instead.
|
||||
this->isPBOSupported = false;
|
||||
|
||||
this->isVAOSupported = true;
|
||||
this->CreateVAOs();
|
||||
|
|
Loading…
Reference in New Issue