OpenGL: Always use a PBO in EncodeToRamUsingShader

This improves performance significantly on macOS, particularly
noticeably in the Super Mario Sunshine transition, which goes
from ~5FPS to ~17FPS.
This commit is contained in:
hthh 2016-12-09 19:14:16 +11:00
parent d7dc854b50
commit 801d1d1876
1 changed files with 17 additions and 20 deletions

View File

@ -242,34 +242,31 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
int dstSize = dst_line_size * dstHeight;
if ((writeStride != dst_line_size) && (dstHeight > 1))
{
// writing to a texture of a different size
// also copy more then one block line, so the different strides matters
// copy into one pbo first, map this buffer, and then memcpy into GC memory
// in this way, we only have one vram->ram transfer, but maybe a bigger
// CPU overhead because of the pbo
glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO);
glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, nullptr, GL_STREAM_READ);
glReadPixels(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE,
nullptr);
u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT);
// When the dst_line_size and writeStride are the same, we could use glReadPixels directly to RAM.
// But instead we always copy the data via a PBO, because macOS inexplicably prefers this (most
// noticeably in the Super Mario Sunshine transition).
glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO);
glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, nullptr, GL_STREAM_READ);
glReadPixels(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE,
nullptr);
u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT);
if (dst_line_size == writeStride)
{
memcpy(destAddr, pbo, dst_line_size * dstHeight);
}
else
{
for (size_t i = 0; i < dstHeight; ++i)
{
memcpy(destAddr, pbo, dst_line_size);
pbo += dst_line_size;
destAddr += writeStride;
}
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
else
{
glReadPixels(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE,
destAddr);
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
void EncodeToRamFromTexture(u8* dest_ptr, u32 format, u32 native_width, u32 bytes_per_row,