OpenGL: Always use a PBO in EncodeToRamUsingShader
This improves performance significantly on macOS, particularly noticeably in the Super Mario Sunshine transition, which goes from ~5FPS to ~17FPS.
This commit is contained in:
parent
d7dc854b50
commit
801d1d1876
|
@ -242,34 +242,31 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
|
|||
|
||||
int dstSize = dst_line_size * dstHeight;
|
||||
|
||||
if ((writeStride != dst_line_size) && (dstHeight > 1))
|
||||
{
|
||||
// writing to a texture of a different size
|
||||
// also copy more then one block line, so the different strides matters
|
||||
// copy into one pbo first, map this buffer, and then memcpy into GC memory
|
||||
// in this way, we only have one vram->ram transfer, but maybe a bigger
|
||||
// CPU overhead because of the pbo
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, nullptr, GL_STREAM_READ);
|
||||
glReadPixels(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE,
|
||||
nullptr);
|
||||
u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT);
|
||||
// When the dst_line_size and writeStride are the same, we could use glReadPixels directly to RAM.
|
||||
// But instead we always copy the data via a PBO, because macOS inexplicably prefers this (most
|
||||
// noticeably in the Super Mario Sunshine transition).
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, s_PBO);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, dstSize, nullptr, GL_STREAM_READ);
|
||||
glReadPixels(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE,
|
||||
nullptr);
|
||||
u8* pbo = (u8*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, dstSize, GL_MAP_READ_BIT);
|
||||
|
||||
if (dst_line_size == writeStride)
|
||||
{
|
||||
memcpy(destAddr, pbo, dst_line_size * dstHeight);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < dstHeight; ++i)
|
||||
{
|
||||
memcpy(destAddr, pbo, dst_line_size);
|
||||
pbo += dst_line_size;
|
||||
destAddr += writeStride;
|
||||
}
|
||||
}
|
||||
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
glReadPixels(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight, GL_BGRA, GL_UNSIGNED_BYTE,
|
||||
destAddr);
|
||||
}
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
void EncodeToRamFromTexture(u8* dest_ptr, u32 format, u32 native_width, u32 bytes_per_row,
|
||||
|
|
Loading…
Reference in New Issue