Added an EFB peek cache to the GL video plugin

Most of the games using EFB peeks are suffering from major performance problems
when these peeks are not disabled in the graphics settings. This is an attempt
to fix this in the GL renderer by doing the glReadPixels in bulk: instead of
doing a lot of 1x1 pixel reads, read for 64x64 pixels at once and keep that in
a cache.

Deck menu in Baten Kaitos: 3FPS -> 54FPS
Character creation in Monster Hunter Tri: 7FPS -> 60FPS
This commit is contained in:
Pierre Bourdon 2011-08-22 06:15:02 +02:00
parent 3fc5d8d7cf
commit 1e558aedeb
3 changed files with 107 additions and 28 deletions

View File

@ -123,6 +123,12 @@ static u32 s_blendMode;
static std::thread scrshotThread; static std::thread scrshotThread;
#endif #endif
// EFB cache related
const u32 EFB_CACHE_RECT_SIZE = 64; // Cache 64x64 blocks.
const u32 EFB_CACHE_WIDTH = EFB_WIDTH / EFB_CACHE_RECT_SIZE;
const u32 EFB_CACHE_HEIGHT = EFB_HEIGHT / EFB_CACHE_RECT_SIZE;
static bool s_efbCacheValid[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT];
static std::vector<u32> s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PEEK_Z and PEEK_COLOR
static const GLenum glSrcFactors[8] = static const GLenum glSrcFactors[8] =
{ {
@ -637,6 +643,42 @@ void Renderer::SetColorMask()
glColorMask(ColorMask, ColorMask, ColorMask, AlphaMask); glColorMask(ColorMask, ColorMask, ColorMask, AlphaMask);
} }
void ClearEFBCache()
{
for (u32 i = 0; i < EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT; ++i)
s_efbCacheValid[0][i] = false;
for (u32 i = 0; i < EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT; ++i)
s_efbCacheValid[1][i] = false;
}
void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const u32* data)
{
u32 cacheType = (type == PEEK_Z ? 0 : 1);
if (!s_efbCache[cacheType][cacheRectIdx].size())
s_efbCache[cacheType][cacheRectIdx].resize(EFB_CACHE_RECT_SIZE * EFB_CACHE_RECT_SIZE);
u32 targetPixelRcWidth = targetPixelRc.right - targetPixelRc.left;
for (u32 yCache = 0; yCache < EFB_CACHE_RECT_SIZE; ++yCache)
{
u32 yEFB = efbPixelRc.top + yCache;
u32 yPixel = (EFBToScaledY(EFB_HEIGHT - yEFB) + EFBToScaledY(EFB_HEIGHT - yEFB - 1)) / 2;
u32 yData = yPixel - targetPixelRc.bottom;
for (u32 xCache = 0; xCache < EFB_CACHE_RECT_SIZE; ++xCache)
{
u32 xEFB = efbPixelRc.left + xCache;
u32 xPixel = (EFBToScaledX(xEFB) + EFBToScaledX(xEFB + 1)) / 2;
u32 xData = xPixel - targetPixelRc.left;
s_efbCache[cacheType][cacheRectIdx][yCache * EFB_CACHE_RECT_SIZE + xCache] = data[yData * targetPixelRcWidth + xData];
}
}
s_efbCacheValid[cacheType][cacheRectIdx] = true;
}
// This function allows the CPU to directly access the EFB. // This function allows the CPU to directly access the EFB.
// There are EFB peeks (which will read the color or depth of a pixel) // There are EFB peeks (which will read the color or depth of a pixel)
// and EFB pokes (which will change the color or depth of a pixel). // and EFB pokes (which will change the color or depth of a pixel).
@ -656,34 +698,49 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
if (!g_ActiveConfig.bEFBAccessEnable) if (!g_ActiveConfig.bEFBAccessEnable)
return 0; return 0;
// Get the rectangular target region covered by the EFB pixel u32 cacheRectIdx = ((x / EFB_CACHE_RECT_SIZE) << 16) | (y / EFB_CACHE_RECT_SIZE);
// Get the rectangular target region containing the EFB pixel
EFBRectangle efbPixelRc; EFBRectangle efbPixelRc;
efbPixelRc.left = x; efbPixelRc.left = (x / EFB_CACHE_RECT_SIZE) * EFB_CACHE_RECT_SIZE;
efbPixelRc.top = y; efbPixelRc.top = (y / EFB_CACHE_RECT_SIZE) * EFB_CACHE_RECT_SIZE;
efbPixelRc.right = x + 1; efbPixelRc.right = efbPixelRc.left + EFB_CACHE_RECT_SIZE;
efbPixelRc.bottom = y + 1; efbPixelRc.bottom = efbPixelRc.top + EFB_CACHE_RECT_SIZE;
TargetRectangle targetPixelRc = ConvertEFBRectangle(efbPixelRc); TargetRectangle targetPixelRc = ConvertEFBRectangle(efbPixelRc);
u32 targetPixelRcWidth = targetPixelRc.right - targetPixelRc.left;
u32 targetPixelRcHeight = targetPixelRc.top - targetPixelRc.bottom;
// TODO (FIX) : currently, AA path is broken/offset and doesn't return the correct pixel // TODO (FIX) : currently, AA path is broken/offset and doesn't return the correct pixel
switch (type) switch (type)
{ {
case PEEK_Z: case PEEK_Z:
{ {
if (s_MSAASamples > 1) u32 z;
if (!s_efbCacheValid[0][cacheRectIdx])
{ {
// Resolve our rectangle. if (s_MSAASamples > 1)
FramebufferManager::GetEFBDepthTexture(efbPixelRc); {
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, FramebufferManager::GetResolvedFramebuffer()); // Resolve our rectangle.
FramebufferManager::GetEFBDepthTexture(efbPixelRc);
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, FramebufferManager::GetResolvedFramebuffer());
}
u32* depthMap = new u32[targetPixelRcWidth * targetPixelRcHeight];
glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, targetPixelRcHeight,
GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, depthMap);
GL_REPORT_ERRORD();
UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, depthMap);
delete[] depthMap;
} }
// Sample from the center of the target region. u32 xRect = x % EFB_CACHE_RECT_SIZE;
int srcX = (targetPixelRc.left + targetPixelRc.right) / 2; u32 yRect = y % EFB_CACHE_RECT_SIZE;
int srcY = (targetPixelRc.top + targetPixelRc.bottom) / 2; z = s_efbCache[0][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect];
u32 z = 0;
glReadPixels(srcX, srcY, 1, 1, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, &z);
GL_REPORT_ERRORD();
// Scale the 32-bit value returned by glReadPixels to a 24-bit // Scale the 32-bit value returned by glReadPixels to a 24-bit
// value (GC uses a 24-bit Z-buffer). // value (GC uses a 24-bit Z-buffer).
@ -708,21 +765,31 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
// determine if we're aiming at an enemy (0x80 / 0x88) or not (0x70) // determine if we're aiming at an enemy (0x80 / 0x88) or not (0x70)
// Wind Waker is also using it for the pictograph to determine the color of each pixel // Wind Waker is also using it for the pictograph to determine the color of each pixel
if (s_MSAASamples > 1) u32 color;
if (!s_efbCacheValid[1][cacheRectIdx])
{ {
// Resolve our rectangle. if (s_MSAASamples > 1)
FramebufferManager::GetEFBColorTexture(efbPixelRc); {
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, FramebufferManager::GetResolvedFramebuffer()); // Resolve our rectangle.
FramebufferManager::GetEFBColorTexture(efbPixelRc);
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, FramebufferManager::GetResolvedFramebuffer());
}
u32* colorMap = new u32[targetPixelRcWidth * targetPixelRcHeight];
glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, targetPixelRcHeight,
GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, colorMap);
GL_REPORT_ERRORD();
UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, colorMap);
delete[] colorMap;
} }
// Sample from the center of the target region. u32 xRect = x % EFB_CACHE_RECT_SIZE;
int srcX = (targetPixelRc.left + targetPixelRc.right) / 2; u32 yRect = y % EFB_CACHE_RECT_SIZE;
int srcY = (targetPixelRc.top + targetPixelRc.bottom) / 2; color = s_efbCache[1][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect];
// Read back pixel in BGRA format, then byteswap to get GameCube's ARGB Format.
u32 color = 0;
glReadPixels(srcX, srcY, 1, 1, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, &color);
GL_REPORT_ERRORD();
// check what to do with the alpha channel (GX_PokeAlphaRead) // check what to do with the alpha channel (GX_PokeAlphaRead)
PixelEngine::UPEAlphaReadReg alpha_read_mode; PixelEngine::UPEAlphaReadReg alpha_read_mode;
@ -839,6 +906,8 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE
glEnd(); glEnd();
RestoreAPIState(); RestoreAPIState();
ClearEFBCache();
} }
void Renderer::ReinterpretPixelData(unsigned int convtype) void Renderer::ReinterpretPixelData(unsigned int convtype)
@ -1347,6 +1416,9 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
// GetTargetWidth(), GetTargetHeight()); // GetTargetWidth(), GetTargetHeight());
Core::Callback_VideoCopiedToXFB(XFBWrited || (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB)); Core::Callback_VideoCopiedToXFB(XFBWrited || (g_ActiveConfig.bUseXFB && g_ActiveConfig.bUseRealXFB));
XFBWrited = false; XFBWrited = false;
// Invalidate EFB cache
ClearEFBCache();
} }
// ALWAYS call RestoreAPIState for each ResetAPIState call you're doing // ALWAYS call RestoreAPIState for each ResetAPIState call you're doing

View File

@ -7,6 +7,8 @@
namespace OGL namespace OGL
{ {
void ClearEFBCache();
class Renderer : public ::Renderer class Renderer : public ::Renderer
{ {
public: public:
@ -57,6 +59,9 @@ public:
void SetVSConstant4fv(unsigned int const_number, const float *f); void SetVSConstant4fv(unsigned int const_number, const float *f);
void SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const float *f); void SetMultiVSConstant3fv(unsigned int const_number, unsigned int count, const float *f);
void SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f); void SetMultiVSConstant4fv(unsigned int const_number, unsigned int count, const float *f);
private:
void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, const TargetRectangle& targetPixelRc, const u32* data);
}; };
} }

View File

@ -268,6 +268,8 @@ void VertexManager::vFlush()
#endif #endif
g_Config.iSaveTargetId++; g_Config.iSaveTargetId++;
ClearEFBCache();
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
} }