VideoCommon: Update EFB peek cache on draw done and tokens

Massively improves performance in Mario Galaxy on Android.
This commit is contained in:
Robin Kertels 2022-09-26 23:41:56 +02:00
parent c196c47e81
commit 779fe13e62
No known key found for this signature in database
GPG Key ID: 3824904F14D40757
5 changed files with 125 additions and 10 deletions

View File

@ -181,6 +181,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_draw_done); INCSTAT(g_stats.this_frame.num_draw_done);
g_texture_cache->FlushEFBCopies(); g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false); g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread()) if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetFinish(cycles_into_future); // may generate interrupt PixelEngine::SetFinish(cycles_into_future); // may generate interrupt
DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF); DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF);
@ -195,6 +196,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_token); INCSTAT(g_stats.this_frame.num_token);
g_texture_cache->FlushEFBCopies(); g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false); g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread()) if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false, cycles_into_future); PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF); DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF);
@ -203,6 +205,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_token_int); INCSTAT(g_stats.this_frame.num_token_int);
g_texture_cache->FlushEFBCopies(); g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false); g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread()) if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true, cycles_into_future); PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF); DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF);

View File

@ -26,6 +26,7 @@
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
@ -415,6 +416,7 @@ void RunGpuLoop()
// The fifo is empty and it's unlikely we will get any more work in the near future. // The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in it's buffer. // Make sure VertexManager finishes drawing any primitives it has stored in it's buffer.
g_vertex_manager->Flush(); g_vertex_manager->Flush();
g_framebuffer_manager->RefreshPeekCache();
} }
}, },
100); 100);

View File

@ -389,7 +389,7 @@ bool FramebufferManager::IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* ti
{ {
*tile_index = *tile_index =
((y / m_efb_cache_tile_size) * m_efb_cache_tiles_wide) + (x / m_efb_cache_tile_size); ((y / m_efb_cache_tile_size) * m_efb_cache_tiles_wide) + (x / m_efb_cache_tile_size);
return data.valid && data.tiles[*tile_index]; return data.valid && data.tiles[*tile_index].present;
} }
} }
@ -417,6 +417,15 @@ u32 FramebufferManager::PeekEFBColor(u32 x, u32 y)
if (!IsEFBCacheTilePresent(false, x, y, &tile_index)) if (!IsEFBCacheTilePresent(false, x, y, &tile_index))
PopulateEFBCache(false, tile_index); PopulateEFBCache(false, tile_index);
if (IsUsingTiledEFBCache())
m_efb_color_cache.tiles[tile_index].frame_access_mask |= 1;
if (m_efb_color_cache.needs_flush)
{
m_efb_color_cache.readback_texture->Flush();
m_efb_color_cache.needs_flush = false;
}
u32 value; u32 value;
m_efb_color_cache.readback_texture->ReadTexel(x, y, &value); m_efb_color_cache.readback_texture->ReadTexel(x, y, &value);
return value; return value;
@ -432,6 +441,15 @@ float FramebufferManager::PeekEFBDepth(u32 x, u32 y)
if (!IsEFBCacheTilePresent(true, x, y, &tile_index)) if (!IsEFBCacheTilePresent(true, x, y, &tile_index))
PopulateEFBCache(true, tile_index); PopulateEFBCache(true, tile_index);
if (IsUsingTiledEFBCache())
m_efb_depth_cache.tiles[tile_index].frame_access_mask |= 1;
if (m_efb_depth_cache.needs_flush)
{
m_efb_depth_cache.readback_texture->Flush();
m_efb_depth_cache.needs_flush = false;
}
float value; float value;
m_efb_depth_cache.readback_texture->ReadTexel(x, y, &value); m_efb_depth_cache.readback_texture->ReadTexel(x, y, &value);
return value; return value;
@ -449,23 +467,82 @@ void FramebufferManager::SetEFBCacheTileSize(u32 size)
PanicAlertFmt("Failed to create EFB readback framebuffers"); PanicAlertFmt("Failed to create EFB readback framebuffers");
} }
void FramebufferManager::RefreshPeekCache()
{
if (m_efb_color_cache.valid && m_efb_depth_cache.valid)
{
return;
}
bool flush_command_buffer = false;
if (IsUsingTiledEFBCache())
{
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
if (m_efb_color_cache.tiles[i].frame_access_mask != 0 &&
(!m_efb_color_cache.valid || !m_efb_color_cache.tiles[i].present))
{
PopulateEFBCache(false, i, true);
flush_command_buffer = true;
}
if (m_efb_depth_cache.tiles[i].frame_access_mask != 0 &&
(!m_efb_depth_cache.valid || !m_efb_depth_cache.tiles[i].present))
{
PopulateEFBCache(true, i, true);
flush_command_buffer = true;
}
}
}
else
{
if (!m_efb_color_cache.valid)
{
PopulateEFBCache(false, 0, true);
flush_command_buffer = true;
}
if (!m_efb_depth_cache.valid)
{
PopulateEFBCache(true, 0, true);
flush_command_buffer = true;
}
}
if (flush_command_buffer)
{
g_renderer->Flush();
}
}
void FramebufferManager::InvalidatePeekCache(bool forced) void FramebufferManager::InvalidatePeekCache(bool forced)
{ {
if (forced || m_efb_color_cache.out_of_date) if (forced || m_efb_color_cache.out_of_date)
{ {
if (m_efb_color_cache.valid) if (m_efb_color_cache.valid)
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false); {
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
m_efb_color_cache.tiles[i].present = false;
}
}
m_efb_color_cache.valid = false; m_efb_color_cache.valid = false;
m_efb_color_cache.out_of_date = false; m_efb_color_cache.out_of_date = false;
m_efb_color_cache.needs_flush = true;
} }
if (forced || m_efb_depth_cache.out_of_date) if (forced || m_efb_depth_cache.out_of_date)
{ {
if (m_efb_depth_cache.valid) if (m_efb_depth_cache.valid)
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false); {
for (u32 i = 0; i < m_efb_depth_cache.tiles.size(); i++)
{
m_efb_depth_cache.tiles[i].present = false;
}
}
m_efb_depth_cache.valid = false; m_efb_depth_cache.valid = false;
m_efb_depth_cache.out_of_date = false; m_efb_depth_cache.out_of_date = false;
m_efb_depth_cache.needs_flush = true;
} }
} }
@ -480,6 +557,18 @@ void FramebufferManager::FlagPeekCacheAsOutOfDate()
InvalidatePeekCache(); InvalidatePeekCache();
} }
void FramebufferManager::EndOfFrame()
{
if (!IsUsingTiledEFBCache())
return;
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
m_efb_color_cache.tiles[i].frame_access_mask <<= 1;
m_efb_depth_cache.tiles[i].frame_access_mask <<= 1;
}
}
bool FramebufferManager::CompileReadbackPipelines() bool FramebufferManager::CompileReadbackPipelines()
{ {
AbstractPipelineConfig config = {}; AbstractPipelineConfig config = {};
@ -612,9 +701,11 @@ bool FramebufferManager::CreateReadbackFramebuffer()
const u32 tiles_high = ((EFB_HEIGHT + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size); const u32 tiles_high = ((EFB_HEIGHT + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size);
const u32 total_tiles = tiles_wide * tiles_high; const u32 total_tiles = tiles_wide * tiles_high;
m_efb_color_cache.tiles.resize(total_tiles); m_efb_color_cache.tiles.resize(total_tiles);
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false); std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(),
EFBCacheTile{false, 0});
m_efb_depth_cache.tiles.resize(total_tiles); m_efb_depth_cache.tiles.resize(total_tiles);
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false); std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(),
EFBCacheTile{false, 0});
m_efb_cache_tiles_wide = tiles_wide; m_efb_cache_tiles_wide = tiles_wide;
} }
@ -633,7 +724,7 @@ void FramebufferManager::DestroyReadbackFramebuffer()
DestroyCache(m_efb_depth_cache); DestroyCache(m_efb_depth_cache);
} }
void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index) void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index, bool async)
{ {
FlushEFBPokes(); FlushEFBPokes();
g_vertex_manager->OnCPUEFBAccess(); g_vertex_manager->OnCPUEFBAccess();
@ -693,11 +784,19 @@ void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index)
} }
// Wait until the copy is complete. // Wait until the copy is complete.
if (!async)
{
data.readback_texture->Flush(); data.readback_texture->Flush();
data.needs_flush = false;
}
else
{
data.needs_flush = true;
}
data.valid = true; data.valid = true;
data.out_of_date = false; data.out_of_date = false;
if (IsUsingTiledEFBCache()) if (IsUsingTiledEFBCache())
data.tiles[tile_index] = true; data.tiles[tile_index].present = true;
} }
void FramebufferManager::ClearEFB(const MathUtil::Rectangle<int>& rc, bool clear_color, void FramebufferManager::ClearEFB(const MathUtil::Rectangle<int>& rc, bool clear_color,

View File

@ -99,7 +99,9 @@ public:
float PeekEFBDepth(u32 x, u32 y); float PeekEFBDepth(u32 x, u32 y);
void SetEFBCacheTileSize(u32 size); void SetEFBCacheTileSize(u32 size);
void InvalidatePeekCache(bool forced = true); void InvalidatePeekCache(bool forced = true);
void RefreshPeekCache();
void FlagPeekCacheAsOutOfDate(); void FlagPeekCacheAsOutOfDate();
void EndOfFrame();
// Writes a value to the framebuffer. This will never block, and writes will be batched. // Writes a value to the framebuffer. This will never block, and writes will be batched.
void PokeEFBColor(u32 x, u32 y, u32 color); void PokeEFBColor(u32 x, u32 y, u32 color);
@ -117,6 +119,12 @@ protected:
}; };
static_assert(std::is_standard_layout<EFBPokeVertex>::value, "EFBPokeVertex is standard-layout"); static_assert(std::is_standard_layout<EFBPokeVertex>::value, "EFBPokeVertex is standard-layout");
// Per-tile bookkeeping for the EFB peek cache.
struct EFBCacheTile
{
// Set by PopulateEFBCache once this tile has been read back (tiled cache only);
// cleared again by InvalidatePeekCache.
bool present;
// Access history bitmask: bit 0 is set when a peek touches the tile, and the
// whole mask is shifted left by one in EndOfFrame. A non-zero value therefore
// means the tile was peeked within roughly the last eight frames, which is what
// RefreshPeekCache uses to decide whether to refresh the tile.
u8 frame_access_mask;
};
// EFB cache - for CPU EFB access // EFB cache - for CPU EFB access
// Tiles are ordered left-to-right, then top-to-bottom // Tiles are ordered left-to-right, then top-to-bottom
struct EFBCacheData struct EFBCacheData
@ -125,9 +133,10 @@ protected:
std::unique_ptr<AbstractFramebuffer> framebuffer; std::unique_ptr<AbstractFramebuffer> framebuffer;
std::unique_ptr<AbstractStagingTexture> readback_texture; std::unique_ptr<AbstractStagingTexture> readback_texture;
std::unique_ptr<AbstractPipeline> copy_pipeline; std::unique_ptr<AbstractPipeline> copy_pipeline;
std::vector<bool> tiles; std::vector<EFBCacheTile> tiles;
bool out_of_date; bool out_of_date;
bool valid; bool valid;
bool needs_flush;
}; };
bool CreateEFBFramebuffer(); bool CreateEFBFramebuffer();
@ -151,7 +160,7 @@ protected:
bool IsUsingTiledEFBCache() const; bool IsUsingTiledEFBCache() const;
bool IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const; bool IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const;
MathUtil::Rectangle<int> GetEFBCacheTileRect(u32 tile_index) const; MathUtil::Rectangle<int> GetEFBCacheTileRect(u32 tile_index) const;
void PopulateEFBCache(bool depth, u32 tile_index); void PopulateEFBCache(bool depth, u32 tile_index, bool async = false);
void CreatePokeVertices(std::vector<EFBPokeVertex>* destination_list, u32 x, u32 y, float z, void CreatePokeVertices(std::vector<EFBPokeVertex>* destination_list, u32 x, u32 y, float z,
u32 color); u32 color);

View File

@ -1346,6 +1346,8 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
m_graphics_mod_manager.EndOfFrame(); m_graphics_mod_manager.EndOfFrame();
} }
g_framebuffer_manager->EndOfFrame();
if (xfb_addr && fb_width && fb_stride && fb_height) if (xfb_addr && fb_width && fb_stride && fb_height)
{ {
// Get the current XFB from texture cache // Get the current XFB from texture cache