VideoCommon: Update EFB peek cache on draw done and tokens

Massively improves performance in Mario Galaxy on Android.
This commit is contained in:
Robin Kertels 2022-09-26 23:41:56 +02:00
parent c196c47e81
commit 779fe13e62
No known key found for this signature in database
GPG Key ID: 3824904F14D40757
5 changed files with 125 additions and 10 deletions

View File

@ -181,6 +181,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_draw_done);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetFinish(cycles_into_future); // may generate interrupt
DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF);
@ -195,6 +196,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_token);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF);
@ -203,6 +205,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
INCSTAT(g_stats.this_frame.num_token_int);
g_texture_cache->FlushEFBCopies();
g_framebuffer_manager->InvalidatePeekCache(false);
g_framebuffer_manager->RefreshPeekCache();
if (!Fifo::UseDeterministicGPUThread())
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true, cycles_into_future);
DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF);

View File

@ -26,6 +26,7 @@
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
@ -415,6 +416,7 @@ void RunGpuLoop()
// The fifo is empty and it's unlikely we will get any more work in the near future.
// Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
g_vertex_manager->Flush();
g_framebuffer_manager->RefreshPeekCache();
}
},
100);

View File

@ -389,7 +389,7 @@ bool FramebufferManager::IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* ti
{
*tile_index =
((y / m_efb_cache_tile_size) * m_efb_cache_tiles_wide) + (x / m_efb_cache_tile_size);
return data.valid && data.tiles[*tile_index];
return data.valid && data.tiles[*tile_index].present;
}
}
@ -417,6 +417,15 @@ u32 FramebufferManager::PeekEFBColor(u32 x, u32 y)
if (!IsEFBCacheTilePresent(false, x, y, &tile_index))
PopulateEFBCache(false, tile_index);
if (IsUsingTiledEFBCache())
m_efb_color_cache.tiles[tile_index].frame_access_mask |= 1;
if (m_efb_color_cache.needs_flush)
{
m_efb_color_cache.readback_texture->Flush();
m_efb_color_cache.needs_flush = false;
}
u32 value;
m_efb_color_cache.readback_texture->ReadTexel(x, y, &value);
return value;
@ -432,6 +441,15 @@ float FramebufferManager::PeekEFBDepth(u32 x, u32 y)
if (!IsEFBCacheTilePresent(true, x, y, &tile_index))
PopulateEFBCache(true, tile_index);
if (IsUsingTiledEFBCache())
m_efb_depth_cache.tiles[tile_index].frame_access_mask |= 1;
if (m_efb_depth_cache.needs_flush)
{
m_efb_depth_cache.readback_texture->Flush();
m_efb_depth_cache.needs_flush = false;
}
float value;
m_efb_depth_cache.readback_texture->ReadTexel(x, y, &value);
return value;
@ -449,23 +467,82 @@ void FramebufferManager::SetEFBCacheTileSize(u32 size)
PanicAlertFmt("Failed to create EFB readback framebuffers");
}
void FramebufferManager::RefreshPeekCache()
{
if (m_efb_color_cache.valid && m_efb_depth_cache.valid)
{
return;
}
bool flush_command_buffer = false;
if (IsUsingTiledEFBCache())
{
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
if (m_efb_color_cache.tiles[i].frame_access_mask != 0 &&
(!m_efb_color_cache.valid || !m_efb_color_cache.tiles[i].present))
{
PopulateEFBCache(false, i, true);
flush_command_buffer = true;
}
if (m_efb_depth_cache.tiles[i].frame_access_mask != 0 &&
(!m_efb_depth_cache.valid || !m_efb_depth_cache.tiles[i].present))
{
PopulateEFBCache(true, i, true);
flush_command_buffer = true;
}
}
}
else
{
if (!m_efb_color_cache.valid)
{
PopulateEFBCache(false, 0, true);
flush_command_buffer = true;
}
if (!m_efb_depth_cache.valid)
{
PopulateEFBCache(true, 0, true);
flush_command_buffer = true;
}
}
if (flush_command_buffer)
{
g_renderer->Flush();
}
}
void FramebufferManager::InvalidatePeekCache(bool forced)
{
if (forced || m_efb_color_cache.out_of_date)
{
if (m_efb_color_cache.valid)
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false);
{
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
m_efb_color_cache.tiles[i].present = false;
}
}
m_efb_color_cache.valid = false;
m_efb_color_cache.out_of_date = false;
m_efb_color_cache.needs_flush = true;
}
if (forced || m_efb_depth_cache.out_of_date)
{
if (m_efb_depth_cache.valid)
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false);
{
for (u32 i = 0; i < m_efb_depth_cache.tiles.size(); i++)
{
m_efb_depth_cache.tiles[i].present = false;
}
}
m_efb_depth_cache.valid = false;
m_efb_depth_cache.out_of_date = false;
m_efb_depth_cache.needs_flush = true;
}
}
@ -480,6 +557,18 @@ void FramebufferManager::FlagPeekCacheAsOutOfDate()
InvalidatePeekCache();
}
void FramebufferManager::EndOfFrame()
{
if (!IsUsingTiledEFBCache())
return;
for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++)
{
m_efb_color_cache.tiles[i].frame_access_mask <<= 1;
m_efb_depth_cache.tiles[i].frame_access_mask <<= 1;
}
}
bool FramebufferManager::CompileReadbackPipelines()
{
AbstractPipelineConfig config = {};
@ -612,9 +701,11 @@ bool FramebufferManager::CreateReadbackFramebuffer()
const u32 tiles_high = ((EFB_HEIGHT + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size);
const u32 total_tiles = tiles_wide * tiles_high;
m_efb_color_cache.tiles.resize(total_tiles);
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false);
std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(),
EFBCacheTile{false, 0});
m_efb_depth_cache.tiles.resize(total_tiles);
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false);
std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(),
EFBCacheTile{false, 0});
m_efb_cache_tiles_wide = tiles_wide;
}
@ -633,7 +724,7 @@ void FramebufferManager::DestroyReadbackFramebuffer()
DestroyCache(m_efb_depth_cache);
}
void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index)
void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index, bool async)
{
FlushEFBPokes();
g_vertex_manager->OnCPUEFBAccess();
@ -693,11 +784,19 @@ void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index)
}
// Wait until the copy is complete.
if (!async)
{
data.readback_texture->Flush();
data.needs_flush = false;
}
else
{
data.needs_flush = true;
}
data.valid = true;
data.out_of_date = false;
if (IsUsingTiledEFBCache())
data.tiles[tile_index] = true;
data.tiles[tile_index].present = true;
}
void FramebufferManager::ClearEFB(const MathUtil::Rectangle<int>& rc, bool clear_color,

View File

@ -99,7 +99,9 @@ public:
float PeekEFBDepth(u32 x, u32 y);
void SetEFBCacheTileSize(u32 size);
void InvalidatePeekCache(bool forced = true);
void RefreshPeekCache();
void FlagPeekCacheAsOutOfDate();
void EndOfFrame();
// Writes a value to the framebuffer. This will never block, and writes will be batched.
void PokeEFBColor(u32 x, u32 y, u32 color);
@ -117,6 +119,12 @@ protected:
};
static_assert(std::is_standard_layout<EFBPokeVertex>::value, "EFBPokeVertex is standard-layout");
// Per-tile bookkeeping for the tiled EFB peek cache.
struct EFBCacheTile
{
// True once PopulateEFBCache has filled this tile; reset to false by InvalidatePeekCache.
bool present;
// Access history, one bit per frame: bit 0 is set when PeekEFBColor/PeekEFBDepth touches
// the tile, and EndOfFrame shifts the mask left by one. A non-zero mask makes
// RefreshPeekCache re-populate the tile when its data is missing.
u8 frame_access_mask;
};
// EFB cache - for CPU EFB access
// Tiles are ordered left-to-right, then top-to-bottom
struct EFBCacheData
@ -125,9 +133,10 @@ protected:
std::unique_ptr<AbstractFramebuffer> framebuffer;
std::unique_ptr<AbstractStagingTexture> readback_texture;
std::unique_ptr<AbstractPipeline> copy_pipeline;
std::vector<bool> tiles;
std::vector<EFBCacheTile> tiles;
bool out_of_date;
bool valid;
bool needs_flush;
};
bool CreateEFBFramebuffer();
@ -151,7 +160,7 @@ protected:
bool IsUsingTiledEFBCache() const;
bool IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const;
MathUtil::Rectangle<int> GetEFBCacheTileRect(u32 tile_index) const;
void PopulateEFBCache(bool depth, u32 tile_index);
void PopulateEFBCache(bool depth, u32 tile_index, bool async = false);
void CreatePokeVertices(std::vector<EFBPokeVertex>* destination_list, u32 x, u32 y, float z,
u32 color);

View File

@ -1346,6 +1346,8 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
m_graphics_mod_manager.EndOfFrame();
}
g_framebuffer_manager->EndOfFrame();
if (xfb_addr && fb_width && fb_stride && fb_height)
{
// Get the current XFB from texture cache