From 779fe13e62e6c0e9d591f2cb33d00a89191dd6a6 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Mon, 26 Sep 2022 23:41:56 +0200 Subject: [PATCH] VideoCommon: Update EFB peek cache on draw done and tokens Massively improves performance in Mario Galaxy on Android. --- Source/Core/VideoCommon/BPStructs.cpp | 3 + Source/Core/VideoCommon/Fifo.cpp | 2 + .../Core/VideoCommon/FramebufferManager.cpp | 115 ++++++++++++++++-- Source/Core/VideoCommon/FramebufferManager.h | 13 +- Source/Core/VideoCommon/RenderBase.cpp | 2 + 5 files changed, 125 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 35f2f23ffe..bf0438a74b 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -181,6 +181,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) INCSTAT(g_stats.this_frame.num_draw_done); g_texture_cache->FlushEFBCopies(); g_framebuffer_manager->InvalidatePeekCache(false); + g_framebuffer_manager->RefreshPeekCache(); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetFinish(cycles_into_future); // may generate interrupt DEBUG_LOG_FMT(VIDEO, "GXSetDrawDone SetPEFinish (value: {:#04X})", bp.newvalue & 0xFFFF); @@ -195,6 +196,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) INCSTAT(g_stats.this_frame.num_token); g_texture_cache->FlushEFBCopies(); g_framebuffer_manager->InvalidatePeekCache(false); + g_framebuffer_manager->RefreshPeekCache(); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), false, cycles_into_future); DEBUG_LOG_FMT(VIDEO, "SetPEToken {:#06X}", bp.newvalue & 0xFFFF); @@ -203,6 +205,7 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future) INCSTAT(g_stats.this_frame.num_token_int); g_texture_cache->FlushEFBCopies(); g_framebuffer_manager->InvalidatePeekCache(false); + g_framebuffer_manager->RefreshPeekCache(); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), true, cycles_into_future); DEBUG_LOG_FMT(VIDEO, "SetPEToken + INT {:#06X}", bp.newvalue & 0xFFFF); diff --git a/Source/Core/VideoCommon/Fifo.cpp b/Source/Core/VideoCommon/Fifo.cpp index 5b31589e82..faea454147 100644 --- a/Source/Core/VideoCommon/Fifo.cpp +++ b/Source/Core/VideoCommon/Fifo.cpp @@ -26,6 +26,7 @@ #include "VideoCommon/CPMemory.h" #include "VideoCommon/CommandProcessor.h" #include "VideoCommon/DataReader.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" @@ -415,6 +416,7 @@ void RunGpuLoop() // The fifo is empty and it's unlikely we will get any more work in the near future. // Make sure VertexManager finishes drawing any primitives it has stored in it's buffer. g_vertex_manager->Flush(); + g_framebuffer_manager->RefreshPeekCache(); } }, 100); diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index 76c265c76a..6dbad557cf 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -389,7 +389,7 @@ bool FramebufferManager::IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* ti { *tile_index = ((y / m_efb_cache_tile_size) * m_efb_cache_tiles_wide) + (x / m_efb_cache_tile_size); - return data.valid && data.tiles[*tile_index]; + return data.valid && data.tiles[*tile_index].present; } } @@ -417,6 +417,15 @@ u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) if (!IsEFBCacheTilePresent(false, x, y, &tile_index)) PopulateEFBCache(false, tile_index); + if (IsUsingTiledEFBCache()) + m_efb_color_cache.tiles[tile_index].frame_access_mask |= 1; + + if (m_efb_color_cache.needs_flush) + { + m_efb_color_cache.readback_texture->Flush(); + m_efb_color_cache.needs_flush = false; + } + u32 value; m_efb_color_cache.readback_texture->ReadTexel(x, y, &value); return value; @@ -432,6 +441,15 @@ float FramebufferManager::PeekEFBDepth(u32 x, u32 y) if (!IsEFBCacheTilePresent(true, x, y, &tile_index)) PopulateEFBCache(true, tile_index); + if (IsUsingTiledEFBCache()) + m_efb_depth_cache.tiles[tile_index].frame_access_mask |= 1; + + if (m_efb_depth_cache.needs_flush) + { + m_efb_depth_cache.readback_texture->Flush(); + m_efb_depth_cache.needs_flush = false; + } + float value; m_efb_depth_cache.readback_texture->ReadTexel(x, y, &value); return value; @@ -449,23 +467,82 @@ void FramebufferManager::SetEFBCacheTileSize(u32 size) PanicAlertFmt("Failed to create EFB readback framebuffers"); } +void FramebufferManager::RefreshPeekCache() +{ + if (m_efb_color_cache.valid && m_efb_depth_cache.valid) + { + return; + } + + bool flush_command_buffer = false; + + if (IsUsingTiledEFBCache()) + { + for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++) + { + if (m_efb_color_cache.tiles[i].frame_access_mask != 0 && + (!m_efb_color_cache.valid || !m_efb_color_cache.tiles[i].present)) + { + PopulateEFBCache(false, i, true); + flush_command_buffer = true; + } + if (m_efb_depth_cache.tiles[i].frame_access_mask != 0 && + (!m_efb_depth_cache.valid || !m_efb_depth_cache.tiles[i].present)) + { + PopulateEFBCache(true, i, true); + flush_command_buffer = true; + } + } + } + else + { + if (!m_efb_color_cache.valid) + { + PopulateEFBCache(false, 0, true); + flush_command_buffer = true; + } + if (!m_efb_depth_cache.valid) + { + PopulateEFBCache(true, 0, true); + flush_command_buffer = true; + } + } + + if (flush_command_buffer) + { + g_renderer->Flush(); + } +} + void FramebufferManager::InvalidatePeekCache(bool forced) { if (forced || m_efb_color_cache.out_of_date) { if (m_efb_color_cache.valid) - std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false); + { + for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++) + { + m_efb_color_cache.tiles[i].present = false; + } + } m_efb_color_cache.valid = false; m_efb_color_cache.out_of_date = false; + m_efb_color_cache.needs_flush = true; } if (forced || m_efb_depth_cache.out_of_date) { if (m_efb_depth_cache.valid) - std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false); + { + for (u32 i = 0; i < m_efb_depth_cache.tiles.size(); i++) + { + m_efb_depth_cache.tiles[i].present = false; + } + } m_efb_depth_cache.valid = false; m_efb_depth_cache.out_of_date = false; + m_efb_depth_cache.needs_flush = true; } } @@ -480,6 +557,18 @@ void FramebufferManager::FlagPeekCacheAsOutOfDate() InvalidatePeekCache(); } +void FramebufferManager::EndOfFrame() +{ + if (!IsUsingTiledEFBCache()) + return; + + for (u32 i = 0; i < m_efb_color_cache.tiles.size(); i++) + { + m_efb_color_cache.tiles[i].frame_access_mask <<= 1; + m_efb_depth_cache.tiles[i].frame_access_mask <<= 1; + } +} + bool FramebufferManager::CompileReadbackPipelines() { AbstractPipelineConfig config = {}; @@ -612,9 +701,11 @@ bool FramebufferManager::CreateReadbackFramebuffer() const u32 tiles_high = ((EFB_HEIGHT + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size); const u32 total_tiles = tiles_wide * tiles_high; m_efb_color_cache.tiles.resize(total_tiles); - std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false); + std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), + EFBCacheTile{false, 0}); m_efb_depth_cache.tiles.resize(total_tiles); - std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false); + std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), + EFBCacheTile{false, 0}); m_efb_cache_tiles_wide = tiles_wide; } @@ -633,7 +724,7 @@ void FramebufferManager::DestroyReadbackFramebuffer() DestroyCache(m_efb_depth_cache); } -void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index) +void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index, bool async) { FlushEFBPokes(); g_vertex_manager->OnCPUEFBAccess(); @@ -693,11 +784,19 @@ void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index) } // Wait until the copy is complete. - data.readback_texture->Flush(); + if (!async) + { + data.readback_texture->Flush(); + data.needs_flush = false; + } + else + { + data.needs_flush = true; + } data.valid = true; data.out_of_date = false; if (IsUsingTiledEFBCache()) - data.tiles[tile_index] = true; + data.tiles[tile_index].present = true; } void FramebufferManager::ClearEFB(const MathUtil::Rectangle& rc, bool clear_color, diff --git a/Source/Core/VideoCommon/FramebufferManager.h b/Source/Core/VideoCommon/FramebufferManager.h index 2d27fef09b..1e918ff937 100644 --- a/Source/Core/VideoCommon/FramebufferManager.h +++ b/Source/Core/VideoCommon/FramebufferManager.h @@ -99,7 +99,9 @@ public: float PeekEFBDepth(u32 x, u32 y); void SetEFBCacheTileSize(u32 size); void InvalidatePeekCache(bool forced = true); + void RefreshPeekCache(); void FlagPeekCacheAsOutOfDate(); + void EndOfFrame(); // Writes a value to the framebuffer. This will never block, and writes will be batched. void PokeEFBColor(u32 x, u32 y, u32 color); @@ -117,6 +119,12 @@ protected: }; static_assert(std::is_standard_layout::value, "EFBPokeVertex is standard-layout"); + struct EFBCacheTile + { + bool present; + u8 frame_access_mask; + }; + // EFB cache - for CPU EFB access // Tiles are ordered left-to-right, then top-to-bottom struct EFBCacheData @@ -125,9 +133,10 @@ protected: std::unique_ptr framebuffer; std::unique_ptr readback_texture; std::unique_ptr copy_pipeline; - std::vector tiles; + std::vector tiles; bool out_of_date; bool valid; + bool needs_flush; }; bool CreateEFBFramebuffer(); @@ -151,7 +160,7 @@ protected: bool IsUsingTiledEFBCache() const; bool IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const; MathUtil::Rectangle GetEFBCacheTileRect(u32 tile_index) const; - void PopulateEFBCache(bool depth, u32 tile_index); + void PopulateEFBCache(bool depth, u32 tile_index, bool async = false); void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, u32 color); diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index e51307fa15..6f727c50ed 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -1346,6 +1346,8 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6 m_graphics_mod_manager.EndOfFrame(); } + g_framebuffer_manager->EndOfFrame(); + if (xfb_addr && fb_width && fb_stride && fb_height) { // Get the current XFB from texture cache