diff --git a/Data/Sys/GameSettings/GFZ.ini b/Data/Sys/GameSettings/GFZ.ini index e6f797c508..6e3a82efae 100644 --- a/Data/Sys/GameSettings/GFZ.ini +++ b/Data/Sys/GameSettings/GFZ.ini @@ -15,3 +15,7 @@ SyncGPU = True [ActionReplay] # Add action replay cheats here. +[Video_Hacks] +# In the Sand Ocean track, EFB peeks occur across the whole screen. +# This leads to slow performance with the tile cache enabled, so disable it. +EFBAccessTileSize = 0 \ No newline at end of file diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 3418e97629..3913176f2b 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -136,6 +136,10 @@ const ConfigInfo GFX_STEREO_DEPTH_PERCENTAGE{ // Graphics.Hacks const ConfigInfo GFX_HACK_EFB_ACCESS_ENABLE{{System::GFX, "Hacks", "EFBAccessEnable"}, true}; +const ConfigInfo GFX_HACK_EFB_DEFER_INVALIDATION{ + {System::GFX, "Hacks", "EFBAccessDeferInvalidation"}, false}; +const ConfigInfo GFX_HACK_EFB_ACCESS_TILE_SIZE{{System::GFX, "Hacks", "EFBAccessTileSize"}, + 64}; const ConfigInfo GFX_HACK_BBOX_ENABLE{{System::GFX, "Hacks", "BBoxEnable"}, false}; const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE{{System::GFX, "Hacks", "ForceProgressive"}, true}; const ConfigInfo GFX_HACK_SKIP_EFB_COPY_TO_RAM{{System::GFX, "Hacks", "EFBToTextureEnable"}, diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 8e29c13a77..1946e704fc 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -101,6 +101,8 @@ extern const ConfigInfo GFX_STEREO_DEPTH_PERCENTAGE; // Graphics.Hacks extern const ConfigInfo GFX_HACK_EFB_ACCESS_ENABLE; +extern const ConfigInfo GFX_HACK_EFB_DEFER_INVALIDATION; +extern const ConfigInfo GFX_HACK_EFB_ACCESS_TILE_SIZE; extern const ConfigInfo GFX_HACK_BBOX_ENABLE; extern const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE; extern const ConfigInfo 
GFX_HACK_SKIP_EFB_COPY_TO_RAM; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index b1021f7827..5180169353 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -113,6 +113,8 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) // Graphics.Hacks Config::GFX_HACK_EFB_ACCESS_ENABLE.location, + Config::GFX_HACK_EFB_DEFER_INVALIDATION.location, + Config::GFX_HACK_EFB_ACCESS_TILE_SIZE.location, Config::GFX_HACK_BBOX_ENABLE.location, Config::GFX_HACK_FORCE_PROGRESSIVE.location, Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM.location, diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp index 77c690b13e..04f7863de6 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp @@ -105,9 +105,20 @@ void AdvancedWidget::CreateWidgets() misc_layout->addWidget(m_borderless_fullscreen, 1, 1); #endif + // Experimental. + auto* experimental_box = new QGroupBox(tr("Experimental")); + auto* experimental_layout = new QGridLayout(); + experimental_box->setLayout(experimental_layout); + + m_defer_efb_access_invalidation = + new GraphicsBool(tr("Defer EFB Cache Invalidation"), Config::GFX_HACK_EFB_DEFER_INVALIDATION); + + experimental_layout->addWidget(m_defer_efb_access_invalidation, 0, 0); + main_layout->addWidget(debugging_box); main_layout->addWidget(utility_box); main_layout->addWidget(misc_box); + main_layout->addWidget(experimental_box); main_layout->addStretch(); setLayout(main_layout); @@ -194,6 +205,12 @@ void AdvancedWidget::AddDescriptions() "this option may result in a performance improvement on systems with more than " "two CPU cores. 
Currently, this is limited to the Vulkan backend.\n\nIf unsure, " "leave this checked."); + static const char TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION[] = + QT_TR_NOOP("Defers invalidation of the EFB access cache until a GPU synchronization command " + "is executed. If disabled, the cache will be invalidated with every draw call. " + "May improve performance in some games which rely on CPU EFB Access at the cost " + "of stability.\n\nIf unsure, leave this unchecked."); + #ifdef _WIN32 static const char TR_BORDERLESS_FULLSCREEN_DESCRIPTION[] = QT_TR_NOOP( "Implements fullscreen mode with a borderless window spanning the whole screen instead of " @@ -223,4 +240,5 @@ void AdvancedWidget::AddDescriptions() #ifdef _WIN32 AddDescription(m_borderless_fullscreen, TR_BORDERLESS_FULLSCREEN_DESCRIPTION); #endif + AddDescription(m_defer_efb_access_invalidation, TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION); } diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h index b0eaf6bf11..acfbc78392 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h @@ -46,4 +46,7 @@ private: QCheckBox* m_enable_prog_scan; QCheckBox* m_backend_multithreading; QCheckBox* m_borderless_fullscreen; + + // Experimental + QCheckBox* m_defer_efb_access_invalidation; }; diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 70db72206e..c9a5ab7091 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -67,6 +67,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsCopyToVram = true; g_Config.backend_info.bSupportsLargePoints = false; + g_Config.backend_info.bSupportsPartialDepthCopies = false; g_Config.backend_info.bSupportsBitfield = false; g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; 
g_Config.backend_info.bSupportsBPTCTextures = false; diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index 1e5c4e1193..6959c820ed 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -50,6 +50,8 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsBackgroundCompiling = false; g_Config.backend_info.bSupportsLogicOp = false; + g_Config.backend_info.bSupportsLargePoints = false; + g_Config.backend_info.bSupportsPartialDepthCopies = false; // aamodes: We only support 1 sample, so no MSAA g_Config.backend_info.Adapters.clear(); diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index baeb9f877c..d0f3e74c5a 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -939,7 +939,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaE u32 color, u32 z) { g_framebuffer_manager->FlushEFBPokes(); - g_framebuffer_manager->InvalidatePeekCache(); + g_framebuffer_manager->FlagPeekCacheAsOutOfDate(); u32 clear_mask = 0; if (colorEnable || alphaEnable) diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 76cd7b2ca6..22dba1a21f 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -89,6 +89,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsCopyToVram = true; g_Config.backend_info.bSupportsLargePoints = true; + g_Config.backend_info.bSupportsPartialDepthCopies = true; // TODO: There is a bug here, if texel buffers are not supported the graphics options // will show the option when it is not supported. 
The only way around this would be diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index c47072d4af..b1c465c4a2 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -70,6 +70,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsCopyToVram = false; g_Config.backend_info.bSupportsLargePoints = false; + g_Config.backend_info.bSupportsPartialDepthCopies = false; g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsBackgroundCompiling = false; g_Config.backend_info.bSupportsLogicOp = true; diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index 0254f128a8..ae3c7c5807 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -186,7 +186,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha bool z_enable, u32 color, u32 z) { g_framebuffer_manager->FlushEFBPokes(); - g_framebuffer_manager->InvalidatePeekCache(); + g_framebuffer_manager->FlagPeekCacheAsOutOfDate(); // Native -> EFB coordinates TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc); diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 72f929af1c..51c0cde329 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -252,6 +252,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsComputeShaders = true; // Assumed support. config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support. config->backend_info.bSupportsBitfield = true; // Assumed support. + config->backend_info.bSupportsPartialDepthCopies = true; // Assumed support. 
config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support. config->backend_info.bSupportsPostProcessing = true; // Assumed support. config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support. diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 7712421f57..b234cec785 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -22,6 +22,7 @@ #include "VideoCommon/BPMemory.h" #include "VideoCommon/BoundingBox.h" #include "VideoCommon/Fifo.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/PerfQueryBase.h" #include "VideoCommon/PixelEngine.h" @@ -178,6 +179,7 @@ static void BPWritten(const BPCmd& bp) { case 0x02: g_texture_cache->FlushEFBCopies(); + g_framebuffer_manager->InvalidatePeekCache(false); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetFinish(); // may generate interrupt DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF)); @@ -190,12 +192,14 @@ static void BPWritten(const BPCmd& bp) return; case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID g_texture_cache->FlushEFBCopies(); + g_framebuffer_manager->InvalidatePeekCache(false); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), false); DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF)); return; case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID g_texture_cache->FlushEFBCopies(); + g_framebuffer_manager->InvalidatePeekCache(false); if (!Fifo::UseDeterministicGPUThread()) PixelEngine::SetToken(static_cast(bp.newvalue & 0xFFFF), true); DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF)); diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp index 0af19012ea..489ce62292 100644 --- a/Source/Core/VideoCommon/FramebufferManager.cpp +++ 
b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -43,6 +43,7 @@ bool FramebufferManager::Initialize() return false; } + m_efb_cache_tile_size = static_cast(std::max(g_ActiveConfig.iEFBAccessTileSize, 0)); if (!CreateReadbackFramebuffer()) { PanicAlert("Failed to create EFB readback framebuffer"); @@ -79,7 +80,7 @@ bool FramebufferManager::Initialize() void FramebufferManager::RecreateEFBFramebuffer() { FlushEFBPokes(); - InvalidatePeekCache(); + InvalidatePeekCache(true); DestroyReadbackFramebuffer(); DestroyEFBFramebuffer(); @@ -288,6 +289,7 @@ bool FramebufferManager::ReinterpretPixelData(EFBReinterpretType convtype) std::swap(m_efb_color_texture, m_efb_convert_color_texture); std::swap(m_efb_framebuffer, m_efb_convert_framebuffer); g_renderer->EndUtilityDrawing(); + InvalidatePeekCache(true); return true; } @@ -324,92 +326,114 @@ void FramebufferManager::DestroyConversionPipelines() pipeline.reset(); } -bool FramebufferManager::PopulateColorReadbackTexture() +bool FramebufferManager::IsUsingTiledEFBCache() const { - g_vertex_manager->OnCPUEFBAccess(); + return m_efb_cache_tile_size > 0; +} - // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. - AbstractTexture* src_texture = - ResolveEFBColorTexture(MathUtil::Rectangle(0, 0, GetEFBWidth(), GetEFBHeight())); - if (g_renderer->GetEFBScale() != 1) +bool FramebufferManager::IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const +{ + const EFBCacheData& data = depth ? m_efb_depth_cache : m_efb_color_cache; + if (m_efb_cache_tile_size == 0) { - // Downsample from internal resolution to 1x. - // TODO: This won't produce correct results at IRs above 2x. 
- g_renderer->BeginUtilityDrawing(); - g_renderer->SetAndDiscardFramebuffer(m_color_copy_framebuffer.get()); - g_renderer->SetViewportAndScissor(m_color_copy_framebuffer->GetRect()); - g_renderer->SetPipeline(m_color_copy_pipeline.get()); - g_renderer->SetTexture(0, src_texture); - g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); - g_renderer->Draw(0, 3); - - // Copy from EFB or copy texture to staging texture. - m_color_readback_texture->CopyFromTexture(m_color_copy_texture.get(), - m_color_readback_texture->GetRect(), 0, 0, - m_color_readback_texture->GetRect()); - - g_renderer->EndUtilityDrawing(); + *tile_index = 0; + return data.valid; } else { - m_color_readback_texture->CopyFromTexture(src_texture, m_color_readback_texture->GetRect(), 0, - 0, m_color_readback_texture->GetRect()); + *tile_index = + ((y / m_efb_cache_tile_size) * m_efb_cache_tiles_wide) + (x / m_efb_cache_tile_size); + return data.valid && data.tiles[*tile_index]; } - - // Wait until the copy is complete. - m_color_readback_texture->Flush(); - m_color_readback_texture_valid = true; - return true; } -bool FramebufferManager::PopulateDepthReadbackTexture() +MathUtil::Rectangle FramebufferManager::GetEFBCacheTileRect(u32 tile_index) const { - g_vertex_manager->OnCPUEFBAccess(); + if (m_efb_cache_tile_size == 0) + return MathUtil::Rectangle(0, 0, EFB_WIDTH, EFB_HEIGHT); - // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. - AbstractTexture* src_texture = - ResolveEFBDepthTexture(MathUtil::Rectangle(0, 0, GetEFBWidth(), GetEFBHeight())); - if (g_renderer->GetEFBScale() != 1) - { - // Downsample from internal resolution to 1x. - // TODO: This won't produce correct results at IRs above 2x. 
- g_renderer->BeginUtilityDrawing(); - g_renderer->SetAndDiscardFramebuffer(m_depth_copy_framebuffer.get()); - g_renderer->SetViewportAndScissor(m_depth_copy_framebuffer->GetRect()); - g_renderer->SetPipeline(m_depth_copy_pipeline.get()); - g_renderer->SetTexture(0, src_texture); - g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); - g_renderer->Draw(0, 3); - - // No need to call FinishedRendering() here because CopyFromTexture() transitions. - m_depth_readback_texture->CopyFromTexture(m_depth_copy_texture.get(), - m_depth_readback_texture->GetRect(), 0, 0, - m_depth_readback_texture->GetRect()); - - g_renderer->EndUtilityDrawing(); - } - else - { - m_depth_readback_texture->CopyFromTexture(src_texture, m_depth_readback_texture->GetRect(), 0, - 0, m_depth_readback_texture->GetRect()); - } - - // Wait until the copy is complete. - m_depth_readback_texture->Flush(); - m_depth_readback_texture_valid = true; - return true; + const u32 tile_y = tile_index / m_efb_cache_tiles_wide; + const u32 tile_x = tile_index % m_efb_cache_tiles_wide; + const u32 start_y = tile_y * m_efb_cache_tile_size; + const u32 start_x = tile_x * m_efb_cache_tile_size; + return MathUtil::Rectangle( + start_x, start_y, std::min(start_x + m_efb_cache_tile_size, static_cast(EFB_WIDTH)), + std::min(start_y + m_efb_cache_tile_size, static_cast(EFB_HEIGHT))); } -void FramebufferManager::InvalidatePeekCache() +u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) { - m_color_readback_texture_valid = false; - m_depth_readback_texture_valid = false; + // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + u32 tile_index; + if (!IsEFBCacheTilePresent(false, x, y, &tile_index)) + PopulateEFBCache(false, tile_index); + + u32 value; + m_efb_color_cache.readback_texture->ReadTexel(x, y, &value); + return value; +} + +float FramebufferManager::PeekEFBDepth(u32 x, u32 y) +{ + // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + u32 tile_index; + if (!IsEFBCacheTilePresent(true, x, y, &tile_index)) + PopulateEFBCache(true, tile_index); + + float value; + m_efb_depth_cache.readback_texture->ReadTexel(x, y, &value); + return value; +} + +void FramebufferManager::SetEFBCacheTileSize(u32 size) +{ + if (m_efb_cache_tile_size == size) + return; + + InvalidatePeekCache(true); + m_efb_cache_tile_size = size; + DestroyReadbackFramebuffer(); + if (!CreateReadbackFramebuffer()) + PanicAlert("Failed to create EFB readback framebuffers"); +} + +void FramebufferManager::InvalidatePeekCache(bool forced) +{ + if (forced || m_efb_color_cache.out_of_date) + { + m_efb_color_cache.valid = false; + m_efb_color_cache.out_of_date = false; + std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false); + } + if (forced || m_efb_depth_cache.out_of_date) + { + m_efb_depth_cache.valid = false; + m_efb_depth_cache.out_of_date = false; + std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false); + } +} + +void FramebufferManager::FlagPeekCacheAsOutOfDate() +{ + if (m_efb_color_cache.valid) + m_efb_color_cache.out_of_date = true; + if (m_efb_depth_cache.valid) + m_efb_depth_cache.out_of_date = true; + + if (!g_ActiveConfig.bEFBAccessDeferInvalidation) + InvalidatePeekCache(); } bool FramebufferManager::CompileReadbackPipelines() { AbstractPipelineConfig config = {}; - config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader(); + 
config.vertex_shader = g_shader_cache->GetTextureCopyVertexShader(); config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetTexcoordGeometryShader() : nullptr; config.pixel_shader = g_shader_cache->GetTextureCopyPixelShader(); config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); @@ -417,15 +441,15 @@ bool FramebufferManager::CompileReadbackPipelines() config.blending_state = RenderState::GetNoBlendingBlendState(); config.framebuffer_state = RenderState::GetColorFramebufferState(GetEFBColorFormat()); config.usage = AbstractPipelineUsage::Utility; - m_color_copy_pipeline = g_renderer->CreatePipeline(config); - if (!m_color_copy_pipeline) + m_efb_color_cache.copy_pipeline = g_renderer->CreatePipeline(config); + if (!m_efb_color_cache.copy_pipeline) return false; // same for depth, except different format config.framebuffer_state.color_texture_format = AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()); - m_depth_copy_pipeline = g_renderer->CreatePipeline(config); - if (!m_depth_copy_pipeline) + m_efb_depth_cache.copy_pipeline = g_renderer->CreatePipeline(config); + if (!m_efb_depth_cache.copy_pipeline) return false; if (IsEFBMultisampled()) @@ -447,56 +471,138 @@ bool FramebufferManager::CompileReadbackPipelines() void FramebufferManager::DestroyReadbackPipelines() { m_efb_depth_resolve_pipeline.reset(); - m_depth_copy_pipeline.reset(); - m_color_copy_pipeline.reset(); + m_efb_depth_cache.copy_pipeline.reset(); + m_efb_color_cache.copy_pipeline.reset(); } bool FramebufferManager::CreateReadbackFramebuffer() { - const TextureConfig color_config(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, GetEFBColorFormat(), - AbstractTextureFlag_RenderTarget); - const TextureConfig depth_config( - EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, - AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), - AbstractTextureFlag_RenderTarget); - if (g_renderer->GetEFBScale() != 1) + // Since we can't partially copy from a depth buffer 
directly to the staging texture in D3D, we + // use an intermediate buffer to avoid copying the whole texture. + if ((IsUsingTiledEFBCache() && !g_ActiveConfig.backend_info.bSupportsPartialDepthCopies) || + g_renderer->GetEFBScale() != 1) { - m_color_copy_texture = g_renderer->CreateTexture(color_config); - m_depth_copy_texture = g_renderer->CreateTexture(depth_config); - if (!m_color_copy_texture || !m_depth_copy_texture) + const TextureConfig color_config(IsUsingTiledEFBCache() ? m_efb_cache_tile_size : EFB_WIDTH, + IsUsingTiledEFBCache() ? m_efb_cache_tile_size : EFB_HEIGHT, 1, + 1, 1, GetEFBColorFormat(), AbstractTextureFlag_RenderTarget); + const TextureConfig depth_config( + color_config.width, color_config.height, 1, 1, 1, + AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), + AbstractTextureFlag_RenderTarget); + + m_efb_color_cache.texture = g_renderer->CreateTexture(color_config); + m_efb_depth_cache.texture = g_renderer->CreateTexture(depth_config); + if (!m_efb_color_cache.texture || !m_efb_depth_cache.texture) return false; - m_color_copy_framebuffer = g_renderer->CreateFramebuffer(m_color_copy_texture.get(), nullptr); - m_depth_copy_framebuffer = g_renderer->CreateFramebuffer(m_depth_copy_texture.get(), nullptr); - if (!m_color_copy_framebuffer || !m_depth_copy_framebuffer) + m_efb_color_cache.framebuffer = + g_renderer->CreateFramebuffer(m_efb_color_cache.texture.get(), nullptr); + m_efb_depth_cache.framebuffer = + g_renderer->CreateFramebuffer(m_efb_depth_cache.texture.get(), nullptr); + if (!m_efb_color_cache.framebuffer || !m_efb_depth_cache.framebuffer) return false; } - m_color_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Mutable, color_config); - m_depth_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Mutable, depth_config); - if (!m_color_readback_texture || !m_depth_readback_texture) + // Staging textures use the full EFB dimensions, as this is the buffer for the whole 
cache. + m_efb_color_cache.readback_texture = g_renderer->CreateStagingTexture( + StagingTextureType::Mutable, + TextureConfig(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, GetEFBColorFormat(), 0)); + m_efb_depth_cache.readback_texture = g_renderer->CreateStagingTexture( + StagingTextureType::Mutable, + TextureConfig(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, + AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), 0)); + if (!m_efb_color_cache.readback_texture || !m_efb_depth_cache.readback_texture) return false; + if (IsUsingTiledEFBCache()) + { + const u32 tiles_wide = ((EFB_WIDTH + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size); + const u32 tiles_high = ((EFB_HEIGHT + (m_efb_cache_tile_size - 1)) / m_efb_cache_tile_size); + const u32 total_tiles = tiles_wide * tiles_high; + m_efb_color_cache.tiles.resize(total_tiles); + std::fill(m_efb_color_cache.tiles.begin(), m_efb_color_cache.tiles.end(), false); + m_efb_depth_cache.tiles.resize(total_tiles); + std::fill(m_efb_depth_cache.tiles.begin(), m_efb_depth_cache.tiles.end(), false); + m_efb_cache_tiles_wide = tiles_wide; + } + return true; } void FramebufferManager::DestroyReadbackFramebuffer() { - m_depth_copy_framebuffer.reset(); - m_depth_copy_texture.reset(); - m_depth_readback_texture_valid = false; - m_color_copy_framebuffer.reset(); - m_color_copy_texture.reset(); - m_color_readback_texture_valid = false; + auto DestroyCache = [](EFBCacheData& data) { + data.readback_texture.reset(); + data.framebuffer.reset(); + data.texture.reset(); + data.valid = false; + }; + DestroyCache(m_efb_color_cache); + DestroyCache(m_efb_depth_cache); +} + +void FramebufferManager::PopulateEFBCache(bool depth, u32 tile_index) +{ + g_vertex_manager->OnCPUEFBAccess(); + + // Force the path through the intermediate texture, as we can't do an image copy from a depth + // buffer directly to a staging texture (must be the whole resource). 
+ const bool force_intermediate_copy = + depth && !g_ActiveConfig.backend_info.bSupportsPartialDepthCopies && IsUsingTiledEFBCache(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. + EFBCacheData& data = depth ? m_efb_depth_cache : m_efb_color_cache; + const MathUtil::Rectangle rect = GetEFBCacheTileRect(tile_index); + const MathUtil::Rectangle native_rect = g_renderer->ConvertEFBRectangle(rect); + AbstractTexture* src_texture = + depth ? ResolveEFBDepthTexture(native_rect) : ResolveEFBColorTexture(native_rect); + if (g_renderer->GetEFBScale() != 1 || force_intermediate_copy) + { + // Downsample from internal resolution to 1x. + // TODO: This won't produce correct results at IRs above 2x. More samples are required. + // This is the same issue as with EFB copies. + g_renderer->BeginUtilityDrawing(); + + const float rcp_src_width = 1.0f / m_efb_framebuffer->GetWidth(); + const float rcp_src_height = 1.0f / m_efb_framebuffer->GetHeight(); + const std::array uniforms = { + {native_rect.left * rcp_src_width, native_rect.top * rcp_src_height, + native_rect.GetWidth() * rcp_src_width, native_rect.GetHeight() * rcp_src_height}}; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + g_renderer->SetAndDiscardFramebuffer(data.framebuffer.get()); + g_renderer->SetViewportAndScissor(data.framebuffer->GetRect()); + g_renderer->SetPipeline(data.copy_pipeline.get()); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, depth ? RenderState::GetPointSamplerState() : + RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); + + // Copy from EFB or copy texture to staging texture. + // No need to call FinishedRendering() here because CopyFromTexture() transitions. 
+ data.readback_texture->CopyFromTexture( + data.texture.get(), MathUtil::Rectangle(0, 0, rect.GetWidth(), rect.GetHeight()), 0, 0, + rect); + + g_renderer->EndUtilityDrawing(); + } + else + { + data.readback_texture->CopyFromTexture(src_texture, rect, 0, 0, rect); + } + + // Wait until the copy is complete. + data.readback_texture->Flush(); + data.valid = true; + data.out_of_date = false; + if (IsUsingTiledEFBCache()) + data.tiles[tile_index] = true; } void FramebufferManager::ClearEFB(const MathUtil::Rectangle& rc, bool clear_color, bool clear_alpha, bool clear_z, u32 color, u32 z) { FlushEFBPokes(); - InvalidatePeekCache(); + FlagPeekCacheAsOutOfDate(); g_renderer->BeginUtilityDrawing(); // Set up uniforms. @@ -578,34 +684,6 @@ void FramebufferManager::DestroyClearPipelines() } } -u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) -{ - if (!m_color_readback_texture_valid && !PopulateColorReadbackTexture()) - return 0; - - // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. - if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) - y = EFB_HEIGHT - 1 - y; - - u32 value; - m_color_readback_texture->ReadTexel(x, y, &value); - return value; -} - -float FramebufferManager::PeekEFBDepth(u32 x, u32 y) -{ - if (!m_depth_readback_texture_valid && !PopulateDepthReadbackTexture()) - return 0.0f; - - // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. - if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) - y = EFB_HEIGHT - 1 - y; - - float value; - m_depth_readback_texture->ReadTexel(x, y, &value); - return value; -} - void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color) { // Flush if we exceeded the number of vertices per batch. @@ -614,15 +692,14 @@ void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color) CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color); - // Update the peek cache if it's valid, since we know the color of the pixel now. 
- if (m_color_readback_texture_valid) - { - // See comment above for reasoning for lower-left coordinates. - if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) - y = EFB_HEIGHT - 1 - y; + // See comment above for reasoning for lower-left coordinates. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; - m_color_readback_texture->WriteTexel(x, y, &color); - } + // Update the peek cache if it's valid, since we know the color of the pixel now. + u32 tile_index; + if (IsEFBCacheTilePresent(false, x, y, &tile_index)) + m_efb_color_cache.readback_texture->WriteTexel(x, y, &color); } void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth) @@ -633,15 +710,14 @@ void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth) CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0); - // Update the peek cache if it's valid, since we know the color of the pixel now. - if (m_depth_readback_texture_valid) - { - // See comment above for reasoning for lower-left coordinates. - if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) - y = EFB_HEIGHT - 1 - y; + // See comment above for reasoning for lower-left coordinates. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; - m_depth_readback_texture->WriteTexel(x, y, &depth); - } + // Update the peek cache if it's valid, since we know the depth of the pixel now. 
+ u32 tile_index; + if (IsEFBCacheTilePresent(true, x, y, &tile_index)) + m_efb_depth_cache.readback_texture->WriteTexel(x, y, &depth); } void FramebufferManager::CreatePokeVertices(std::vector* destination_list, u32 x, diff --git a/Source/Core/VideoCommon/FramebufferManager.h b/Source/Core/VideoCommon/FramebufferManager.h index d0de7b62b1..b97d45b31e 100644 --- a/Source/Core/VideoCommon/FramebufferManager.h +++ b/Source/Core/VideoCommon/FramebufferManager.h @@ -6,15 +6,16 @@ #include #include +#include #include "Common/CommonTypes.h" +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/TextureConfig.h" -class AbstractFramebuffer; -class AbstractPipeline; -class AbstractStagingTexture; class NativeVertexFormat; enum class EFBReinterpretType @@ -85,7 +86,9 @@ public: // Reads a framebuffer value back from the GPU. This may block if the cache is not current. u32 PeekEFBColor(u32 x, u32 y); float PeekEFBDepth(u32 x, u32 y); - void InvalidatePeekCache(); + void SetEFBCacheTileSize(u32 size); + void InvalidatePeekCache(bool forced = true); + void FlagPeekCacheAsOutOfDate(); // Writes a value to the framebuffer. This will never block, and writes will be batched. 
void PokeEFBColor(u32 x, u32 y, u32 color); @@ -100,6 +103,19 @@ protected: }; static_assert(std::is_standard_layout::value, "EFBPokeVertex is standard-layout"); + // EFB cache - for CPU EFB access + // Tiles are ordered left-to-right, then top-to-bottom + struct EFBCacheData + { + std::unique_ptr texture; + std::unique_ptr framebuffer; + std::unique_ptr readback_texture; + std::unique_ptr copy_pipeline; + std::vector tiles; + bool out_of_date; + bool valid; + }; + bool CreateEFBFramebuffer(); void DestroyEFBFramebuffer(); @@ -118,8 +134,10 @@ protected: bool CompilePokePipelines(); void DestroyPokePipelines(); - bool PopulateColorReadbackTexture(); - bool PopulateDepthReadbackTexture(); + bool IsUsingTiledEFBCache() const; + bool IsEFBCacheTilePresent(bool depth, u32 x, u32 y, u32* tile_index) const; + MathUtil::Rectangle GetEFBCacheTileRect(u32 tile_index) const; + void PopulateEFBCache(bool depth, u32 tile_index); void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, u32 color); @@ -141,19 +159,11 @@ protected: // Format conversion shaders std::array, 6> m_format_conversion_pipelines; - // EFB readback texture - std::unique_ptr m_color_copy_texture; - std::unique_ptr m_depth_copy_texture; - std::unique_ptr m_color_copy_framebuffer; - std::unique_ptr m_depth_copy_framebuffer; - std::unique_ptr m_color_copy_pipeline; - std::unique_ptr m_depth_copy_pipeline; - - // CPU-side EFB readback texture - std::unique_ptr m_color_readback_texture; - std::unique_ptr m_depth_readback_texture; - bool m_color_readback_texture_valid = false; - bool m_depth_readback_texture_valid = false; + // EFB cache - for CPU EFB access + u32 m_efb_cache_tile_size = 0; + u32 m_efb_cache_tiles_wide = 0; + EFBCacheData m_efb_color_cache = {}; + EFBCacheData m_efb_depth_cache = {}; // EFB clear pipelines // Indexed by [color_write_enabled][alpha_write_enabled][depth_write_enabled] diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp 
index 611eca29e6..74760f57e9 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -386,6 +386,7 @@ void Renderer::CheckForConfigChanges() const StereoMode old_stereo = g_ActiveConfig.stereo_mode; const u32 old_multisamples = g_ActiveConfig.iMultisamples; const int old_anisotropy = g_ActiveConfig.iMaxAnisotropy; + const int old_efb_access_tile_size = g_ActiveConfig.iEFBAccessTileSize; const bool old_force_filtering = g_ActiveConfig.bForceFiltering; const bool old_vsync = g_ActiveConfig.bVSyncActive; const bool old_bbox = g_ActiveConfig.bBBoxEnable; @@ -395,6 +396,10 @@ void Renderer::CheckForConfigChanges() // Update texture cache settings with any changed options. g_texture_cache->OnConfigChanged(g_ActiveConfig); + // EFB tile cache doesn't need to notify the backend. + if (old_efb_access_tile_size != g_ActiveConfig.iEFBAccessTileSize) + g_framebuffer_manager->SetEFBCacheTileSize(std::max(g_ActiveConfig.iEFBAccessTileSize, 0)); + // Check for post-processing shader changes. Done up here as it doesn't affect anything outside // the post-processor. Note that options are applied every frame, so no need to check those. if (m_post_processor->GetConfig()->GetShader() != g_ActiveConfig.sPostProcessingShader) diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index bbc355a98c..06168c6cec 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -99,8 +99,7 @@ u32 VertexManagerBase::GetRemainingSize() const DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall) { - // Flush all EFB pokes and invalidate the peek cache. - g_framebuffer_manager->InvalidatePeekCache(); + // Flush all EFB pokes. Since the buffer is shared, we can't draw pokes+primitives concurrently. 
g_framebuffer_manager->FlushEFBPokes(); // The SSE vertex loader can write up to 4 bytes past the end @@ -449,6 +448,9 @@ void VertexManagerBase::Flush() g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); OnDraw(); + + // The EFB cache is now potentially stale. + g_framebuffer_manager->FlagPeekCacheAsOutOfDate(); } } diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 3b1b22dc89..bfd4a6b99b 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -144,6 +144,7 @@ void VideoConfig::Refresh() iStereoDepthPercentage = Config::Get(Config::GFX_STEREO_DEPTH_PERCENTAGE); bEFBAccessEnable = Config::Get(Config::GFX_HACK_EFB_ACCESS_ENABLE); + bEFBAccessDeferInvalidation = Config::Get(Config::GFX_HACK_EFB_DEFER_INVALIDATION); bBBoxEnable = Config::Get(Config::GFX_HACK_BBOX_ENABLE); bForceProgressive = Config::Get(Config::GFX_HACK_FORCE_PROGRESSIVE); bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM); @@ -154,6 +155,7 @@ void VideoConfig::Refresh() bCopyEFBScaled = Config::Get(Config::GFX_HACK_COPY_EFB_SCALED); bEFBEmulateFormatChanges = Config::Get(Config::GFX_HACK_EFB_EMULATE_FORMAT_CHANGES); bVertexRounding = Config::Get(Config::GFX_HACK_VERTEX_ROUDING); + iEFBAccessTileSize = Config::Get(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE); bPerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 9271762015..5be2a06639 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -112,6 +112,7 @@ struct VideoConfig final // Hacks bool bEFBAccessEnable; + bool bEFBAccessDeferInvalidation; bool bPerfQueriesEnable; bool bBBoxEnable; bool bForceProgressive; @@ -128,6 +129,7 @@ struct VideoConfig final bool bEnablePixelLighting; bool bFastDepthCalc; bool bVertexRounding; + int iEFBAccessTileSize; int 
iLog; // CONF_ bits int iSaveTargetId; // TODO: Should be dropped @@ -216,6 +218,7 @@ struct VideoConfig final bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES bool bSupportsBackgroundCompiling; bool bSupportsLargePoints; + bool bSupportsPartialDepthCopies; } backend_info; // Utility