From 708bd3d9f7f375fba750561758abab77c99fa0af Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 31 Mar 2019 14:11:53 +1000 Subject: [PATCH] TextureCache: Simplify XFB reconstruction This also better handles in-memory interlaced XFB data placed by the CPU by considering the stride from the VI. --- Source/Core/Core/HW/VideoInterface.cpp | 13 +- Source/Core/Core/HW/VideoInterface.h | 2 +- Source/Core/VideoCommon/AsyncRequests.cpp | 3 +- Source/Core/VideoCommon/BPStructs.cpp | 5 +- Source/Core/VideoCommon/RenderBase.cpp | 67 +-- Source/Core/VideoCommon/RenderBase.h | 9 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 501 +++++++------------ Source/Core/VideoCommon/TextureCacheBase.h | 55 +- 8 files changed, 228 insertions(+), 427 deletions(-) diff --git a/Source/Core/Core/HW/VideoInterface.cpp b/Source/Core/Core/HW/VideoInterface.cpp index 036c791bd6..a2750a5840 100644 --- a/Source/Core/Core/HW/VideoInterface.cpp +++ b/Source/Core/Core/HW/VideoInterface.cpp @@ -686,6 +686,9 @@ static void BeginField(FieldType field, u64 ticks) xfbAddr = GetXFBAddressTop(); } + // Multiply the stride by 2 to get the byte offset for each subsequent line. + fbStride *= 2; + if (potentially_interlaced_xfb && interlaced_video_mode && g_ActiveConfig.bForceProgressive) { // Strictly speaking, in interlaced mode, we're only supposed to read @@ -704,10 +707,10 @@ static void BeginField(FieldType field, u64 ticks) // offset the xfb by (-stride_of_one_line) to get the start // address of the full xfb. 
if (field == FieldType::Odd && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB + 1 && xfbAddr) - xfbAddr -= fbStride * 2; + xfbAddr -= fbStride; if (field == FieldType::Even && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB - 1 && xfbAddr) - xfbAddr -= fbStride * 2; + xfbAddr -= fbStride; } LogField(field, xfbAddr); @@ -787,10 +790,8 @@ void Update(u64 ticks) } // Create a fake VI mode for a fifolog -void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height) +void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_stride, u32 fb_height) { - u32 fb_stride = fb_width; - bool interlaced = fb_height > 480 / 2; if (interlaced) { @@ -807,7 +808,7 @@ void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height) m_VBlankTimingEven.PRB = 503 - fb_height * 2; m_VBlankTimingEven.PSB = 4; m_PictureConfiguration.WPL = fb_width / 16; - m_PictureConfiguration.STD = fb_stride / 16; + m_PictureConfiguration.STD = (fb_stride / 2) / 16; UpdateParameters(); diff --git a/Source/Core/Core/HW/VideoInterface.h b/Source/Core/Core/HW/VideoInterface.h index 7f0ebf8400..3daf3cbf7c 100644 --- a/Source/Core/Core/HW/VideoInterface.h +++ b/Source/Core/Core/HW/VideoInterface.h @@ -374,6 +374,6 @@ u32 GetTicksPerField(); float GetAspectRatio(); // Create a fake VI mode for a fifolog -void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height); +void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_stride, u32 fb_height); } // namespace VideoInterface diff --git a/Source/Core/VideoCommon/AsyncRequests.cpp b/Source/Core/VideoCommon/AsyncRequests.cpp index 28769925c9..cefd8541c4 100644 --- a/Source/Core/VideoCommon/AsyncRequests.cpp +++ b/Source/Core/VideoCommon/AsyncRequests.cpp @@ -113,7 +113,6 @@ void AsyncRequests::SetEnable(bool enable) void AsyncRequests::HandleEvent(const AsyncRequests::Event& e) { - EFBRectangle rc; switch (e.type) { case Event::EFB_POKE_COLOR: @@ -145,7 +144,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e) case Event::SWAP_EVENT: 
g_renderer->Swap(e.swap_event.xfbAddr, e.swap_event.fbWidth, e.swap_event.fbStride, - e.swap_event.fbHeight, rc, e.time); + e.swap_event.fbHeight, e.time); break; case Event::BBOX_READ: diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index d7d80a1bfa..925619e797 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -312,14 +312,13 @@ static void BPWritten(const BPCmd& bp) if (g_ActiveConfig.bImmediateXFB) { // below div two to convert from bytes to pixels - it expects width, not stride - g_renderer->Swap(destAddr, destStride / 2, destStride / 2, height, srcRect, - CoreTiming::GetTicks()); + g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks()); } else { if (FifoPlayer::GetInstance().IsRunningWithFakeVideoInterfaceUpdates()) { - VideoInterface::FakeVIUpdate(destAddr, srcRect.GetWidth(), height); + VideoInterface::FakeVIUpdate(destAddr, srcRect.GetWidth(), destStride, height); } } } diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 09d59e4877..67bc9ede9e 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -1154,8 +1154,7 @@ void Renderer::EndUIFrame() BeginImGuiFrame(); } -void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, - u64 ticks) +void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks) { const AspectMode suggested = g_ActiveConfig.suggested_aspect_mode; if (suggested == AspectMode::Analog || suggested == AspectMode::AnalogWide) @@ -1188,33 +1187,15 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // behind the renderer. 
FlushFrameDump(); - if (xfbAddr && fbWidth && fbStride && fbHeight) + if (xfb_addr && fb_width && fb_stride && fb_height) { - constexpr int force_safe_texture_cache_hash = 0; // Get the current XFB from texture cache - auto* xfb_entry = g_texture_cache->GetXFBTexture( - xfbAddr, fbStride, fbHeight, TextureFormat::XFB, force_safe_texture_cache_hash); - + MathUtil::Rectangle xfb_rect; + const auto* xfb_entry = + g_texture_cache->GetXFBTexture(xfb_addr, fb_width, fb_height, fb_stride, &xfb_rect); if (xfb_entry && xfb_entry->id != m_last_xfb_id) { - const TextureConfig& texture_config = xfb_entry->texture->GetConfig(); - m_last_xfb_texture = xfb_entry->texture.get(); m_last_xfb_id = xfb_entry->id; - m_last_xfb_ticks = ticks; - - auto xfb_rect = texture_config.GetRect(); - - // It's possible that the returned XFB texture is native resolution - // even when we're rendering at higher than native resolution - // if the XFB was was loaded entirely from console memory. - // If so, adjust the rectangle by native resolution instead of scaled resolution. - const u32 native_stride_width_difference = fbStride - fbWidth; - if (texture_config.width == xfb_entry->native_width) - xfb_rect.right -= native_stride_width_difference; - else - xfb_rect.right -= EFBToScaledX(native_stride_width_difference); - - m_last_xfb_region = xfb_rect; // Since we use the common pipelines here and draw vertices if a batch is currently being // built by the vertex loader, we end up trampling over its pointer, as we share the buffer @@ -1247,7 +1228,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // Update the window size based on the frame that was just rendered. // Due to depending on guest state, we need to call this every frame. 
- SetWindowSize(texture_config.width, texture_config.height); + SetWindowSize(xfb_rect.GetWidth(), xfb_rect.GetHeight()); } m_fps_counter.Update(); @@ -1259,7 +1240,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const DolphinAnalytics::Instance()->ReportPerformanceInfo(std::move(perf_sample)); if (IsFrameDumping()) - DumpCurrentFrame(); + DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks); // Begin new frame m_frame_count++; @@ -1295,8 +1276,8 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const } // Update our last xfb values - m_last_xfb_width = (fbStride < 1 || fbStride > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fbStride; - m_last_xfb_height = (fbHeight < 1 || fbHeight > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fbHeight; + m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width; + m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height; } else { @@ -1333,8 +1314,11 @@ bool Renderer::IsFrameDumping() return false; } -void Renderer::DumpCurrentFrame() +void Renderer::DumpCurrentFrame(const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect, u64 ticks) { + int source_width = src_rect.GetWidth(); + int source_height = src_rect.GetHeight(); int target_width, target_height; if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless()) { @@ -1344,22 +1328,20 @@ void Renderer::DumpCurrentFrame() } else { - std::tie(target_width, target_height) = CalculateOutputDimensions( - m_last_xfb_texture->GetConfig().width, m_last_xfb_texture->GetConfig().height); + std::tie(target_width, target_height) = CalculateOutputDimensions(source_width, source_height); } // We only need to render a copy if we need to stretch/scale the XFB copy. 
- const AbstractTexture* source_tex = m_last_xfb_texture; - MathUtil::Rectangle source_rect = m_last_xfb_region; - if (source_rect.GetWidth() != target_width || source_rect.GetHeight() != target_height) + MathUtil::Rectangle copy_rect = src_rect; + if (source_width != target_width || source_height != target_height) { if (!CheckFrameDumpRenderTexture(target_width, target_height)) return; - source_tex = m_frame_dump_render_texture.get(); - source_rect = MathUtil::Rectangle(0, 0, target_width, target_height); - ScaleTexture(m_frame_dump_render_framebuffer.get(), source_rect, m_last_xfb_texture, - m_last_xfb_region); + ScaleTexture(m_frame_dump_render_framebuffer.get(), m_frame_dump_render_framebuffer->GetRect(), + src_texture, src_rect); + src_texture = m_frame_dump_render_texture.get(); + copy_rect = src_texture->GetRect(); } // Index 0 was just sent to AVI dump. Swap with the second texture. @@ -1369,12 +1351,9 @@ void Renderer::DumpCurrentFrame() if (!CheckFrameDumpReadbackTexture(target_width, target_height)) return; - const auto converted_region = - ConvertFramebufferRectangle(source_rect, source_tex->GetWidth(), source_tex->GetHeight()); - m_frame_dump_readback_textures[0]->CopyFromTexture( - source_tex, converted_region, 0, 0, - MathUtil::Rectangle(0, 0, target_width, target_height)); - m_last_frame_state = AVIDump::FetchState(m_last_xfb_ticks); + m_frame_dump_readback_textures[0]->CopyFromTexture(src_texture, copy_rect, 0, 0, + m_frame_dump_readback_textures[0]->GetRect()); + m_last_frame_state = AVIDump::FetchState(ticks); m_last_frame_exported = true; } diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 31a0e2c15b..596b19834b 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -213,8 +213,7 @@ public: virtual void WaitForGPUIdle() {} // Finish up the current frame, print some stats - void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, - 
u64 ticks); + void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks); // Draws the specified XFB buffer to the screen, performing any post-processing. // Assumes that the backbuffer has already been bound and cleared. @@ -350,10 +349,7 @@ private: bool m_last_frame_exported = false; // Tracking of XFB textures so we don't render duplicate frames. - AbstractTexture* m_last_xfb_texture = nullptr; u64 m_last_xfb_id = std::numeric_limits::max(); - u64 m_last_xfb_ticks = 0; - EFBRectangle m_last_xfb_region; // Note: Only used for auto-ir u32 m_last_xfb_width = MAX_XFB_WIDTH; @@ -377,7 +373,8 @@ private: bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height); // Fills the frame dump staging texture with the current XFB texture. - void DumpCurrentFrame(); + void DumpCurrentFrame(const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect, u64 ticks); // Asynchronously encodes the specified pointer of frame data to the frame dump. void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 79e924771d..bc425fcc7f 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -471,6 +471,7 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale } ++iter.first; } + return entry_to_update; } @@ -952,7 +953,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo entry->native_height == nativeH) { entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt); - + entry->texture->FinishedRendering(); return entry; } } @@ -1003,7 +1004,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo entry->native_width == nativeW && entry->native_height == nativeH) { entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt); - + 
entry->texture->FinishedRendering(); return entry; } ++hash_iter; @@ -1209,142 +1210,115 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo return entry; } -TextureCacheBase::TCacheEntry* -TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, TextureFormat tex_format, - int texture_cache_safety_color_sample_size) +static void GetDisplayRectForXFBEntry(TextureCacheBase::TCacheEntry* entry, u32 width, u32 height, + MathUtil::Rectangle* display_rect) { - auto tex_info = ComputeTextureInformation(address, width, height, tex_format, - texture_cache_safety_color_sample_size, false, 0, 0, 0, - TLUTFormat::IA8, 1); - if (!tex_info) - return nullptr; + // Scale the sub-rectangle to the full resolution of the texture. + display_rect->left = 0; + display_rect->top = 0; + display_rect->right = static_cast(width * entry->GetWidth() / entry->native_width); + display_rect->bottom = static_cast(height * entry->GetHeight() / entry->native_height); +} - // Try a direct lookup by address/hash. - const TextureLookupInformation tex_info_value = tex_info.value(); - TCacheEntry* entry = GetXFBFromCache(tex_info_value); - if (entry) - return entry; - - // At this point, the XFB wasn't found in cache. This means the address is most likely not - // pointing at an xfb copy but instead an area of memory. Let's attempt to stitch all entries in - // this memory space together - bool loaded_from_overlapping = true; - entry = GetTextureFromOverlappingTextures(tex_info_value); - if (!entry) +TextureCacheBase::TCacheEntry* +TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, + MathUtil::Rectangle* display_rect) +{ + const u8* src_data = Memory::GetPointer(address); + if (!src_data) { - // At this point, the xfb address is truly "bogus" it likely is an area of memory defined by the - // CPU, so load it from memory. 
- entry = GetTextureFromMemory(tex_info_value); - loaded_from_overlapping = false; + ERROR_LOG(VIDEO, "Trying to load XFB texture from invalid address 0x%8x", address); + return nullptr; } + // Compute total texture size. XFB textures aren't tiled, so this is simple. + const u32 total_size = height * stride; + const u64 hash = Common::GetHash64(src_data, total_size, 0); + + // Do we currently have a version of this XFB copy in VRAM? + TCacheEntry* entry = GetXFBFromCache(address, width, height, stride, hash); + if (entry) + { + if (entry->is_xfb_container) + { + StitchXFBCopy(entry); + entry->texture->FinishedRendering(); + } + + GetDisplayRectForXFBEntry(entry, width, height, display_rect); + return entry; + } + + // Create a new VRAM texture, and fill it with the data from guest RAM. + entry = AllocateCacheEntry(TextureConfig(width, height, 1, 1, 1, AbstractTextureFormat::RGBA8, + AbstractTextureFlag_RenderTarget)); + entry->SetGeneralParameters(address, total_size, + TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true); + entry->SetDimensions(width, height, 1); + entry->SetHashes(hash, hash); + entry->SetXfbCopy(stride); + entry->is_xfb_container = true; + entry->is_custom_tex = false; + entry->may_have_overlapping_textures = false; + entry->frameCount = FRAMECOUNT_INVALID; + if (!g_ActiveConfig.UseGPUTextureDecoding() || + !DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height, + width, height, stride, texMem, entry->format.tlutfmt)) + { + const u32 decoded_size = width * height * sizeof(u32); + CheckTempSize(decoded_size); + TexDecoder_DecodeXFB(temp, src_data, width, height, stride); + entry->texture->Load(0, width, height, width, temp, decoded_size); + } + + // Stitch any VRAM copies into the new RAM copy. + StitchXFBCopy(entry); + entry->texture->FinishedRendering(); + + // Insert into the texture cache so we can re-use it next frame, if needed. 
+ textures_by_address.emplace(entry->addr, entry); + SETSTAT(stats.numTexturesAlive, textures_by_address.size()); + INCSTAT(stats.numTexturesUploaded); + if (g_ActiveConfig.bDumpXFBTarget) { // While this isn't really an xfb copy, we can treat it as such // for dumping purposes static int xfb_count = 0; - const std::string xfb_type = loaded_from_overlapping ? "combined" : "from_memory"; - entry->texture->Save(StringFromFormat("%sxfb_%s_%i.png", + entry->texture->Save(StringFromFormat("%sloaded_xfb_%i.png", File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(), - xfb_type.c_str(), xfb_count++), + xfb_count++), 0); } + GetDisplayRectForXFBEntry(entry, width, height, display_rect); return entry; } -std::optional TextureCacheBase::ComputeTextureInformation( - u32 address, u32 width, u32 height, TextureFormat tex_format, - int texture_cache_safety_color_sample_size, bool from_tmem, u32 tmem_address_even, - u32 tmem_address_odd, u32 tlut_address, TLUTFormat tlut_format, u32 levels) +TextureCacheBase::TCacheEntry* TextureCacheBase::GetXFBFromCache(u32 address, u32 width, u32 height, + u32 stride, u64 hash) { - TextureLookupInformation tex_info; - - tex_info.from_tmem = from_tmem; - tex_info.tmem_address_even = tmem_address_even; - tex_info.tmem_address_odd = tmem_address_odd; - - tex_info.address = address; - - if (from_tmem) - tex_info.src_data = &texMem[tex_info.tmem_address_even]; - else - tex_info.src_data = Memory::GetPointer(tex_info.address); - - if (tex_info.src_data == nullptr) - { - ERROR_LOG(VIDEO, "Trying to use an invalid texture address 0x%8x", tex_info.address); - return {}; - } - - tex_info.texture_cache_safety_color_sample_size = texture_cache_safety_color_sample_size; - - // TexelSizeInNibbles(format) * width * height / 16; - tex_info.block_width = TexDecoder_GetBlockWidthInTexels(tex_format); - tex_info.block_height = TexDecoder_GetBlockHeightInTexels(tex_format); - - tex_info.bytes_per_block = (tex_info.block_width * tex_info.block_height * - 
TexDecoder_GetTexelSizeInNibbles(tex_format)) / - 2; - - tex_info.expanded_width = Common::AlignUp(width, tex_info.block_width); - tex_info.expanded_height = Common::AlignUp(height, tex_info.block_height); - - tex_info.total_bytes = TexDecoder_GetTextureSizeInBytes(tex_info.expanded_width, - tex_info.expanded_height, tex_format); - - tex_info.native_width = width; - tex_info.native_height = height; - tex_info.native_levels = levels; - - // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in - // the mipmap chain - // e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we - // limit the mipmap count to 6 there - tex_info.computed_levels = std::min( - IntLog2(std::max(tex_info.native_width, tex_info.native_height)) + 1, tex_info.native_levels); - - tex_info.full_format = TextureAndTLUTFormat(tex_format, tlut_format); - tex_info.tlut_address = tlut_address; - - // TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data - // from the low tmem bank than it should) - tex_info.base_hash = Common::GetHash64(tex_info.src_data, tex_info.total_bytes, - tex_info.texture_cache_safety_color_sample_size); - - tex_info.is_palette_texture = IsColorIndexed(tex_format); - - if (tex_info.is_palette_texture) - { - tex_info.palette_size = TexDecoder_GetPaletteSize(tex_format); - tex_info.full_hash = tex_info.base_hash ^ - Common::GetHash64(&texMem[tex_info.tlut_address], tex_info.palette_size, - tex_info.texture_cache_safety_color_sample_size); - } - else - { - tex_info.full_hash = tex_info.base_hash; - } - - return tex_info; -} - -TextureCacheBase::TCacheEntry* -TextureCacheBase::GetXFBFromCache(const TextureLookupInformation& tex_info) -{ - auto iter_range = textures_by_address.equal_range(tex_info.address); + auto iter_range = textures_by_address.equal_range(address); TexAddrCache::iterator iter = iter_range.first; while (iter != iter_range.second) { TCacheEntry* 
entry = iter->second; - if ((entry->is_xfb_copy || entry->format.texfmt == TextureFormat::XFB) && - entry->native_width == tex_info.native_width && - entry->native_height == tex_info.native_height && - entry->memory_stride == entry->BytesPerRow() && !entry->may_have_overlapping_textures) + // The only thing which has to match exactly is the stride. We can use a partial rectangle if + // the VI width/height differs from that of the XFB copy. + if (entry->is_xfb_copy && entry->memory_stride == stride && entry->native_width >= width && + entry->native_height >= height && !entry->may_have_overlapping_textures) { - if (tex_info.base_hash == entry->hash && !entry->reference_changed) + // But if the dimensions do differ, we must compute the hash on the sub-rectangle. + u64 check_hash = hash; + if (entry->native_width != width || entry->native_height != height) + { + check_hash = Common::GetHash64(Memory::GetPointer(entry->addr), + entry->memory_stride * entry->native_height, 0); + } + + if (entry->hash == check_hash && !entry->reference_changed) { return entry; } @@ -1364,37 +1338,36 @@ TextureCacheBase::GetXFBFromCache(const TextureLookupInformation& tex_info) return nullptr; } -TextureCacheBase::TCacheEntry* -TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformation& tex_info) +void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry) { - u32 numBlocksX = tex_info.native_width / tex_info.block_width; - - // XFBs created for the purpose of being a container for textures from memory - // or as a container for overlapping textures, never need to be combined - // with other textures - TCacheEntry* stitched_entry = - CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers()); - stitched_entry->may_have_overlapping_textures = false; - - // It is possible that some of the overlapping textures overlap each other. - // This behavior has been seen with XFB copies in Rogue Leader. 
- // To get the correct result, we apply the texture updates in the order the textures were - // originally loaded. This ensures that the parts of the texture that would have been overwritten - // in memory on real hardware get overwritten the same way here too. - // This should work, but it may be a better idea to keep track of partial XFB copy invalidations - // instead, which would reduce the amount of copying work here. + // It is possible that some of the overlapping textures overlap each other. This behavior has been + // seen with XFB copies in Rogue Leader. To get the correct result, we apply the texture updates + // in the order the textures were originally loaded. This ensures that the parts of the texture + // that would have been overwritten in memory on real hardware get overwritten the same way here + // too. This should work, but it may be a better idea to keep track of partial XFB copy + // invalidations instead, which would reduce the amount of copying work here. std::vector candidates; + bool create_upscaled_copy = false; - auto iter = FindOverlappingTextures(tex_info.address, tex_info.total_bytes); + auto iter = FindOverlappingTextures(stitched_entry->addr, stitched_entry->size_in_bytes); while (iter.first != iter.second) { + // Currently, this checks the stride of the VRAM copy against the VI request. Therefore, for + // interlaced modes, VRAM copies won't be considered candidates. This is okay for now, because + // our force progressive hack means that an XFB copy should always have a matching stride. If + // the hack is disabled, XFB2RAM should also be enabled. Should we wish to implement interlaced + // stitching in the future, this would require a shader which grabs every second line. 
TCacheEntry* entry = iter.first->second; - if (entry->IsCopy() && !entry->tmem_only && - entry->OverlapsMemoryRange(tex_info.address, tex_info.total_bytes) && + if (entry != stitched_entry && entry->IsCopy() && !entry->tmem_only && + entry->OverlapsMemoryRange(stitched_entry->addr, stitched_entry->size_in_bytes) && entry->memory_stride == stitched_entry->memory_stride) { if (entry->hash == entry->CalculateHash()) { + // Can't check the height here because of Y scaling. + if (entry->native_width != entry->GetWidth()) + create_upscaled_copy = true; + candidates.emplace_back(entry); } else @@ -1407,219 +1380,108 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati ++iter.first; } + if (candidates.empty()) + return; + std::sort(candidates.begin(), candidates.end(), [](const TCacheEntry* a, const TCacheEntry* b) { return a->id < b->id; }); - bool updated_entry = false; + // We only upscale when necessary to preserve resolution. i.e. when there are upscaled partial + // copies to be stitched together. 
+ if (create_upscaled_copy) + { + ScaleTextureCacheEntryTo(stitched_entry, g_renderer->EFBToScaledX(stitched_entry->native_width), + g_renderer->EFBToScaledY(stitched_entry->native_height)); + } + for (TCacheEntry* entry : candidates) { - if (tex_info.is_palette_texture) - { - TCacheEntry* decoded_entry = - ApplyPaletteToEntry(entry, nullptr, tex_info.full_format.tlutfmt); - if (decoded_entry) - { - // Link the efb copy with the partially updated texture, so we won't apply this partial - // update again - entry->CreateReference(stitched_entry); - // Mark the texture update as used, as if it was loaded directly - entry->frameCount = FRAMECOUNT_INVALID; - entry = decoded_entry; - } - else - { - continue; - } - } - - s32 src_x, src_y, dst_x, dst_y; - - // Note for understanding the math: - // Normal textures can't be strided, so the 2 missing cases with src_x > 0 don't exist + int src_x, src_y, dst_x, dst_y; if (entry->addr >= stitched_entry->addr) { - s32 block_offset = (entry->addr - stitched_entry->addr) / tex_info.bytes_per_block; - s32 block_x = block_offset % numBlocksX; - s32 block_y = block_offset / numBlocksX; + int pixel_offset = (entry->addr - stitched_entry->addr) / 2; src_x = 0; src_y = 0; - dst_x = block_x * tex_info.block_width; - dst_y = block_y * tex_info.block_height; + dst_x = pixel_offset % stitched_entry->native_width; + dst_y = pixel_offset / stitched_entry->native_width; } else { - s32 srcNumBlocksX = entry->native_width / tex_info.block_width; - s32 block_offset = (stitched_entry->addr - entry->addr) / tex_info.bytes_per_block; - s32 block_x = block_offset % srcNumBlocksX; - s32 block_y = block_offset / srcNumBlocksX; - src_x = block_x * tex_info.block_width; - src_y = block_y * tex_info.block_height; + int pixel_offset = (stitched_entry->addr - entry->addr) / 2; + src_x = pixel_offset % entry->native_width; + src_y = pixel_offset / entry->native_width; dst_x = 0; dst_y = 0; } + const int native_width = + std::min(entry->native_width - src_x, 
stitched_entry->native_width - dst_x); + const int native_height = + std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y); + int src_width = native_width; + int src_height = native_height; + int dst_width = native_width; + int dst_height = native_height; + + // Scale to internal resolution. + if (entry->native_width != entry->GetWidth() || entry->native_height != entry->GetHeight()) + { + src_x = g_renderer->EFBToScaledX(src_x); + src_y = g_renderer->EFBToScaledY(src_y); + src_width = g_renderer->EFBToScaledX(src_width); + src_height = g_renderer->EFBToScaledY(src_height); + } + if (create_upscaled_copy) + { + dst_x = g_renderer->EFBToScaledX(dst_x); + dst_y = g_renderer->EFBToScaledY(dst_y); + dst_width = g_renderer->EFBToScaledX(dst_width); + dst_height = g_renderer->EFBToScaledY(dst_height); + } + // If the source rectangle is outside of what we actually have in VRAM, skip the copy. // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates // to the graphics driver, which can cause GPU resets. 
- if (static_cast(src_x) >= entry->native_width || - static_cast(src_y) >= entry->native_height || - static_cast(dst_x) >= stitched_entry->native_width || - static_cast(dst_y) >= stitched_entry->native_height) + if (static_cast(src_x + src_width) > entry->GetWidth() || + static_cast(src_y + src_height) > entry->GetHeight() || + static_cast(dst_x + dst_width) > stitched_entry->GetWidth() || + static_cast(dst_y + dst_height) > stitched_entry->GetHeight()) { continue; } - u32 copy_width = std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x); - u32 copy_height = std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y); - - // If one of the textures is scaled, scale both with the current efb scaling factor - if (stitched_entry->native_width != stitched_entry->GetWidth() || - stitched_entry->native_height != stitched_entry->GetHeight() || - entry->native_width != entry->GetWidth() || entry->native_height != entry->GetHeight()) - { - ScaleTextureCacheEntryTo(stitched_entry, - g_renderer->EFBToScaledX(stitched_entry->native_width), - g_renderer->EFBToScaledY(stitched_entry->native_height)); - ScaleTextureCacheEntryTo(entry, g_renderer->EFBToScaledX(entry->native_width), - g_renderer->EFBToScaledY(entry->native_height)); - - src_x = g_renderer->EFBToScaledX(src_x); - src_y = g_renderer->EFBToScaledY(src_y); - dst_x = g_renderer->EFBToScaledX(dst_x); - dst_y = g_renderer->EFBToScaledY(dst_y); - copy_width = g_renderer->EFBToScaledX(copy_width); - copy_height = g_renderer->EFBToScaledY(copy_height); - } - MathUtil::Rectangle srcrect, dstrect; srcrect.left = src_x; srcrect.top = src_y; - srcrect.right = (src_x + copy_width); - srcrect.bottom = (src_y + copy_height); - + srcrect.right = (src_x + src_width); + srcrect.bottom = (src_y + src_height); dstrect.left = dst_x; dstrect.top = dst_y; - dstrect.right = (dst_x + copy_width); - dstrect.bottom = (dst_y + copy_height); + dstrect.right = (dst_x + dst_width); + dstrect.bottom = (dst_y 
+ dst_height); - // If one copy is stereo, and the other isn't... not much we can do here :/ - const u32 layers_to_copy = std::min(entry->GetNumLayers(), stitched_entry->GetNumLayers()); - for (u32 layer = 0; layer < layers_to_copy; layer++) + // We may have to scale if one of the copies is not internal resolution. + if (srcrect.GetWidth() != dstrect.GetWidth() || srcrect.GetHeight() != dstrect.GetHeight()) { - stitched_entry->texture->CopyRectangleFromTexture(entry->texture.get(), srcrect, layer, 0, - dstrect, layer, 0); - } - updated_entry = true; - - if (tex_info.is_palette_texture) - { - // Remove the temporary converted texture, it won't be used anywhere else - // TODO: It would be nice to convert and copy in one step, but this code path isn't common - InvalidateTexture(GetTexCacheIter(entry)); + g_renderer->ScaleTexture(stitched_entry->framebuffer.get(), dstrect, entry->texture.get(), + srcrect); } else { - // Link the two textures together, so we won't apply this partial update again - entry->CreateReference(stitched_entry); - // Mark the texture update as used, as if it was loaded directly - entry->frameCount = FRAMECOUNT_INVALID; - } - } - - if (!updated_entry) - { - // Kinda annoying that we have to throw away the texture we just created, but with the above - // code requiring the TCacheEntry object exists, can't do much at the moment. 
- InvalidateTexture(GetTexCacheIter(stitched_entry)); - return nullptr; - } - - stitched_entry->texture->FinishedRendering(); - return stitched_entry; -} - -TextureCacheBase::TCacheEntry* -TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers) -{ - // create the entry/texture - const TextureConfig config(tex_info.native_width, tex_info.native_height, - tex_info.computed_levels, layers, 1, AbstractTextureFormat::RGBA8, - AbstractTextureFlag_RenderTarget); - TCacheEntry* entry = AllocateCacheEntry(config); - if (!entry) - return nullptr; - - textures_by_address.emplace(tex_info.address, entry); - if (tex_info.texture_cache_safety_color_sample_size == 0 || - std::max(tex_info.total_bytes, tex_info.palette_size) <= - (u32)tex_info.texture_cache_safety_color_sample_size * 8) - { - entry->textures_by_hash_iter = textures_by_hash.emplace(tex_info.full_hash, entry); - } - - entry->SetGeneralParameters(tex_info.address, tex_info.total_bytes, tex_info.full_format, false); - entry->SetDimensions(tex_info.native_width, tex_info.native_height, tex_info.computed_levels); - entry->SetHashes(tex_info.base_hash, tex_info.full_hash); - entry->is_custom_tex = false; - entry->memory_stride = entry->BytesPerRow(); - entry->SetNotCopy(); - - INCSTAT(stats.numTexturesUploaded); - SETSTAT(stats.numTexturesAlive, textures_by_address.size()); - - return entry; -} - -TextureCacheBase::TCacheEntry* -TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info) -{ - // We can decode on the GPU if it is a supported format and the flag is enabled. - // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both - // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since - // there's no conversion between formats. In the future this could be extended with a separate - // shader, however. 
- const bool decode_on_gpu = - g_ActiveConfig.UseGPUTextureDecoding() && - !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8); - - // Since it's coming from RAM, it can only have one layer (no stereo). - TCacheEntry* entry = CreateNormalTexture(tex_info, 1); - entry->may_have_overlapping_textures = false; - LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu); - entry->texture->FinishedRendering(); - return entry; -} - -void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_update, - const TextureLookupInformation& tex_info, - bool decode_on_gpu) -{ - const u8* tlut = &texMem[tex_info.tlut_address]; - - if (!decode_on_gpu || - !DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, - tex_info.full_format.texfmt, tex_info.native_width, - tex_info.native_height, tex_info.expanded_width, tex_info.expanded_height, - tex_info.bytes_per_block * - (tex_info.expanded_width / tex_info.block_width), - tlut, tex_info.full_format.tlutfmt)) - { - size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height; - CheckTempSize(decoded_texture_size); - if (!(tex_info.full_format.texfmt == TextureFormat::RGBA8 && tex_info.from_tmem)) - { - TexDecoder_Decode(temp, tex_info.src_data, tex_info.expanded_width, tex_info.expanded_height, - tex_info.full_format.texfmt, tlut, tex_info.full_format.tlutfmt); - } - else - { - u8* src_data_gb = &texMem[tex_info.tmem_address_odd]; - TexDecoder_DecodeRGBA8FromTmem(temp, tex_info.src_data, src_data_gb, tex_info.expanded_width, - tex_info.expanded_height); + // If one copy is stereo, and the other isn't... 
not much we can do here :/ + const u32 layers_to_copy = std::min(entry->GetNumLayers(), stitched_entry->GetNumLayers()); + for (u32 layer = 0; layer < layers_to_copy; layer++) + { + stitched_entry->texture->CopyRectangleFromTexture(entry->texture.get(), srcrect, layer, 0, + dstrect, layer, 0); + } } - entry_to_update->texture->Load(0, tex_info.native_width, tex_info.native_height, - tex_info.expanded_width, temp, decoded_texture_size); + // Link the two textures together, so we won't apply this partial update again + entry->CreateReference(stitched_entry); + + // Mark the texture update as used, as if it was loaded directly + entry->frameCount = FRAMECOUNT_INVALID; } } @@ -1746,8 +1608,8 @@ void TextureCacheBase::CopyRenderTargetToTexture( // For the latter, we keep the EFB resolution for the virtual XFB blit. u32 tex_w = width; u32 tex_h = height; - u32 scaled_tex_w = g_renderer->EFBToScaledX(srcRect.GetWidth()); - u32 scaled_tex_h = g_renderer->EFBToScaledY(srcRect.GetHeight()); + u32 scaled_tex_w = g_renderer->EFBToScaledX(width); + u32 scaled_tex_h = g_renderer->EFBToScaledY(height); if (scaleByHalf) { @@ -1930,7 +1792,11 @@ void TextureCacheBase::CopyRenderTargetToTexture( iter.first = InvalidateTexture(iter.first, true); continue; } - overlapping_entry->may_have_overlapping_textures = true; + + // We don't want to change the may_have_overlapping_textures flag on XFB container entries + // because otherwise they can't be re-used/updated, leaking textures for several frames. 
+ if (!overlapping_entry->is_xfb_container) + overlapping_entry->may_have_overlapping_textures = true; // There are cases (Rogue Squadron 2 / Texas Holdem on Wiiware) where // for xfb copies the textures overlap which causes the hash of the first copy @@ -2566,6 +2432,7 @@ void TextureCacheBase::TCacheEntry::SetXfbCopy(u32 stride) { is_efb_copy = false; is_xfb_copy = true; + is_xfb_container = false; memory_stride = stride; ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small"); @@ -2577,6 +2444,7 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride) { is_efb_copy = true; is_xfb_copy = false; + is_xfb_container = false; memory_stride = stride; ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small"); @@ -2586,8 +2454,9 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride) void TextureCacheBase::TCacheEntry::SetNotCopy() { - is_xfb_copy = false; is_efb_copy = false; + is_xfb_copy = false; + is_xfb_container = false; } int TextureCacheBase::TCacheEntry::HashSampleSize() const diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index bd3ceab046..980b864aa1 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -77,42 +77,6 @@ struct EFBCopyFilterCoefficients float lower; }; -struct TextureLookupInformation -{ - u32 address; - - u32 block_width; - u32 block_height; - u32 bytes_per_block; - - u32 expanded_width; - u32 expanded_height; - u32 native_width; - u32 native_height; - u32 total_bytes; - u32 native_levels = 1; - u32 computed_levels; - - u64 base_hash; - u64 full_hash; - - TextureAndTLUTFormat full_format; - u32 tlut_address = 0; - - bool is_palette_texture = false; - u32 palette_size = 0; - - bool use_mipmaps = false; - - bool from_tmem = false; - u32 tmem_address_even = 0; - u32 tmem_address_odd = 0; - - int texture_cache_safety_color_sample_size = 0; // Default to safe hashing - - u8* src_data; 
-}; - class TextureCacheBase { private: @@ -138,6 +102,7 @@ public: // content, aren't just downscaled bool should_force_safe_hashing = false; // for XFB bool is_xfb_copy = false; + bool is_xfb_container = false; u64 id; bool reference_changed = false; // used by xfb to determine when a reference xfb changed @@ -243,20 +208,9 @@ public: TLUTFormat tlutfmt = TLUTFormat::IA8, bool use_mipmaps = false, u32 tex_levels = 1, bool from_tmem = false, u32 tmem_address_even = 0, u32 tmem_address_odd = 0); + TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride, + MathUtil::Rectangle* display_rect); - TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, TextureFormat texformat, - int textureCacheSafetyColorSampleSize); - std::optional - ComputeTextureInformation(u32 address, u32 width, u32 height, TextureFormat texformat, - int textureCacheSafetyColorSampleSize, bool from_tmem, - u32 tmem_address_even, u32 tmem_address_odd, u32 tlutaddr, - TLUTFormat tlutfmt, u32 levels); - TCacheEntry* GetXFBFromCache(const TextureLookupInformation& tex_info); - TCacheEntry* GetTextureFromOverlappingTextures(const TextureLookupInformation& tex_info); - TCacheEntry* GetTextureFromMemory(const TextureLookupInformation& tex_info); - TCacheEntry* CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers); - void LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_update, - const TextureLookupInformation& tex_info, bool decode_on_gpu); virtual void BindTextures(); void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, @@ -322,10 +276,13 @@ private: void SetBackupConfig(const VideoConfig& config); + TCacheEntry* GetXFBFromCache(u32 address, u32 width, u32 height, u32 stride, u64 hash); + TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt); TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, 
TLUTFormat tlutfmt); + void StitchXFBCopy(TCacheEntry* entry_to_update); void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary); void CheckTempSize(size_t required_size);