TextureCache: Simplify XFB reconstruction

This also better handles in-memory interlaced XFB data placed by the CPU by considering the stride from the VI.
2019-03-31 14:11:53 +10:00 · 2019-03-31 14:11:53 +10:00 · 708bd3d9f7
parent dbaba0062a
commit 708bd3d9f7
8 changed files with 228 additions and 427 deletions
--- a/Source/Core/Core/HW/VideoInterface.cpp
+++ b/Source/Core/Core/HW/VideoInterface.cpp
@@ -686,6 +686,9 @@ static void BeginField(FieldType field, u64 ticks)
    xfbAddr = GetXFBAddressTop();
  }

+  // Multiply the stride by 2 to get the byte offset for each subsequent line.
+  fbStride *= 2;
+
  if (potentially_interlaced_xfb && interlaced_video_mode && g_ActiveConfig.bForceProgressive)
  {
    // Strictly speaking, in interlaced mode, we're only supposed to read
@@ -704,10 +707,10 @@ static void BeginField(FieldType field, u64 ticks)
    // offset the xfb by (-stride_of_one_line) to get the start
    // address of the full xfb.
    if (field == FieldType::Odd && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB + 1 && xfbAddr)
-      xfbAddr -= fbStride * 2;
+      xfbAddr -= fbStride;

    if (field == FieldType::Even && m_VBlankTimingOdd.PRB == m_VBlankTimingEven.PRB - 1 && xfbAddr)
-      xfbAddr -= fbStride * 2;
+      xfbAddr -= fbStride;
  }

  LogField(field, xfbAddr);
@@ -787,10 +790,8 @@ void Update(u64 ticks)
 }

 // Create a fake VI mode for a fifolog
-void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height)
+void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_stride, u32 fb_height)
 {
-  u32 fb_stride = fb_width;
-
  bool interlaced = fb_height > 480 / 2;
  if (interlaced)
  {
@@ -807,7 +808,7 @@ void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height)
  m_VBlankTimingEven.PRB = 503 - fb_height * 2;
  m_VBlankTimingEven.PSB = 4;
  m_PictureConfiguration.WPL = fb_width / 16;
-  m_PictureConfiguration.STD = fb_stride / 16;
+  m_PictureConfiguration.STD = (fb_stride / 2) / 16;

  UpdateParameters();

--- a/Source/Core/Core/HW/VideoInterface.h
+++ b/Source/Core/Core/HW/VideoInterface.h
@@ -374,6 +374,6 @@ u32 GetTicksPerField();
 float GetAspectRatio();

 // Create a fake VI mode for a fifolog
-void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_height);
+void FakeVIUpdate(u32 xfb_address, u32 fb_width, u32 fb_stride, u32 fb_height);

 }  // namespace VideoInterface
--- a/Source/Core/VideoCommon/AsyncRequests.cpp
+++ b/Source/Core/VideoCommon/AsyncRequests.cpp
@@ -113,7 +113,6 @@ void AsyncRequests::SetEnable(bool enable)

 void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)
 {
-  EFBRectangle rc;
  switch (e.type)
  {
  case Event::EFB_POKE_COLOR:
@@ -145,7 +144,7 @@ void AsyncRequests::HandleEvent(const AsyncRequests::Event& e)

  case Event::SWAP_EVENT:
    g_renderer->Swap(e.swap_event.xfbAddr, e.swap_event.fbWidth, e.swap_event.fbStride,
-                     e.swap_event.fbHeight, rc, e.time);
+                     e.swap_event.fbHeight, e.time);
    break;

  case Event::BBOX_READ:
--- a/Source/Core/VideoCommon/BPStructs.cpp
+++ b/Source/Core/VideoCommon/BPStructs.cpp
@@ -312,14 +312,13 @@ static void BPWritten(const BPCmd& bp)
      if (g_ActiveConfig.bImmediateXFB)
      {
        // below div two to convert from bytes to pixels - it expects width, not stride
-        g_renderer->Swap(destAddr, destStride / 2, destStride / 2, height, srcRect,
-                         CoreTiming::GetTicks());
+        g_renderer->Swap(destAddr, destStride / 2, destStride, height, CoreTiming::GetTicks());
      }
      else
      {
        if (FifoPlayer::GetInstance().IsRunningWithFakeVideoInterfaceUpdates())
        {
-          VideoInterface::FakeVIUpdate(destAddr, srcRect.GetWidth(), height);
+          VideoInterface::FakeVIUpdate(destAddr, srcRect.GetWidth(), destStride, height);
        }
      }
    }
--- a/Source/Core/VideoCommon/RenderBase.cpp
+++ b/Source/Core/VideoCommon/RenderBase.cpp
@@ -1154,8 +1154,7 @@ void Renderer::EndUIFrame()
  BeginImGuiFrame();
 }

-void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
-                    u64 ticks)
+void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks)
 {
  const AspectMode suggested = g_ActiveConfig.suggested_aspect_mode;
  if (suggested == AspectMode::Analog || suggested == AspectMode::AnalogWide)
@@ -1188,33 +1187,15 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
  // behind the renderer.
  FlushFrameDump();

-  if (xfbAddr && fbWidth && fbStride && fbHeight)
+  if (xfb_addr && fb_width && fb_stride && fb_height)
  {
-    constexpr int force_safe_texture_cache_hash = 0;
    // Get the current XFB from texture cache
-    auto* xfb_entry = g_texture_cache->GetXFBTexture(
-        xfbAddr, fbStride, fbHeight, TextureFormat::XFB, force_safe_texture_cache_hash);
-
+    MathUtil::Rectangle<int> xfb_rect;
+    const auto* xfb_entry =
+        g_texture_cache->GetXFBTexture(xfb_addr, fb_width, fb_height, fb_stride, &xfb_rect);
    if (xfb_entry && xfb_entry->id != m_last_xfb_id)
    {
-      const TextureConfig& texture_config = xfb_entry->texture->GetConfig();
-      m_last_xfb_texture = xfb_entry->texture.get();
      m_last_xfb_id = xfb_entry->id;
-      m_last_xfb_ticks = ticks;
-
-      auto xfb_rect = texture_config.GetRect();
-
-      // It's possible that the returned XFB texture is native resolution
-      // even when we're rendering at higher than native resolution
-      // if the XFB was was loaded entirely from console memory.
-      // If so, adjust the rectangle by native resolution instead of scaled resolution.
-      const u32 native_stride_width_difference = fbStride - fbWidth;
-      if (texture_config.width == xfb_entry->native_width)
-        xfb_rect.right -= native_stride_width_difference;
-      else
-        xfb_rect.right -= EFBToScaledX(native_stride_width_difference);
-
-      m_last_xfb_region = xfb_rect;

      // Since we use the common pipelines here and draw vertices if a batch is currently being
      // built by the vertex loader, we end up trampling over its pointer, as we share the buffer
@@ -1247,7 +1228,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const

        // Update the window size based on the frame that was just rendered.
        // Due to depending on guest state, we need to call this every frame.
-        SetWindowSize(texture_config.width, texture_config.height);
+        SetWindowSize(xfb_rect.GetWidth(), xfb_rect.GetHeight());
      }

      m_fps_counter.Update();
@@ -1259,7 +1240,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
      DolphinAnalytics::Instance()->ReportPerformanceInfo(std::move(perf_sample));

      if (IsFrameDumping())
-        DumpCurrentFrame();
+        DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks);

      // Begin new frame
      m_frame_count++;
@@ -1295,8 +1276,8 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const
    }

    // Update our last xfb values
-    m_last_xfb_width = (fbStride < 1 || fbStride > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fbStride;
-    m_last_xfb_height = (fbHeight < 1 || fbHeight > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fbHeight;
+    m_last_xfb_width = (fb_width < 1 || fb_width > MAX_XFB_WIDTH) ? MAX_XFB_WIDTH : fb_width;
+    m_last_xfb_height = (fb_height < 1 || fb_height > MAX_XFB_HEIGHT) ? MAX_XFB_HEIGHT : fb_height;
  }
  else
  {
@@ -1333,8 +1314,11 @@ bool Renderer::IsFrameDumping()
  return false;
 }

-void Renderer::DumpCurrentFrame()
+void Renderer::DumpCurrentFrame(const AbstractTexture* src_texture,
+                                const MathUtil::Rectangle<int>& src_rect, u64 ticks)
 {
+  int source_width = src_rect.GetWidth();
+  int source_height = src_rect.GetHeight();
  int target_width, target_height;
  if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless())
  {
@@ -1344,22 +1328,20 @@ void Renderer::DumpCurrentFrame()
  }
  else
  {
-    std::tie(target_width, target_height) = CalculateOutputDimensions(
-        m_last_xfb_texture->GetConfig().width, m_last_xfb_texture->GetConfig().height);
+    std::tie(target_width, target_height) = CalculateOutputDimensions(source_width, source_height);
  }

  // We only need to render a copy if we need to stretch/scale the XFB copy.
-  const AbstractTexture* source_tex = m_last_xfb_texture;
-  MathUtil::Rectangle<int> source_rect = m_last_xfb_region;
-  if (source_rect.GetWidth() != target_width || source_rect.GetHeight() != target_height)
+  MathUtil::Rectangle<int> copy_rect = src_rect;
+  if (source_width != target_width || source_height != target_height)
  {
    if (!CheckFrameDumpRenderTexture(target_width, target_height))
      return;

-    source_tex = m_frame_dump_render_texture.get();
-    source_rect = MathUtil::Rectangle<int>(0, 0, target_width, target_height);
-    ScaleTexture(m_frame_dump_render_framebuffer.get(), source_rect, m_last_xfb_texture,
-                 m_last_xfb_region);
+    ScaleTexture(m_frame_dump_render_framebuffer.get(), m_frame_dump_render_framebuffer->GetRect(),
+                 src_texture, src_rect);
+    src_texture = m_frame_dump_render_texture.get();
+    copy_rect = src_texture->GetRect();
  }

  // Index 0 was just sent to AVI dump. Swap with the second texture.
@@ -1369,12 +1351,9 @@ void Renderer::DumpCurrentFrame()
  if (!CheckFrameDumpReadbackTexture(target_width, target_height))
    return;

-  const auto converted_region =
-      ConvertFramebufferRectangle(source_rect, source_tex->GetWidth(), source_tex->GetHeight());
-  m_frame_dump_readback_textures[0]->CopyFromTexture(
-      source_tex, converted_region, 0, 0,
-      MathUtil::Rectangle<int>(0, 0, target_width, target_height));
-  m_last_frame_state = AVIDump::FetchState(m_last_xfb_ticks);
+  m_frame_dump_readback_textures[0]->CopyFromTexture(src_texture, copy_rect, 0, 0,
+                                                     m_frame_dump_readback_textures[0]->GetRect());
+  m_last_frame_state = AVIDump::FetchState(ticks);
  m_last_frame_exported = true;
 }

--- a/Source/Core/VideoCommon/RenderBase.h
+++ b/Source/Core/VideoCommon/RenderBase.h
@@ -213,8 +213,7 @@ public:
  virtual void WaitForGPUIdle() {}

  // Finish up the current frame, print some stats
-  void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc,
-            u64 ticks);
+  void Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u64 ticks);

  // Draws the specified XFB buffer to the screen, performing any post-processing.
  // Assumes that the backbuffer has already been bound and cleared.
@@ -350,10 +349,7 @@ private:
  bool m_last_frame_exported = false;

  // Tracking of XFB textures so we don't render duplicate frames.
-  AbstractTexture* m_last_xfb_texture = nullptr;
  u64 m_last_xfb_id = std::numeric_limits<u64>::max();
-  u64 m_last_xfb_ticks = 0;
-  EFBRectangle m_last_xfb_region;

  // Note: Only used for auto-ir
  u32 m_last_xfb_width = MAX_XFB_WIDTH;
@@ -377,7 +373,8 @@ private:
  bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height);

  // Fills the frame dump staging texture with the current XFB texture.
-  void DumpCurrentFrame();
+  void DumpCurrentFrame(const AbstractTexture* src_texture,
+                        const MathUtil::Rectangle<int>& src_rect, u64 ticks);

  // Asynchronously encodes the specified pointer of frame data to the frame dump.
  void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state);
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -471,6 +471,7 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
    }
    ++iter.first;
  }
+
  return entry_to_update;
 }

@@ -952,7 +953,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
          entry->native_height == nativeH)
      {
        entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt);
-
+        entry->texture->FinishedRendering();
        return entry;
      }
    }
@@ -1003,7 +1004,7 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
          entry->native_width == nativeW && entry->native_height == nativeH)
      {
        entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt);
-
+        entry->texture->FinishedRendering();
        return entry;
      }
      ++hash_iter;
@@ -1209,142 +1210,115 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo
  return entry;
 }

-TextureCacheBase::TCacheEntry*
-TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, TextureFormat tex_format,
-                                int texture_cache_safety_color_sample_size)
+static void GetDisplayRectForXFBEntry(TextureCacheBase::TCacheEntry* entry, u32 width, u32 height,
+                                      MathUtil::Rectangle<int>* display_rect)
 {
-  auto tex_info = ComputeTextureInformation(address, width, height, tex_format,
-                                            texture_cache_safety_color_sample_size, false, 0, 0, 0,
-                                            TLUTFormat::IA8, 1);
-  if (!tex_info)
-    return nullptr;
+  // Scale the sub-rectangle to the full resolution of the texture.
+  display_rect->left = 0;
+  display_rect->top = 0;
+  display_rect->right = static_cast<int>(width * entry->GetWidth() / entry->native_width);
+  display_rect->bottom = static_cast<int>(height * entry->GetHeight() / entry->native_height);
+}

-  // Try a direct lookup by address/hash.
-  const TextureLookupInformation tex_info_value = tex_info.value();
-  TCacheEntry* entry = GetXFBFromCache(tex_info_value);
-  if (entry)
-    return entry;
-
-  // At this point, the XFB wasn't found in cache. This means the address is most likely not
-  // pointing at an xfb copy but instead an area of memory.  Let's attempt to stitch all entries in
-  // this memory space together
-  bool loaded_from_overlapping = true;
-  entry = GetTextureFromOverlappingTextures(tex_info_value);
-  if (!entry)
+TextureCacheBase::TCacheEntry*
+TextureCacheBase::GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
+                                MathUtil::Rectangle<int>* display_rect)
+{
+  const u8* src_data = Memory::GetPointer(address);
+  if (!src_data)
  {
-    // At this point, the xfb address is truly "bogus" it likely is an area of memory defined by the
-    // CPU, so load it from memory.
-    entry = GetTextureFromMemory(tex_info_value);
-    loaded_from_overlapping = false;
+    ERROR_LOG(VIDEO, "Trying to load XFB texture from invalid address 0x%8x", address);
+    return nullptr;
  }

+  // Compute total texture size. XFB textures aren't tiled, so this is simple.
+  const u32 total_size = height * stride;
+  const u64 hash = Common::GetHash64(src_data, total_size, 0);
+
+  // Do we currently have a version of this XFB copy in VRAM?
+  TCacheEntry* entry = GetXFBFromCache(address, width, height, stride, hash);
+  if (entry)
+  {
+    if (entry->is_xfb_container)
+    {
+      StitchXFBCopy(entry);
+      entry->texture->FinishedRendering();
+    }
+
+    GetDisplayRectForXFBEntry(entry, width, height, display_rect);
+    return entry;
+  }
+
+  // Create a new VRAM texture, and fill it with the data from guest RAM.
+  entry = AllocateCacheEntry(TextureConfig(width, height, 1, 1, 1, AbstractTextureFormat::RGBA8,
+                                           AbstractTextureFlag_RenderTarget));
+  entry->SetGeneralParameters(address, total_size,
+                              TextureAndTLUTFormat(TextureFormat::XFB, TLUTFormat::IA8), true);
+  entry->SetDimensions(width, height, 1);
+  entry->SetHashes(hash, hash);
+  entry->SetXfbCopy(stride);
+  entry->is_xfb_container = true;
+  entry->is_custom_tex = false;
+  entry->may_have_overlapping_textures = false;
+  entry->frameCount = FRAMECOUNT_INVALID;
+  if (!g_ActiveConfig.UseGPUTextureDecoding() ||
+      !DecodeTextureOnGPU(entry, 0, src_data, total_size, entry->format.texfmt, width, height,
+                          width, height, stride, texMem, entry->format.tlutfmt))
+  {
+    const u32 decoded_size = width * height * sizeof(u32);
+    CheckTempSize(decoded_size);
+    TexDecoder_DecodeXFB(temp, src_data, width, height, stride);
+    entry->texture->Load(0, width, height, width, temp, decoded_size);
+  }
+
+  // Stitch any VRAM copies into the new RAM copy.
+  StitchXFBCopy(entry);
+  entry->texture->FinishedRendering();
+
+  // Insert into the texture cache so we can re-use it next frame, if needed.
+  textures_by_address.emplace(entry->addr, entry);
+  SETSTAT(stats.numTexturesAlive, textures_by_address.size());
+  INCSTAT(stats.numTexturesUploaded);
+
  if (g_ActiveConfig.bDumpXFBTarget)
  {
    // While this isn't really an xfb copy, we can treat it as such
    // for dumping purposes
    static int xfb_count = 0;
-    const std::string xfb_type = loaded_from_overlapping ? "combined" : "from_memory";
-    entry->texture->Save(StringFromFormat("%sxfb_%s_%i.png",
+    entry->texture->Save(StringFromFormat("%sloaded_xfb_%i.png",
                                          File::GetUserPath(D_DUMPTEXTURES_IDX).c_str(),
-                                          xfb_type.c_str(), xfb_count++),
+                                          xfb_count++),
                         0);
  }

+  GetDisplayRectForXFBEntry(entry, width, height, display_rect);
  return entry;
 }

-std::optional<TextureLookupInformation> TextureCacheBase::ComputeTextureInformation(
-    u32 address, u32 width, u32 height, TextureFormat tex_format,
-    int texture_cache_safety_color_sample_size, bool from_tmem, u32 tmem_address_even,
-    u32 tmem_address_odd, u32 tlut_address, TLUTFormat tlut_format, u32 levels)
+TextureCacheBase::TCacheEntry* TextureCacheBase::GetXFBFromCache(u32 address, u32 width, u32 height,
+                                                                 u32 stride, u64 hash)
 {
-  TextureLookupInformation tex_info;
-
-  tex_info.from_tmem = from_tmem;
-  tex_info.tmem_address_even = tmem_address_even;
-  tex_info.tmem_address_odd = tmem_address_odd;
-
-  tex_info.address = address;
-
-  if (from_tmem)
-    tex_info.src_data = &texMem[tex_info.tmem_address_even];
-  else
-    tex_info.src_data = Memory::GetPointer(tex_info.address);
-
-  if (tex_info.src_data == nullptr)
-  {
-    ERROR_LOG(VIDEO, "Trying to use an invalid texture address 0x%8x", tex_info.address);
-    return {};
-  }
-
-  tex_info.texture_cache_safety_color_sample_size = texture_cache_safety_color_sample_size;
-
-  // TexelSizeInNibbles(format) * width * height / 16;
-  tex_info.block_width = TexDecoder_GetBlockWidthInTexels(tex_format);
-  tex_info.block_height = TexDecoder_GetBlockHeightInTexels(tex_format);
-
-  tex_info.bytes_per_block = (tex_info.block_width * tex_info.block_height *
-                              TexDecoder_GetTexelSizeInNibbles(tex_format)) /
-                             2;
-
-  tex_info.expanded_width = Common::AlignUp(width, tex_info.block_width);
-  tex_info.expanded_height = Common::AlignUp(height, tex_info.block_height);
-
-  tex_info.total_bytes = TexDecoder_GetTextureSizeInBytes(tex_info.expanded_width,
-                                                          tex_info.expanded_height, tex_format);
-
-  tex_info.native_width = width;
-  tex_info.native_height = height;
-  tex_info.native_levels = levels;
-
-  // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
-  // the mipmap chain
-  // e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we
-  // limit the mipmap count to 6 there
-  tex_info.computed_levels = std::min<u32>(
-      IntLog2(std::max(tex_info.native_width, tex_info.native_height)) + 1, tex_info.native_levels);
-
-  tex_info.full_format = TextureAndTLUTFormat(tex_format, tlut_format);
-  tex_info.tlut_address = tlut_address;
-
-  // TODO: This doesn't hash GB tiles for preloaded RGBA8 textures (instead, it's hashing more data
-  // from the low tmem bank than it should)
-  tex_info.base_hash = Common::GetHash64(tex_info.src_data, tex_info.total_bytes,
-                                         tex_info.texture_cache_safety_color_sample_size);
-
-  tex_info.is_palette_texture = IsColorIndexed(tex_format);
-
-  if (tex_info.is_palette_texture)
-  {
-    tex_info.palette_size = TexDecoder_GetPaletteSize(tex_format);
-    tex_info.full_hash = tex_info.base_hash ^
-                         Common::GetHash64(&texMem[tex_info.tlut_address], tex_info.palette_size,
-                                           tex_info.texture_cache_safety_color_sample_size);
-  }
-  else
-  {
-    tex_info.full_hash = tex_info.base_hash;
-  }
-
-  return tex_info;
-}
-
-TextureCacheBase::TCacheEntry*
-TextureCacheBase::GetXFBFromCache(const TextureLookupInformation& tex_info)
-{
-  auto iter_range = textures_by_address.equal_range(tex_info.address);
+  auto iter_range = textures_by_address.equal_range(address);
  TexAddrCache::iterator iter = iter_range.first;

  while (iter != iter_range.second)
  {
    TCacheEntry* entry = iter->second;

-    if ((entry->is_xfb_copy || entry->format.texfmt == TextureFormat::XFB) &&
-        entry->native_width == tex_info.native_width &&
-        entry->native_height == tex_info.native_height &&
-        entry->memory_stride == entry->BytesPerRow() && !entry->may_have_overlapping_textures)
+    // The only thing which has to match exactly is the stride. We can use a partial rectangle if
+    // the VI width/height differs from that of the XFB copy.
+    if (entry->is_xfb_copy && entry->memory_stride == stride && entry->native_width >= width &&
+        entry->native_height >= height && !entry->may_have_overlapping_textures)
    {
-      if (tex_info.base_hash == entry->hash && !entry->reference_changed)
+      // But if the dimensions do differ, re-hash memory using the entry's own stride and height.
+      u64 check_hash = hash;
+      if (entry->native_width != width || entry->native_height != height)
+      {
+        check_hash = Common::GetHash64(Memory::GetPointer(entry->addr),
+                                       entry->memory_stride * entry->native_height, 0);
+      }
+
+      if (entry->hash == check_hash && !entry->reference_changed)
      {
        return entry;
      }
@@ -1364,37 +1338,36 @@ TextureCacheBase::GetXFBFromCache(const TextureLookupInformation& tex_info)
  return nullptr;
 }

-TextureCacheBase::TCacheEntry*
-TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformation& tex_info)
+void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
 {
-  u32 numBlocksX = tex_info.native_width / tex_info.block_width;
-
-  // XFBs created for the purpose of being a container for textures from memory
-  // or as a container for overlapping textures, never need to be combined
-  // with other textures
-  TCacheEntry* stitched_entry =
-      CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers());
-  stitched_entry->may_have_overlapping_textures = false;
-
-  // It is possible that some of the overlapping textures overlap each other.
-  // This behavior has been seen with XFB copies in Rogue Leader.
-  // To get the correct result, we apply the texture updates in the order the textures were
-  // originally loaded. This ensures that the parts of the texture that would have been overwritten
-  // in memory on real hardware get overwritten the same way here too.
-  // This should work, but it may be a better idea to keep track of partial XFB copy invalidations
-  // instead, which would reduce the amount of copying work here.
+  // It is possible that some of the overlapping textures overlap each other. This behavior has been
+  // seen with XFB copies in Rogue Leader. To get the correct result, we apply the texture updates
+  // in the order the textures were originally loaded. This ensures that the parts of the texture
+  // that would have been overwritten in memory on real hardware get overwritten the same way here
+  // too. This should work, but it may be a better idea to keep track of partial XFB copy
+  // invalidations instead, which would reduce the amount of copying work here.
  std::vector<TCacheEntry*> candidates;
+  bool create_upscaled_copy = false;

-  auto iter = FindOverlappingTextures(tex_info.address, tex_info.total_bytes);
+  auto iter = FindOverlappingTextures(stitched_entry->addr, stitched_entry->size_in_bytes);
  while (iter.first != iter.second)
  {
+    // Currently, this checks the stride of the VRAM copy against the VI request. Therefore, for
+    // interlaced modes, VRAM copies won't be considered candidates. This is okay for now, because
+    // our force progressive hack means that an XFB copy should always have a matching stride. If
+    // the hack is disabled, XFB2RAM should also be enabled. Should we wish to implement interlaced
+    // stitching in the future, this would require a shader which grabs every second line.
    TCacheEntry* entry = iter.first->second;
-    if (entry->IsCopy() && !entry->tmem_only &&
-        entry->OverlapsMemoryRange(tex_info.address, tex_info.total_bytes) &&
+    if (entry != stitched_entry && entry->IsCopy() && !entry->tmem_only &&
+        entry->OverlapsMemoryRange(stitched_entry->addr, stitched_entry->size_in_bytes) &&
        entry->memory_stride == stitched_entry->memory_stride)
    {
      if (entry->hash == entry->CalculateHash())
      {
+        // Can't check the height here because of Y scaling.
+        if (entry->native_width != entry->GetWidth())
+          create_upscaled_copy = true;
+
        candidates.emplace_back(entry);
      }
      else
@@ -1407,101 +1380,94 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
    ++iter.first;
  }

+  if (candidates.empty())
+    return;
+
  std::sort(candidates.begin(), candidates.end(),
            [](const TCacheEntry* a, const TCacheEntry* b) { return a->id < b->id; });

-  bool updated_entry = false;
+  // We only upscale when necessary to preserve resolution. i.e. when there are upscaled partial
+  // copies to be stitched together.
+  if (create_upscaled_copy)
+  {
+    ScaleTextureCacheEntryTo(stitched_entry, g_renderer->EFBToScaledX(stitched_entry->native_width),
+                             g_renderer->EFBToScaledY(stitched_entry->native_height));
+  }
+
  for (TCacheEntry* entry : candidates)
  {
-    if (tex_info.is_palette_texture)
-    {
-      TCacheEntry* decoded_entry =
-          ApplyPaletteToEntry(entry, nullptr, tex_info.full_format.tlutfmt);
-      if (decoded_entry)
-      {
-        // Link the efb copy with the partially updated texture, so we won't apply this partial
-        // update again
-        entry->CreateReference(stitched_entry);
-        // Mark the texture update as used, as if it was loaded directly
-        entry->frameCount = FRAMECOUNT_INVALID;
-        entry = decoded_entry;
-      }
-      else
-      {
-        continue;
-      }
-    }
-
-    s32 src_x, src_y, dst_x, dst_y;
-
-    // Note for understanding the math:
-    // Normal textures can't be strided, so the 2 missing cases with src_x > 0 don't exist
+    int src_x, src_y, dst_x, dst_y;
    if (entry->addr >= stitched_entry->addr)
    {
-      s32 block_offset = (entry->addr - stitched_entry->addr) / tex_info.bytes_per_block;
-      s32 block_x = block_offset % numBlocksX;
-      s32 block_y = block_offset / numBlocksX;
+      int pixel_offset = (entry->addr - stitched_entry->addr) / 2;
      src_x = 0;
      src_y = 0;
-      dst_x = block_x * tex_info.block_width;
-      dst_y = block_y * tex_info.block_height;
+      dst_x = pixel_offset % stitched_entry->native_width;
+      dst_y = pixel_offset / stitched_entry->native_width;
    }
    else
    {
-      s32 srcNumBlocksX = entry->native_width / tex_info.block_width;
-      s32 block_offset = (stitched_entry->addr - entry->addr) / tex_info.bytes_per_block;
-      s32 block_x = block_offset % srcNumBlocksX;
-      s32 block_y = block_offset / srcNumBlocksX;
-      src_x = block_x * tex_info.block_width;
-      src_y = block_y * tex_info.block_height;
+      int pixel_offset = (stitched_entry->addr - entry->addr) / 2;
+      src_x = pixel_offset % entry->native_width;
+      src_y = pixel_offset / entry->native_width;
      dst_x = 0;
      dst_y = 0;
    }

+    const int native_width =
+        std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x);
+    const int native_height =
+        std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y);
+    int src_width = native_width;
+    int src_height = native_height;
+    int dst_width = native_width;
+    int dst_height = native_height;
+
+    // Scale to internal resolution.
+    if (entry->native_width != entry->GetWidth() || entry->native_height != entry->GetHeight())
+    {
+      src_x = g_renderer->EFBToScaledX(src_x);
+      src_y = g_renderer->EFBToScaledY(src_y);
+      src_width = g_renderer->EFBToScaledX(src_width);
+      src_height = g_renderer->EFBToScaledY(src_height);
+    }
+    if (create_upscaled_copy)
+    {
+      dst_x = g_renderer->EFBToScaledX(dst_x);
+      dst_y = g_renderer->EFBToScaledY(dst_y);
+      dst_width = g_renderer->EFBToScaledX(dst_width);
+      dst_height = g_renderer->EFBToScaledY(dst_height);
+    }
+
    // If the source rectangle is outside of what we actually have in VRAM, skip the copy.
    // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates
    // to the graphics driver, which can cause GPU resets.
-    if (static_cast<u32>(src_x) >= entry->native_width ||
-        static_cast<u32>(src_y) >= entry->native_height ||
-        static_cast<u32>(dst_x) >= stitched_entry->native_width ||
-        static_cast<u32>(dst_y) >= stitched_entry->native_height)
+    if (static_cast<u32>(src_x + src_width) > entry->GetWidth() ||
+        static_cast<u32>(src_y + src_height) > entry->GetHeight() ||
+        static_cast<u32>(dst_x + dst_width) > stitched_entry->GetWidth() ||
+        static_cast<u32>(dst_y + dst_height) > stitched_entry->GetHeight())
    {
      continue;
    }

-    u32 copy_width = std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x);
-    u32 copy_height = std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y);
-
-    // If one of the textures is scaled, scale both with the current efb scaling factor
-    if (stitched_entry->native_width != stitched_entry->GetWidth() ||
-        stitched_entry->native_height != stitched_entry->GetHeight() ||
-        entry->native_width != entry->GetWidth() || entry->native_height != entry->GetHeight())
-    {
-      ScaleTextureCacheEntryTo(stitched_entry,
-                               g_renderer->EFBToScaledX(stitched_entry->native_width),
-                               g_renderer->EFBToScaledY(stitched_entry->native_height));
-      ScaleTextureCacheEntryTo(entry, g_renderer->EFBToScaledX(entry->native_width),
-                               g_renderer->EFBToScaledY(entry->native_height));
-
-      src_x = g_renderer->EFBToScaledX(src_x);
-      src_y = g_renderer->EFBToScaledY(src_y);
-      dst_x = g_renderer->EFBToScaledX(dst_x);
-      dst_y = g_renderer->EFBToScaledY(dst_y);
-      copy_width = g_renderer->EFBToScaledX(copy_width);
-      copy_height = g_renderer->EFBToScaledY(copy_height);
-    }
-
    MathUtil::Rectangle<int> srcrect, dstrect;
    srcrect.left = src_x;
    srcrect.top = src_y;
-    srcrect.right = (src_x + copy_width);
-    srcrect.bottom = (src_y + copy_height);
-
+    srcrect.right = (src_x + src_width);
+    srcrect.bottom = (src_y + src_height);
    dstrect.left = dst_x;
    dstrect.top = dst_y;
-    dstrect.right = (dst_x + copy_width);
-    dstrect.bottom = (dst_y + copy_height);
+    dstrect.right = (dst_x + dst_width);
+    dstrect.bottom = (dst_y + dst_height);

+    // We may have to scale if one of the copies is not internal resolution.
+    if (srcrect.GetWidth() != dstrect.GetWidth() || srcrect.GetHeight() != dstrect.GetHeight())
+    {
+      g_renderer->ScaleTexture(stitched_entry->framebuffer.get(), dstrect, entry->texture.get(),
+                               srcrect);
+    }
+    else
+    {
      // If one copy is stereo, and the other isn't... not much we can do here :/
      const u32 layers_to_copy = std::min(entry->GetNumLayers(), stitched_entry->GetNumLayers());
      for (u32 layer = 0; layer < layers_to_copy; layer++)
@ -1509,118 +1475,14 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati
        stitched_entry->texture->CopyRectangleFromTexture(entry->texture.get(), srcrect, layer, 0,
                                                          dstrect, layer, 0);
      }
-    updated_entry = true;
-
-    if (tex_info.is_palette_texture)
-    {
-      // Remove the temporary converted texture, it won't be used anywhere else
-      // TODO: It would be nice to convert and copy in one step, but this code path isn't common
-      InvalidateTexture(GetTexCacheIter(entry));
    }
-    else
-    {
+
    // Link the two textures together, so we won't apply this partial update again
    entry->CreateReference(stitched_entry);
+
    // Mark the texture update as used, as if it was loaded directly
    entry->frameCount = FRAMECOUNT_INVALID;
  }
-  }
-
-  if (!updated_entry)
-  {
-    // Kinda annoying that we have to throw away the texture we just created, but with the above
-    // code requiring the TCacheEntry object exists, can't do much at the moment.
-    InvalidateTexture(GetTexCacheIter(stitched_entry));
-    return nullptr;
-  }
-
-  stitched_entry->texture->FinishedRendering();
-  return stitched_entry;
-}
-
-TextureCacheBase::TCacheEntry*
-TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers)
-{
-  // create the entry/texture
-  const TextureConfig config(tex_info.native_width, tex_info.native_height,
-                             tex_info.computed_levels, layers, 1, AbstractTextureFormat::RGBA8,
-                             AbstractTextureFlag_RenderTarget);
-  TCacheEntry* entry = AllocateCacheEntry(config);
-  if (!entry)
-    return nullptr;
-
-  textures_by_address.emplace(tex_info.address, entry);
-  if (tex_info.texture_cache_safety_color_sample_size == 0 ||
-      std::max(tex_info.total_bytes, tex_info.palette_size) <=
-          (u32)tex_info.texture_cache_safety_color_sample_size * 8)
-  {
-    entry->textures_by_hash_iter = textures_by_hash.emplace(tex_info.full_hash, entry);
-  }
-
-  entry->SetGeneralParameters(tex_info.address, tex_info.total_bytes, tex_info.full_format, false);
-  entry->SetDimensions(tex_info.native_width, tex_info.native_height, tex_info.computed_levels);
-  entry->SetHashes(tex_info.base_hash, tex_info.full_hash);
-  entry->is_custom_tex = false;
-  entry->memory_stride = entry->BytesPerRow();
-  entry->SetNotCopy();
-
-  INCSTAT(stats.numTexturesUploaded);
-  SETSTAT(stats.numTexturesAlive, textures_by_address.size());
-
-  return entry;
-}
-
-TextureCacheBase::TCacheEntry*
-TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info)
-{
-  // We can decode on the GPU if it is a supported format and the flag is enabled.
-  // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
-  // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since
-  // there's no conversion between formats. In the future this could be extended with a separate
-  // shader, however.
-  const bool decode_on_gpu =
-      g_ActiveConfig.UseGPUTextureDecoding() &&
-      !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8);
-
-  // Since it's coming from RAM, it can only have one layer (no stereo).
-  TCacheEntry* entry = CreateNormalTexture(tex_info, 1);
-  entry->may_have_overlapping_textures = false;
-  LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu);
-  entry->texture->FinishedRendering();
-  return entry;
-}
-
-void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_update,
-                                                      const TextureLookupInformation& tex_info,
-                                                      bool decode_on_gpu)
-{
-  const u8* tlut = &texMem[tex_info.tlut_address];
-
-  if (!decode_on_gpu ||
-      !DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes,
-                          tex_info.full_format.texfmt, tex_info.native_width,
-                          tex_info.native_height, tex_info.expanded_width, tex_info.expanded_height,
-                          tex_info.bytes_per_block *
-                              (tex_info.expanded_width / tex_info.block_width),
-                          tlut, tex_info.full_format.tlutfmt))
-  {
-    size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height;
-    CheckTempSize(decoded_texture_size);
-    if (!(tex_info.full_format.texfmt == TextureFormat::RGBA8 && tex_info.from_tmem))
-    {
-      TexDecoder_Decode(temp, tex_info.src_data, tex_info.expanded_width, tex_info.expanded_height,
-                        tex_info.full_format.texfmt, tlut, tex_info.full_format.tlutfmt);
-    }
-    else
-    {
-      u8* src_data_gb = &texMem[tex_info.tmem_address_odd];
-      TexDecoder_DecodeRGBA8FromTmem(temp, tex_info.src_data, src_data_gb, tex_info.expanded_width,
-                                     tex_info.expanded_height);
-    }
-
-    entry_to_update->texture->Load(0, tex_info.native_width, tex_info.native_height,
-                                   tex_info.expanded_width, temp, decoded_texture_size);
-  }
 }

 EFBCopyFilterCoefficients
@ -1746,8 +1608,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(
  // For the latter, we keep the EFB resolution for the virtual XFB blit.
  u32 tex_w = width;
  u32 tex_h = height;
-  u32 scaled_tex_w = g_renderer->EFBToScaledX(srcRect.GetWidth());
-  u32 scaled_tex_h = g_renderer->EFBToScaledY(srcRect.GetHeight());
+  u32 scaled_tex_w = g_renderer->EFBToScaledX(width);
+  u32 scaled_tex_h = g_renderer->EFBToScaledY(height);

  if (scaleByHalf)
  {
@ -1930,6 +1792,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(
        iter.first = InvalidateTexture(iter.first, true);
        continue;
      }
+
+      // We don't want to change the may_have_overlapping_textures flag on XFB container entries
+      // because otherwise they can't be re-used/updated, leaking textures for several frames.
+      if (!overlapping_entry->is_xfb_container)
        overlapping_entry->may_have_overlapping_textures = true;

      // There are cases (Rogue Squadron 2 / Texas Holdem on Wiiware) where
@ -2566,6 +2432,7 @@ void TextureCacheBase::TCacheEntry::SetXfbCopy(u32 stride)
 {
  is_efb_copy = false;
  is_xfb_copy = true;
+  is_xfb_container = false;
  memory_stride = stride;

  ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small");
@ -2577,6 +2444,7 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride)
 {
  is_efb_copy = true;
  is_xfb_copy = false;
+  is_xfb_container = false;
  memory_stride = stride;

  ASSERT_MSG(VIDEO, memory_stride >= BytesPerRow(), "Memory stride is too small");
@ -2586,8 +2454,9 @@ void TextureCacheBase::TCacheEntry::SetEfbCopy(u32 stride)

 void TextureCacheBase::TCacheEntry::SetNotCopy()
 {
-  is_xfb_copy = false;
  is_efb_copy = false;
+  is_xfb_copy = false;
+  is_xfb_container = false;
 }

 int TextureCacheBase::TCacheEntry::HashSampleSize() const
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@ -77,42 +77,6 @@ struct EFBCopyFilterCoefficients
  float lower;
 };

-struct TextureLookupInformation
-{
-  u32 address;
-
-  u32 block_width;
-  u32 block_height;
-  u32 bytes_per_block;
-
-  u32 expanded_width;
-  u32 expanded_height;
-  u32 native_width;
-  u32 native_height;
-  u32 total_bytes;
-  u32 native_levels = 1;
-  u32 computed_levels;
-
-  u64 base_hash;
-  u64 full_hash;
-
-  TextureAndTLUTFormat full_format;
-  u32 tlut_address = 0;
-
-  bool is_palette_texture = false;
-  u32 palette_size = 0;
-
-  bool use_mipmaps = false;
-
-  bool from_tmem = false;
-  u32 tmem_address_even = 0;
-  u32 tmem_address_odd = 0;
-
-  int texture_cache_safety_color_sample_size = 0;  // Default to safe hashing
-
-  u8* src_data;
-};
-
 class TextureCacheBase
 {
 private:
@ -138,6 +102,7 @@ public:
                                      // content, aren't just downscaled
    bool should_force_safe_hashing = false;  // for XFB
    bool is_xfb_copy = false;
+    bool is_xfb_container = false;
    u64 id;

    bool reference_changed = false;  // used by xfb to determine when a reference xfb changed
@ -243,20 +208,9 @@ public:
                          TLUTFormat tlutfmt = TLUTFormat::IA8, bool use_mipmaps = false,
                          u32 tex_levels = 1, bool from_tmem = false, u32 tmem_address_even = 0,
                          u32 tmem_address_odd = 0);
+  TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, u32 stride,
+                             MathUtil::Rectangle<int>* display_rect);

-  TCacheEntry* GetXFBTexture(u32 address, u32 width, u32 height, TextureFormat texformat,
-                             int textureCacheSafetyColorSampleSize);
-  std::optional<TextureLookupInformation>
-  ComputeTextureInformation(u32 address, u32 width, u32 height, TextureFormat texformat,
-                            int textureCacheSafetyColorSampleSize, bool from_tmem,
-                            u32 tmem_address_even, u32 tmem_address_odd, u32 tlutaddr,
-                            TLUTFormat tlutfmt, u32 levels);
-  TCacheEntry* GetXFBFromCache(const TextureLookupInformation& tex_info);
-  TCacheEntry* GetTextureFromOverlappingTextures(const TextureLookupInformation& tex_info);
-  TCacheEntry* GetTextureFromMemory(const TextureLookupInformation& tex_info);
-  TCacheEntry* CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers);
-  void LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_update,
-                                      const TextureLookupInformation& tex_info, bool decode_on_gpu);
  virtual void BindTextures();
  void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height,
                                 u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect,
@ -322,10 +276,13 @@ private:

  void SetBackupConfig(const VideoConfig& config);

+  TCacheEntry* GetXFBFromCache(u32 address, u32 width, u32 height, u32 stride, u64 hash);
+
  TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);

  TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
                                       TLUTFormat tlutfmt);
+  void StitchXFBCopy(TCacheEntry* entry_to_update);

  void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level, bool is_arbitrary);
  void CheckTempSize(size_t required_size);