VideoCommon: Changes to TextureCache to support decoding in backend

2016-11-27 18:14:59 +10:00 · 2016-11-27 18:14:59 +10:00 · 79ba946d70
parent 6ffc16d1b0
commit 79ba946d70
2 changed files with 70 additions and 16 deletions
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@ -110,7 +110,8 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
  if (config.iSafeTextureCache_ColorSamples != backup_config.color_samples ||
      config.bTexFmtOverlayEnable != backup_config.texfmt_overlay ||
      config.bTexFmtOverlayCenter != backup_config.texfmt_overlay_center ||
-      config.bHiresTextures != backup_config.hires_textures)
+      config.bHiresTextures != backup_config.hires_textures ||
+      config.bEnableGPUTextureDecoding != backup_config.gpu_texture_decoding)
  {
    Invalidate();

@ -209,6 +210,7 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
  backup_config.cache_hires_textures = config.bCacheHiresTextures;
  backup_config.stereo_3d = config.iStereoMode > 0;
  backup_config.efb_mono_depth = config.bStereoEFBMonoDepth;
+  backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
 }

 TextureCacheBase::TCacheEntryBase* TextureCacheBase::ApplyPaletteToEntry(TCacheEntryBase* entry,
@ -526,6 +528,7 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)

  const u32 texture_size =
      TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
+  u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
  u32 additional_mips_size = 0;  // not including level 0, which is texture_size

  // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
@ -755,6 +758,17 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
  // how many levels the allocated texture shall have
  const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels;

+  // We can decode on the GPU if it is a supported format and the flag is enabled.
+  // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
+  // banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
+  // there's no conversion between formats. In the future this could be extended with a separate
+  // shader, however.
+  bool decode_on_gpu =
+      !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
+      g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat),
+                                                static_cast<TlutFormat>(tlutfmt)) &&
+      !(from_tmem && texformat == GX_TF_RGBA8);
+
  // create the entry/texture
  TCacheEntryConfig config;
  config.width = width;
@ -768,10 +782,19 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
    return nullptr;

  if (!hires_tex)
+  {
+    const u8* tlut = &texMem[tlutaddr];
+    if (decode_on_gpu)
+    {
+      u32 row_stride = bytes_per_block * (expandedWidth / bsw);
+      g_texture_cache->DecodeTextureOnGPU(
+          entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height,
+          expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
+    }
+    else
    {
      if (!(texformat == GX_TF_RGBA8 && from_tmem))
      {
-      const u8* tlut = &texMem[tlutaddr];
        TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
                          (TlutFormat)tlutfmt);
      }
@ -781,6 +804,9 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
            &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
        TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
      }
+
+      entry->Load(temp, width, height, expandedWidth, 0);
+    }
  }

  iter = textures_by_address.emplace(address, entry);
@ -797,9 +823,6 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
  entry->is_efb_copy = false;
  entry->is_custom_tex = hires_tex != nullptr;

-  // load texture
-  entry->Load(temp, width, height, expandedWidth, 0);
-
  std::string basename = "";
  if (g_ActiveConfig.bDumpTextures && !hires_tex)
  {
@ -840,13 +863,26 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
      const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);

      const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
+      size_t mip_size =
+          TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
      const u8* tlut = &texMem[tlutaddr];
+
+      if (decode_on_gpu)
+      {
+        // row_stride is bytes per row of blocks, so count blocks from the block-aligned width.
+        // mip_width / bsw truncates, and is 0 once a mip is narrower than one block.
+        u32 row_stride = bytes_per_block * (expanded_mip_width / bsw);
+        g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size,
+                                            static_cast<TextureFormat>(texformat), mip_width,
+                                            mip_height, expanded_mip_width, expanded_mip_height,
+                                            row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
+      }
+      else
+      {
        TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
                          tlut, (TlutFormat)tlutfmt);
-      mip_src_data +=
-          TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
-
        entry->Load(temp, mip_width, mip_height, expanded_mip_width, level);
+      }
+
+      mip_src_data += mip_size;

      if (g_ActiveConfig.bDumpTextures)
        DumpTexture(entry, basename, level);
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@ -171,6 +171,23 @@ public:
  virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
                              TlutFormat format) = 0;

+  // True if the GPU decoder supports this texture/palette format pair; this default says no.
+  virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
+  {
+    return false;
+  }
+
+  // Decodes data_size bytes at data into mip level dst_level of entry; this default is a no-op.
+  // width, height are the size of the image in pixels.
+  // aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
+  // row_stride is the number of bytes for a row of blocks, not pixels.
+  virtual void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
+                                  size_t data_size, TextureFormat format, u32 width, u32 height,
+                                  u32 aligned_width, u32 aligned_height, u32 row_stride,
+                                  const u8* palette, TlutFormat palette_format)
+  {
+  }
+
 protected:
  TextureCacheBase();

@ -225,6 +242,7 @@ private:
    bool copy_cache_enable;
    bool stereo_3d;
    bool efb_mono_depth;
+    bool gpu_texture_decoding;
  };
  BackupConfig backup_config = {};
 };