VideoCommon: Changes to TextureCache to support decoding in backend

Stenzek 2016-11-27 18:14:59 +10:00
parent 6ffc16d1b0
commit 79ba946d70
2 changed files with 70 additions and 16 deletions

Source/Core/VideoCommon/TextureCacheBase.cpp

@@ -110,7 +110,8 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
   if (config.iSafeTextureCache_ColorSamples != backup_config.color_samples ||
       config.bTexFmtOverlayEnable != backup_config.texfmt_overlay ||
       config.bTexFmtOverlayCenter != backup_config.texfmt_overlay_center ||
-      config.bHiresTextures != backup_config.hires_textures)
+      config.bHiresTextures != backup_config.hires_textures ||
+      config.bEnableGPUTextureDecoding != backup_config.gpu_texture_decoding)
   {
     Invalidate();
@@ -209,6 +210,7 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
   backup_config.cache_hires_textures = config.bCacheHiresTextures;
   backup_config.stereo_3d = config.iStereoMode > 0;
   backup_config.efb_mono_depth = config.bStereoEFBMonoDepth;
+  backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
 }
 
 TextureCacheBase::TCacheEntryBase* TextureCacheBase::ApplyPaletteToEntry(TCacheEntryBase* entry,
@@ -526,6 +528,7 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
   const u32 texture_size =
       TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
+  u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
 
   u32 additional_mips_size = 0;  // not including level 0, which is texture_size
 
   // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
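As a worked example of the bytes_per_block computation added above (illustrative values, not part of the diff): GX_TF_I4 uses 8x8-texel blocks at one nibble per texel, so bytes_per_block = (8 * 8 * 1) / 2 = 32 — one 32-byte TMEM line per block.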
@@ -755,6 +758,17 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
   // how many levels the allocated texture shall have
   const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels;
 
+  // We can decode on the GPU if it is a supported format and the flag is enabled.
+  // Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
+  // banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
+  // there's no conversion between formats. In the future this could be extended with a separate
+  // shader, however.
+  bool decode_on_gpu =
+      !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
+      g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat),
+                                                static_cast<TlutFormat>(tlutfmt)) &&
+      !(from_tmem && texformat == GX_TF_RGBA8);
+
   // create the entry/texture
   TCacheEntryConfig config;
   config.width = width;
@@ -768,10 +782,19 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
     return nullptr;
 
   if (!hires_tex)
   {
+    const u8* tlut = &texMem[tlutaddr];
+    if (decode_on_gpu)
+    {
+      u32 row_stride = bytes_per_block * (expandedWidth / bsw);
+      g_texture_cache->DecodeTextureOnGPU(
+          entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height,
+          expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
+    }
+    else
+    {
     if (!(texformat == GX_TF_RGBA8 && from_tmem))
     {
-      const u8* tlut = &texMem[tlutaddr];
       TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
                         (TlutFormat)tlutfmt);
     }
@@ -781,6 +804,9 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
           &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
       TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
     }
+
+      entry->Load(temp, width, height, expandedWidth, 0);
+    }
   }
 
   iter = textures_by_address.emplace(address, entry);
@@ -797,9 +823,6 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
   entry->is_efb_copy = false;
   entry->is_custom_tex = hires_tex != nullptr;
 
-  // load texture
-  entry->Load(temp, width, height, expandedWidth, 0);
-
   std::string basename = "";
   if (g_ActiveConfig.bDumpTextures && !hires_tex)
   {
@@ -840,13 +863,26 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
       const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);
 
       const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
 
+      size_t mip_size =
+          TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
+
       const u8* tlut = &texMem[tlutaddr];
+      if (decode_on_gpu)
+      {
+        u32 row_stride = bytes_per_block * (mip_width / bsw);
+        g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size,
+                                            static_cast<TextureFormat>(texformat), mip_width,
+                                            mip_height, expanded_mip_width, expanded_mip_height,
+                                            row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
+      }
+      else
+      {
       TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
                         tlut, (TlutFormat)tlutfmt);
-      mip_src_data +=
-          TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
-
       entry->Load(temp, mip_width, mip_height, expanded_mip_width, level);
+      }
+
+      mip_src_data += mip_size;
 
       if (g_ActiveConfig.bDumpTextures)
         DumpTexture(entry, basename, level);
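For a concrete sense of the row_stride passed to DecodeTextureOnGPU at both call sites (illustrative numbers continuing the GX_TF_I4 example above): with bsw = 8, bytes_per_block = 32 and expandedWidth = 64, row_stride = 32 * (64 / 8) = 256 bytes — the distance between consecutive rows of 8x8 blocks in the encoded source data.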

Source/Core/VideoCommon/TextureCacheBase.h

@@ -171,6 +171,23 @@ public:
   virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
                               TlutFormat format) = 0;
 
+  // Returns true if the texture data and palette formats are supported by the GPU decoder.
+  virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
+  {
+    return false;
+  }
+
+  // Decodes the specified data to the GPU texture specified by entry.
+  // width, height are the size of the image in pixels.
+  // aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
+  // row_stride is the number of bytes for a row of blocks, not pixels.
+  virtual void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
+                                  size_t data_size, TextureFormat format, u32 width, u32 height,
+                                  u32 aligned_width, u32 aligned_height, u32 row_stride,
+                                  const u8* palette, TlutFormat palette_format)
+  {
+  }
+
 protected:
   TextureCacheBase();
@@ -225,6 +242,7 @@ private:
     bool copy_cache_enable;
     bool stereo_3d;
     bool efb_mono_depth;
+    bool gpu_texture_decoding;
   };
   BackupConfig backup_config = {};
 };
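
To illustrate how a backend is expected to plug into the two new virtuals, here is a minimal sketch. The MyBackendTextureCache class and its GetDecodingShader helper are hypothetical stand-ins, not part of this commit, and the remaining pure-virtual members of TextureCacheBase are omitted for brevity:

// Hypothetical backend subclass; everything except the two overridden virtuals is
// an illustrative assumption.
class MyBackendTextureCache final : public TextureCacheBase
{
public:
  bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override
  {
    // Only advertise formats we actually have a decoding shader for; for anything
    // else, TextureCacheBase::Load() falls back to TexDecoder_Decode on the CPU.
    return GetDecodingShader(format, palette_format) != nullptr;
  }

  void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
                          size_t data_size, TextureFormat format, u32 width, u32 height,
                          u32 aligned_width, u32 aligned_height, u32 row_stride,
                          const u8* palette, TlutFormat palette_format) override
  {
    // Typical flow: upload data (and palette, if present) to a GPU-visible buffer,
    // bind the decoding shader selected by format/palette_format, dispatch over the
    // aligned_width x aligned_height block grid using row_stride to address each
    // source row of blocks, and write the decoded texels into entry's texture at
    // mip level dst_level.
  }

private:
  // Hypothetical lookup returning a format-specific decoding shader, or nullptr.
  void* GetDecodingShader(TextureFormat format, TlutFormat palette_format);
};

Because both virtuals have safe defaults in TextureCacheBase (report no support, do nothing), backends that never implement a GPU decoder keep their existing CPU decoding behavior unchanged.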