GS: Add full (hash cache) texture preloading option

This commit is contained in:
Connor McLaughlin 2022-01-23 15:19:06 +10:00 committed by lightningterror
parent 6b2a851dec
commit 5d33af13ca
10 changed files with 344 additions and 186 deletions

View File

@ -133,7 +133,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.accurateDATE, "EmuCore/GS", "accurate_date", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.accurateDATE, "EmuCore/GS", "accurate_date", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.conservativeBufferAllocation, "EmuCore/GS", "conservative_framebuffer", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.conservativeBufferAllocation, "EmuCore/GS", "conservative_framebuffer", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gpuPaletteConversion, "EmuCore/GS", "paltex", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gpuPaletteConversion, "EmuCore/GS", "paltex", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.preloadTexture, "EmuCore/GS", "preload_texture", false); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.texturePreloading, "EmuCore/GS", "texture_preloading",
static_cast<int>(TexturePreloadingLevel::Off));
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// HW Renderer Fixes // HW Renderer Fixes

View File

@ -560,12 +560,12 @@
</item> </item>
</widget> </widget>
</item> </item>
<item row="8" column="0" colspan="2"> <item row="9" column="0" colspan="2">
<layout class="QGridLayout" name="basicCheckboxGridLayout"> <layout class="QGridLayout" name="basicCheckboxGridLayout">
<item row="0" column="0"> <item row="1" column="0">
<widget class="QCheckBox" name="accurateDATE"> <widget class="QCheckBox" name="gpuPaletteConversion">
<property name="text"> <property name="text">
<string>Accurate Destination Alpha Test</string> <string>GPU Palette Conversion</string>
</property> </property>
</widget> </widget>
</item> </item>
@ -576,21 +576,14 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="0"> <item row="0" column="0">
<widget class="QCheckBox" name="gpuPaletteConversion"> <widget class="QCheckBox" name="accurateDATE">
<property name="text"> <property name="text">
<string>GPU Palette Conversion</string> <string>Accurate Destination Alpha Test</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="1"> <item row="1" column="1">
<widget class="QCheckBox" name="preloadTexture">
<property name="text">
<string>Preload Textures</string>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="enableHWFixes"> <widget class="QCheckBox" name="enableHWFixes">
<property name="text"> <property name="text">
<string>Enable Hardware Renderer Fixes</string> <string>Enable Hardware Renderer Fixes</string>
@ -599,6 +592,32 @@
</item> </item>
</layout> </layout>
</item> </item>
<item row="8" column="0">
<widget class="QLabel" name="label_20">
<property name="text">
<string>Texture Preloading:</string>
</property>
</widget>
</item>
<item row="8" column="1">
<widget class="QComboBox" name="texturePreloading">
<item>
<property name="text">
<string>None</string>
</property>
</item>
<item>
<property name="text">
<string>Partial</string>
</property>
</item>
<item>
<property name="text">
<string>Full (Hash Cache)</string>
</property>
</item>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<widget class="QGroupBox" name="hardwareFixesTab"> <widget class="QGroupBox" name="hardwareFixesTab">

View File

@ -179,6 +179,13 @@ enum class AccBlendLevel : u8
Ultra, Ultra,
}; };
// Selects how aggressively the hardware renderer preloads textures.
// The numeric value is what gets stored in the "texture_preloading" integer
// setting (default "0"), and the order matches the None / Partial /
// Full (Hash Cache) entries offered in the settings UI, so the values are
// spelled out explicitly and must not be reordered.
enum class TexturePreloadingLevel : u8
{
	Off = 0,     // No preloading.
	Partial = 1, // Preload/hash textures that pass CanPreloadTextureSize().
	Full = 2,    // Additionally share textures through the hash cache.
};
// Template function for casting enumerations to their underlying type // Template function for casting enumerations to their underlying type
template <typename Enumeration> template <typename Enumeration>
typename std::underlying_type<Enumeration>::type enum_cast(Enumeration E) typename std::underlying_type<Enumeration>::type enum_cast(Enumeration E)
@ -450,8 +457,7 @@ struct Pcsx2Config
SaveRT : 1, SaveRT : 1,
SaveFrame : 1, SaveFrame : 1,
SaveTexture : 1, SaveTexture : 1,
SaveDepth : 1, SaveDepth : 1;
PreloadTexture : 1;
}; };
}; };
@ -490,6 +496,7 @@ struct Pcsx2Config
AccBlendLevel AccurateBlendingUnit{AccBlendLevel::Basic}; AccBlendLevel AccurateBlendingUnit{AccBlendLevel::Basic};
CRCHackLevel CRCHack{CRCHackLevel::Automatic}; CRCHackLevel CRCHack{CRCHackLevel::Automatic};
BiFiltering TextureFiltering{BiFiltering::PS2}; BiFiltering TextureFiltering{BiFiltering::PS2};
TexturePreloadingLevel TexturePreloading{TexturePreloadingLevel::Off};
int Dithering{2}; int Dithering{2};
int MaxAnisotropy{0}; int MaxAnisotropy{0};
int SWExtraThreads{2}; int SWExtraThreads{2};

View File

@ -798,6 +798,7 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config)
// reload texture cache when trilinear filtering or mipmap options change // reload texture cache when trilinear filtering or mipmap options change
if (GSConfig.HWMipmap != old_config.HWMipmap || if (GSConfig.HWMipmap != old_config.HWMipmap ||
GSConfig.TexturePreloading != old_config.TexturePreloading ||
GSConfig.UserHacks_TriFilter != old_config.UserHacks_TriFilter || GSConfig.UserHacks_TriFilter != old_config.UserHacks_TriFilter ||
GSConfig.GPUPaletteConversion != old_config.GPUPaletteConversion) GSConfig.GPUPaletteConversion != old_config.GPUPaletteConversion)
{ {
@ -1208,6 +1209,10 @@ void GSApp::Init()
m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::PS2), "Trilinear", "")); m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::PS2), "Trilinear", ""));
m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::Forced), "Trilinear", "Ultra/Slow")); m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::Forced), "Trilinear", "Ultra/Slow"));
m_gs_texture_preloading.push_back(GSSetting(static_cast<u32>(TexturePreloadingLevel::Off), "None", "Default"));
m_gs_texture_preloading.push_back(GSSetting(static_cast<u32>(TexturePreloadingLevel::Partial), "Partial", ""));
m_gs_texture_preloading.push_back(GSSetting(static_cast<u32>(TexturePreloadingLevel::Full), "Full", "Hash Cache"));
m_gs_generic_list.push_back(GSSetting(-1, "Automatic", "Default")); m_gs_generic_list.push_back(GSSetting(-1, "Automatic", "Default"));
m_gs_generic_list.push_back(GSSetting(0, "Force-Disabled", "")); m_gs_generic_list.push_back(GSSetting(0, "Force-Disabled", ""));
m_gs_generic_list.push_back(GSSetting(1, "Force-Enabled", "")); m_gs_generic_list.push_back(GSSetting(1, "Force-Enabled", ""));
@ -1321,7 +1326,6 @@ void GSApp::Init()
m_default_configuration["paltex"] = "0"; m_default_configuration["paltex"] = "0";
m_default_configuration["png_compression_level"] = std::to_string(Z_BEST_SPEED); m_default_configuration["png_compression_level"] = std::to_string(Z_BEST_SPEED);
m_default_configuration["preload_frame_with_gs_data"] = "0"; m_default_configuration["preload_frame_with_gs_data"] = "0";
m_default_configuration["preload_texture"] = "0";
m_default_configuration["Renderer"] = std::to_string(static_cast<int>(GSRendererType::Auto)); m_default_configuration["Renderer"] = std::to_string(static_cast<int>(GSRendererType::Auto));
m_default_configuration["resx"] = "1024"; m_default_configuration["resx"] = "1024";
m_default_configuration["resy"] = "1024"; m_default_configuration["resy"] = "1024";
@ -1339,6 +1343,7 @@ void GSApp::Init()
m_default_configuration["shaderfx_conf"] = "shaders/GS_FX_Settings.ini"; m_default_configuration["shaderfx_conf"] = "shaders/GS_FX_Settings.ini";
m_default_configuration["shaderfx_glsl"] = "shaders/GS.fx"; m_default_configuration["shaderfx_glsl"] = "shaders/GS.fx";
m_default_configuration["skip_duplicate_frames"] = "0"; m_default_configuration["skip_duplicate_frames"] = "0";
m_default_configuration["texture_preloading"] = "0";
m_default_configuration["ThreadedPresentation"] = "0"; m_default_configuration["ThreadedPresentation"] = "0";
m_default_configuration["throttle_present_rate"] = "0"; m_default_configuration["throttle_present_rate"] = "0";
m_default_configuration["TVShader"] = "0"; m_default_configuration["TVShader"] = "0";

View File

@ -133,6 +133,7 @@ public:
std::vector<GSSetting> m_gs_dithering; std::vector<GSSetting> m_gs_dithering;
std::vector<GSSetting> m_gs_bifilter; std::vector<GSSetting> m_gs_bifilter;
std::vector<GSSetting> m_gs_trifilter; std::vector<GSSetting> m_gs_trifilter;
std::vector<GSSetting> m_gs_texture_preloading;
std::vector<GSSetting> m_gs_hack; std::vector<GSSetting> m_gs_hack;
std::vector<GSSetting> m_gs_generic_list; std::vector<GSSetting> m_gs_generic_list;
std::vector<GSSetting> m_gs_offset_hack; std::vector<GSSetting> m_gs_offset_hack;

View File

@ -110,14 +110,25 @@ static const GSVector2i default_rt_size(2048, 2048);
static const GSVector2i default_rt_size(0, 0); static const GSVector2i default_rt_size(0, 0);
#endif #endif
extern Pcsx2Config::GSOptions GSConfig;
// Maximum texture size to skip preload/hash path. // Maximum texture size to skip preload/hash path.
// This is the width/height from the registers, i.e. not the power of 2. // This is the width/height from the registers, i.e. not the power of 2.
// Returns true when a texture with the given TW/TH register values may go
// through the hash cache. The hash cache is only used at the Full preloading
// level, and only for textures whose log2 dimensions are both within the limit.
__fi static bool CanCacheTextureSize(u32 tw, u32 th)
{
	static constexpr u32 MAX_LOG2_CACHE_DIMENSION = 10; // 1024

	if (GSConfig.TexturePreloading != TexturePreloadingLevel::Full)
		return false;

	const bool width_ok = (tw <= MAX_LOG2_CACHE_DIMENSION);
	const bool height_ok = (th <= MAX_LOG2_CACHE_DIMENSION);
	return width_ok && height_ok;
}
__fi static bool CanPreloadTextureSize(u32 tw, u32 th) __fi static bool CanPreloadTextureSize(u32 tw, u32 th)
{ {
static constexpr u32 MAXIMUM_SIZE_IN_ONE_DIRECTION = 10; // 1024 static constexpr u32 MAXIMUM_SIZE_IN_ONE_DIRECTION = 10; // 1024
static constexpr u32 MAXIMUM_SIZE_IN_OTHER_DIRECTION = 8; // 256 static constexpr u32 MAXIMUM_SIZE_IN_OTHER_DIRECTION = 8; // 256
static constexpr u32 MAXIMUM_SIZE_IN_BOTH_DIRECTIONS = 9; // 512 static constexpr u32 MAXIMUM_SIZE_IN_BOTH_DIRECTIONS = 9; // 512
if (GSConfig.TexturePreloading < TexturePreloadingLevel::Partial)
return false;
// We use an area-based approach here. We want to hash long font maps, // We use an area-based approach here. We want to hash long font maps,
// like 128x1024 (used in FFX), but skip 1024x512 textures (e.g. Xenosaga). // like 128x1024 (used in FFX), but skip 1024x512 textures (e.g. Xenosaga).
const u32 max_dimension = (tw > th) ? tw : th; const u32 max_dimension = (tw > th) ? tw : th;

View File

@ -19,6 +19,8 @@
#include "GS/GSGL.h" #include "GS/GSGL.h"
#include "GS/GSIntrin.h" #include "GS/GSIntrin.h"
#include "GS/GSUtil.h" #include "GS/GSUtil.h"
#include "common/Align.h"
#include "common/HashCombine.h"
#define XXH_STATIC_LINKING_ONLY 1 #define XXH_STATIC_LINKING_ONLY 1
#define XXH_INLINE_ALL 1 #define XXH_INLINE_ALL 1
@ -26,6 +28,7 @@
bool GSTextureCache::m_disable_partial_invalidation = false; bool GSTextureCache::m_disable_partial_invalidation = false;
bool GSTextureCache::m_wrap_gs_mem = false; bool GSTextureCache::m_wrap_gs_mem = false;
u8* GSTextureCache::m_temp;
GSTextureCache::GSTextureCache(GSRenderer* r) GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r) : m_renderer(r)
@ -94,6 +97,10 @@ void GSTextureCache::RemoveAll()
m_dst[type].clear(); m_dst[type].clear();
} }
for (auto it : m_hash_cache)
g_gs_device->Recycle(it.second.texture);
m_hash_cache.clear();
m_palette_map.Clear(); m_palette_map.Clear();
} }
@ -155,7 +162,7 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
TEX0.TBP0, psm_str(psm)); TEX0.TBP0, psm_str(psm));
// Create a shared texture source // Create a shared texture source
src = new Source(m_renderer, TEX0, TEXA, m_temp, true); src = new Source(m_renderer, TEX0, TEXA, true);
src->m_texture = dst->m_texture; src->m_texture = dst->m_texture;
src->m_shared_texture = true; src->m_shared_texture = true;
src->m_target = true; // So renderer can check if a conversion is required src->m_target = true; // So renderer can check if a conversion is required
@ -746,7 +753,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
if (!s->m_target) if (!s->m_target)
{ {
if (m_disable_partial_invalidation && s->m_repeating) if (s->m_from_hash_cache || (m_disable_partial_invalidation && s->m_repeating))
{ {
m_src.RemoveAt(s); m_src.RemoveAt(s);
} }
@ -1119,6 +1126,21 @@ void GSTextureCache::IncAge()
} }
} }
const u32 max_hash_cache_age = 30;
for (auto it = m_hash_cache.begin(); it != m_hash_cache.end();)
{
HashCacheEntry& e = it->second;
if (e.refcount == 0 && ++e.age > max_hash_cache_age)
{
g_gs_device->Recycle(e.texture);
m_hash_cache.erase(it++);
}
else
{
++it;
}
}
m_src.m_used = false; m_src.m_used = false;
// Clearing of Rendertargets causes flickering in many scene transitions. // Clearing of Rendertargets causes flickering in many scene transitions.
@ -1165,7 +1187,7 @@ void GSTextureCache::IncAge()
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, bool mipmap) GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, bool mipmap)
{ {
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
Source* src = new Source(m_renderer, TEX0, TEXA, m_temp); Source* src = new Source(m_renderer, TEX0, TEXA, false);
int tw = 1 << TEX0.TW; int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH; int th = 1 << TEX0.TH;
@ -1426,7 +1448,37 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
} }
else else
{ {
if (GSConfig.GPUPaletteConversion && psm.pal > 0) // try the hash cache
if (!mipmap && CanCacheTextureSize(TEX0.TW, TEX0.TH))
{
const bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0);
const u32* clut = (!paltex && psm.pal > 0) ? static_cast<const u32*>(m_renderer->m_mem.m_clut) : nullptr;
const HashCacheKey key{ HashCacheKey::Create(TEX0, TEXA, m_renderer, clut) };
auto it = m_hash_cache.find(key);
if (it == m_hash_cache.end())
{
// hash and upload texture
src->m_texture = g_gs_device->CreateTexture(tw, th, paltex ? false : mipmap, paltex ? GSTexture::Format::UNorm8 : GSTexture::Format::Color);
PreloadTexture(TEX0, TEXA, m_renderer->m_mem, paltex, src->m_texture, 0);
// insert it into the hash cache
HashCacheEntry entry{ src->m_texture, 1, 0 };
it = m_hash_cache.emplace(key, entry).first;
}
else
{
// use existing texture
src->m_texture = it->second.texture;
it->second.refcount++;
}
src->m_from_hash_cache = &it->second;
if (psm.pal > 0)
AttachPaletteToSource(src, psm.pal, paltex);
}
else if (GSConfig.GPUPaletteConversion && psm.pal > 0)
{ {
src->m_texture = g_gs_device->CreateTexture(tw, th, false, GSTexture::Format::UNorm8); src->m_texture = g_gs_device->CreateTexture(tw, th, false, GSTexture::Format::UNorm8);
AttachPaletteToSource(src, psm.pal, true); AttachPaletteToSource(src, psm.pal, true);
@ -1452,7 +1504,7 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int
{ {
ASSERT(type == RenderTarget || type == DepthStencil); ASSERT(type == RenderTarget || type == DepthStencil);
Target* t = new Target(m_renderer, TEX0, m_temp, m_can_convert_depth, type); Target* t = new Target(m_renderer, TEX0, m_can_convert_depth, type);
// FIXME: initial data should be unswizzled from local mem in Update() if dirty // FIXME: initial data should be unswizzled from local mem in Update() if dirty
@ -1605,11 +1657,11 @@ void GSTextureCache::PrintMemoryUsage()
// GSTextureCache::Surface // GSTextureCache::Surface
GSTextureCache::Surface::Surface(GSRenderer* r, u8* temp) GSTextureCache::Surface::Surface(GSRenderer* r)
: m_renderer(r) : m_renderer(r)
, m_texture(NULL) , m_texture(NULL)
, m_from_hash_cache(NULL)
, m_age(0) , m_age(0)
, m_temp(temp)
, m_32_bits_fmt(false) , m_32_bits_fmt(false)
, m_shared_texture(false) , m_shared_texture(false)
, m_end_block(0) , m_end_block(0)
@ -1621,7 +1673,7 @@ GSTextureCache::Surface::~Surface()
{ {
// Shared textures are pointers copy. Therefore no allocation // Shared textures are pointers copy. Therefore no allocation
// to recycle. // to recycle.
if (!m_shared_texture && m_texture) if (!m_shared_texture && !m_from_hash_cache && m_texture)
g_gs_device->Recycle(m_texture); g_gs_device->Recycle(m_texture);
} }
@ -1647,8 +1699,8 @@ bool GSTextureCache::Surface::Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i
// GSTextureCache::Source // GSTextureCache::Source
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, u8* temp, bool dummy_container) GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container)
: Surface(r, temp) : Surface(r)
, m_palette_obj(nullptr) , m_palette_obj(nullptr)
, m_palette(nullptr) , m_palette(nullptr)
, m_valid_rect(0, 0) , m_valid_rect(0, 0)
@ -1699,7 +1751,7 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int level)
{ {
Surface::UpdateAge(); Surface::UpdateAge();
if (m_target || (m_complete_layers & (1u << level))) if (m_target || m_from_hash_cache || (m_complete_layers & (1u << level)))
return; return;
if (CanPreload()) if (CanPreload())
@ -1906,28 +1958,6 @@ void GSTextureCache::Source::Flush(u32 count, int layer)
m_write.count -= count; m_write.count -= count;
} }
using BlockHashState = XXH3_state_t;
__fi static void BlockHashReset(BlockHashState& st)
{
XXH3_64bits_reset(&st);
}
__fi static void BlockHashAccumulate(BlockHashState& st, const u8* bp)
{
XXH3_64bits_update(&st, bp, BLOCK_SIZE);
}
__fi static void BlockHashAccumulate(BlockHashState& st, const u8* bp, u32 size)
{
XXH3_64bits_update(&st, bp, size);
}
__fi static GSTextureCache::Source::HashType FinishBlockHash(BlockHashState& st)
{
return XXH3_64bits_digest(&st);
}
void GSTextureCache::Source::PreloadLevel(int level) void GSTextureCache::Source::PreloadLevel(int level)
{ {
// m_TEX0 is adjusted for mips (messy, should be changed). // m_TEX0 is adjusted for mips (messy, should be changed).
@ -1936,42 +1966,8 @@ void GSTextureCache::Source::PreloadLevel(int level)
const int tw = 1 << m_TEX0.TW; const int tw = 1 << m_TEX0.TW;
const int th = 1 << m_TEX0.TH; const int th = 1 << m_TEX0.TH;
// For textures which are smaller than the block size, we expand and then hash.
// This is because otherwise we get the padding bytes, which can be random junk.
if (tw < bs.x || th < bs.y)
{
PreloadSmallLevel(level);
return;
}
// From GSLocalMemory foreachBlock(), used for reading textures.
// We want to hash the exact same blocks here.
const GSVector4i rect(0, 0, tw, th);
const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
const GSOffset& off = m_renderer->m_context->offset.tex;
GSLocalMemory& mem = m_renderer->m_mem;
HashType hash;
{
BlockHashState hash_st;
BlockHashReset(hash_st);
GSOffset::BNHelper bn = off.bnMulti(block_rect.left, block_rect.top);
const int right = block_rect.right >> off.blockShiftX();
const int bottom = block_rect.bottom >> off.blockShiftY();
const int xAdd = (1 << off.blockShiftX()) * (psm.bpp / 8);
for (; bn.blkY() < bottom; bn.nextBlockY())
{
for (int x = 0; bn.blkX() < right; bn.nextBlockX(), x += xAdd)
{
BlockHashAccumulate(hash_st, mem.BlockPtr(bn.value()));
}
}
hash = FinishBlockHash(hash_st);
}
// Layer is complete again, regardless of whether the hash matches or not (and we reupload). // Layer is complete again, regardless of whether the hash matches or not (and we reupload).
const HashType hash = HashTexture(m_renderer, m_TEX0, m_TEXA);
const u8 layer_bit = static_cast<u8>(1) << level; const u8 layer_bit = static_cast<u8>(1) << level;
m_complete_layers |= layer_bit; m_complete_layers |= layer_bit;
@ -1982,91 +1978,8 @@ void GSTextureCache::Source::PreloadLevel(int level)
m_valid_hashes |= layer_bit; m_valid_hashes |= layer_bit;
m_layer_hash[level] = hash; m_layer_hash[level] = hash;
// Expand texture/apply palette. // And upload the texture.
const int read_width = std::max(tw, psm.bs.x); PreloadTexture(m_TEX0, m_TEXA, m_renderer->m_mem, m_palette != nullptr, m_texture, level);
u32 pitch = static_cast<u32>(read_width) * sizeof(u32);
u32 row_size = static_cast<u32>(tw) * sizeof(u32);
GSLocalMemory::readTexture rtx = psm.rtx;
if (m_palette)
{
pitch >>= 2;
row_size >>= 2;
rtx = psm.rtxP;
}
// If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer.
GSTexture::GSMap map;
if (rect.eq(block_rect) && m_texture->Map(map, &rect, level))
{
(m_renderer->m_mem.*rtx)(off, block_rect, map.bits, map.pitch, m_TEXA);
m_texture->Unmap();
}
else
{
u8* buff = m_temp;
(m_renderer->m_mem.*rtx)(off, block_rect, buff, pitch, m_TEXA);
m_texture->Update(rect, buff, pitch, level);
}
}
void GSTextureCache::Source::PreloadSmallLevel(int level)
{
// m_TEX0 is adjusted for mips (messy, should be changed).
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
const GSVector2i& bs = psm.bs;
const int tw = 1 << m_TEX0.TW;
const int th = 1 << m_TEX0.TH;
const GSVector4i rect(0, 0, tw, th);
const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
const GSOffset& off = m_renderer->m_context->offset.tex;
GSLocalMemory& mem = m_renderer->m_mem;
// Expand texture/apply palette.
u32 pitch = static_cast<u32>(block_rect.z) * sizeof(u32);
u32 row_size = static_cast<u32>(tw) * sizeof(u32);
GSLocalMemory::readTexture rtx = psm.rtx;
if (m_palette)
{
pitch >>= 2;
row_size >>= 2;
rtx = psm.rtxP;
}
// Use temp buffer for expanding, since we may not need to update.
u8* buff = m_temp;
(m_renderer->m_mem.*rtx)(off, block_rect, buff, pitch, m_TEXA);
// Hash the expanded texture.
HashType hash;
{
u8* ptr = buff;
BlockHashState state;
BlockHashReset(state);
if (pitch == row_size)
{
BlockHashAccumulate(state, ptr, pitch * static_cast<u32>(th));
}
else
{
for (int y = 0; y < th; y++, ptr += pitch)
BlockHashAccumulate(state, ptr, row_size);
}
hash = FinishBlockHash(state);
}
// Layer is complete again, regardless of whether the hash matches or not (and we reupload).
const u8 layer_bit = static_cast<u8>(1) << level;
m_complete_layers |= layer_bit;
// Check whether the hash matches. Black textures will be 0, so check the valid bit.
if ((m_valid_hashes & layer_bit) && m_layer_hash[level] == hash)
return;
m_valid_hashes |= layer_bit;
m_layer_hash[level] = hash;
// Upload to GPU.
m_texture->Update(rect, buff, pitch, level);
} }
bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key) bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key)
@ -2076,8 +1989,8 @@ bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key)
// GSTextureCache::Target // GSTextureCache::Target
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, u8* temp, const bool depth_supported, const int type) GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, const bool depth_supported, const int type)
: Surface(r, temp) : Surface(r)
, m_type(type) , m_type(type)
, m_used(false) , m_used(false)
, m_depth_supported(depth_supported) , m_depth_supported(depth_supported)
@ -2219,7 +2132,16 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSO
void GSTextureCache::SourceMap::RemoveAll() void GSTextureCache::SourceMap::RemoveAll()
{ {
for (auto s : m_surfaces) for (auto s : m_surfaces)
{
if (s->m_from_hash_cache)
{
pxAssert(s->m_from_hash_cache->refcount > 0);
if ((--s->m_from_hash_cache->refcount) == 0)
s->m_from_hash_cache->age = 0;
}
delete s; delete s;
}
m_surfaces.clear(); m_surfaces.clear();
@ -2250,6 +2172,13 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
}); });
} }
if (s->m_from_hash_cache)
{
pxAssert(s->m_from_hash_cache->refcount > 0);
if ((--s->m_from_hash_cache->refcount) == 0)
s->m_from_hash_cache->age = 0;
}
delete s; delete s;
} }
@ -2640,3 +2569,159 @@ bool GSTextureCache::SurfaceOffsetKeyEqual::operator()(const GSTextureCache::Sur
} }
return true; return true;
} }
// Streaming hash state used by the texture hashing helpers below (XXH3, 64-bit digest).
using BlockHashState = XXH3_state_t;
// Resets the streaming hash state so a new hash can be accumulated into it.
// Must be called before the first BlockHashAccumulate() on a fresh state.
__fi static void BlockHashReset(BlockHashState& st)
{
XXH3_64bits_reset(&st);
}
// Feeds exactly BLOCK_SIZE bytes starting at bp into the streaming hash.
// BLOCK_SIZE is defined elsewhere; presumably the size of one GS memory block - confirm.
__fi static void BlockHashAccumulate(BlockHashState& st, const u8* bp)
{
XXH3_64bits_update(&st, bp, BLOCK_SIZE);
}
// Feeds `size` bytes starting at bp into the streaming hash.
// Used when hashing expanded rows whose length is not a whole block.
__fi static void BlockHashAccumulate(BlockHashState& st, const u8* bp, u32 size)
{
XXH3_64bits_update(&st, bp, size);
}
// Returns the 64-bit digest of everything accumulated into st so far.
__fi static GSTextureCache::HashType FinishBlockHash(BlockHashState& st)
{
return XXH3_64bits_digest(&st);
}
// Computes a 64-bit hash of the texture described by TEX0 as it currently
// sits in GS local memory.
//
//  - Textures at least one block in each direction are hashed directly from
//    the blocks that a texture read would touch, without expanding them.
//  - Textures smaller than a block are first expanded into m_temp and only
//    the tw x th region is hashed, because the rest of the block is padding
//    that may contain unrelated data.
//
// TEXA is only consumed by the expansion in the small-texture path.
// Uses the shared static m_temp buffer, so the result of a previous expansion
// in m_temp is overwritten.
GSTextureCache::HashType GSTextureCache::HashTexture(GSRenderer* renderer, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
{
	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
	const GSVector2i& bs = psm.bs;
	const int tw = 1 << TEX0.TW;
	const int th = 1 << TEX0.TH;

	// From GSLocalMemory foreachBlock(), used for reading textures.
	// We want to hash the exact same blocks here.
	const GSVector4i rect(0, 0, tw, th);
	const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
	GSLocalMemory& mem = renderer->m_mem;
	const GSOffset off(mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM));

	BlockHashState hash_st;
	BlockHashReset(hash_st);

	// For textures which are smaller than the block size, we expand and then hash.
	// This is because otherwise we get the padding bytes, which can be random junk.
	if (tw < bs.x || th < bs.y)
	{
		// Paletted formats are expanded to one index byte per pixel (rtxP);
		// everything else is expanded to 32 bits per pixel (rtx), matching the
		// pitch convention used by PreloadTexture().
		const bool palette = (psm.pal > 0);
		const u32 bpp_shift = palette ? 0 : 2;

		// The pitch is derived from the block-aligned *width* (block_rect.z),
		// not the height, and is aligned to 32 bytes for AVX2.
		const u32 pitch = Common::AlignUpPow2(static_cast<u32>(block_rect.z) << bpp_shift, 32);
		const u32 row_size = static_cast<u32>(tw) << bpp_shift;
		const GSLocalMemory::readTexture rtx = palette ? psm.rtxP : psm.rtx;

		// Use temp buffer for expanding, since we may not need to update.
		(mem.*rtx)(off, block_rect, m_temp, pitch, TEXA);

		// Hash only the visible tw x th region of the expanded texture.
		const u8* ptr = m_temp;
		if (pitch == row_size)
		{
			BlockHashAccumulate(hash_st, ptr, pitch * static_cast<u32>(th));
		}
		else
		{
			for (int y = 0; y < th; y++, ptr += pitch)
				BlockHashAccumulate(hash_st, ptr, row_size);
		}
	}
	else
	{
		// Walk every block covered by the texture, row by row, hashing the raw block data.
		GSOffset::BNHelper bn = off.bnMulti(block_rect.left, block_rect.top);
		const int right = block_rect.right >> off.blockShiftX();
		const int bottom = block_rect.bottom >> off.blockShiftY();

		for (; bn.blkY() < bottom; bn.nextBlockY())
		{
			for (; bn.blkX() < right; bn.nextBlockX())
				BlockHashAccumulate(hash_st, mem.BlockPtr(bn.value()));
		}
	}

	return FinishBlockHash(hash_st);
}
void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level)
{
// m_TEX0 is adjusted for mips (messy, should be changed).
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const GSVector2i& bs = psm.bs;
const int tw = 1 << TEX0.TW;
const int th = 1 << TEX0.TH;
// Expand texture/apply palette.
const GSVector4i rect(0, 0, tw, th);
const GSVector4i block_rect(rect.ralign<Align_Outside>(bs));
const GSOffset off(mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM));
const int read_width = std::max(tw, psm.bs.x);
u32 pitch = static_cast<u32>(read_width) * sizeof(u32);
u32 row_size = static_cast<u32>(tw) * sizeof(u32);
GSLocalMemory::readTexture rtx = psm.rtx;
if (paltex)
{
pitch >>= 2;
row_size >>= 2;
rtx = psm.rtxP;
}
// If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer.
GSTexture::GSMap map;
if (rect.eq(block_rect) && tex->Map(map, &rect, level))
{
(mem.*rtx)(off, block_rect, map.bits, map.pitch, TEXA);
tex->Unmap();
}
else
{
// Align pitch to 32 bytes for AVX2 if we're going through the temp buffer path.
pitch = Common::AlignUpPow2(pitch, 32);
u8* buff = m_temp;
(mem.*rtx)(off, block_rect, buff, pitch, TEXA);
tex->Update(rect, buff, pitch, level);
}
}
// Builds an all-zero key. Every member is cleared explicitly because keys are
// compared bytewise with memcmp().
GSTextureCache::HashCacheKey::HashCacheKey()
{
	TEX0Hash = 0;
	CLUTHash = 0;
	TEX0.U64 = 0;
	TEXA.U64 = 0;
}
// Builds the hash-cache key for the texture currently described by TEX0/TEXA.
//
//  - TEX0: only bits 20-34 of the register are kept (presumably the format and
//    size fields - confirm against the GIFRegTEX0 layout).
//  - TEXA: kept (masked) only for non-paletted formats with psm.fmt > 0,
//    otherwise zero.
//  - CLUTHash: hash of the palette when `clut` is non-null, otherwise zero.
//  - TEX0Hash: hash of the texture data in local memory (see HashTexture()).
GSTextureCache::HashCacheKey GSTextureCache::HashCacheKey::Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSRenderer* renderer, const u32* clut)
{
	const GSLocalMemory::psm_t& format = GSLocalMemory::m_psm[TEX0.PSM];
	const bool texa_matters = (format.pal == 0 && format.fmt > 0);

	HashCacheKey key;
	key.TEX0.U64 = TEX0.U64 & 0x00000007FFF00000ULL;

	if (texa_matters)
		key.TEXA.U64 = TEXA.U64 & 0x000000FF000080FFULL;
	else
		key.TEXA.U64 = 0;

	if (clut)
		key.CLUTHash = GSTextureCache::PaletteKeyHash{}({clut, format.pal});
	else
		key.CLUTHash = 0;

	key.TEX0Hash = HashTexture(renderer, TEX0, TEXA);
	return key;
}
// Hash functor for the hash-cache map: folds all four key members into a
// single value with HashCombine().
u64 GSTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& key) const
{
	std::size_t seed = 0;
	HashCombine(seed, key.TEX0Hash, key.CLUTHash, key.TEX0.U64, key.TEXA.U64);
	return seed;
}

View File

@ -40,6 +40,35 @@ public:
return valid && overlap; return valid && overlap;
} }
using HashType = u64;
// Key identifying a texture in the hash cache: the hash of the texture data,
// the hash of its palette (0 when none), and the masked TEX0/TEXA registers.
// Keys are compared bytewise, so every member must always be initialised
// (the default constructor zeroes them all).
struct HashCacheKey
{
	HashType TEX0Hash;
	HashType CLUTHash;
	GIFRegTEX0 TEX0;
	GIFRegTEXA TEXA;

	HashCacheKey();

	// Builds the key for the texture currently described by TEX0/TEXA.
	static HashCacheKey Create(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSRenderer* renderer, const u32* clut);

	__fi bool operator==(const HashCacheKey& rhs) const { return 0 == std::memcmp(this, &rhs, sizeof(*this)); }
	__fi bool operator!=(const HashCacheKey& rhs) const { return !(*this == rhs); }
	__fi bool operator<(const HashCacheKey& rhs) const { return 0 > std::memcmp(this, &rhs, sizeof(*this)); }
};
// Hash functor for HashCacheKey, used as the hasher of the m_hash_cache
// unordered_map.
struct HashCacheKeyHash
{
u64 operator()(const HashCacheKey& key) const;
};
// One texture owned by the hash cache.
// NOTE: entries are aggregate-initialised as { texture, refcount, age }, so
// the member order must not change.
struct HashCacheEntry
{
// GPU texture shared by every Source that resolved to this key; recycled
// through g_gs_device when the entry is evicted or the cache is cleared.
GSTexture* texture;
// Number of live Sources currently using this texture.
u32 refcount;
// Incremented in IncAge() while refcount is 0 and reset to 0 when the last
// user goes away; the entry is evicted once it exceeds the maximum age.
u32 age;
};
class Surface : public GSAlignedClass<32> class Surface : public GSAlignedClass<32>
{ {
protected: protected:
@ -47,16 +76,16 @@ public:
public: public:
GSTexture* m_texture; GSTexture* m_texture;
HashCacheEntry* m_from_hash_cache;
GIFRegTEX0 m_TEX0; GIFRegTEX0 m_TEX0;
GIFRegTEXA m_TEXA; GIFRegTEXA m_TEXA;
int m_age; int m_age;
u8* m_temp;
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
bool m_shared_texture; bool m_shared_texture;
u32 m_end_block; // Hint of the surface area. u32 m_end_block; // Hint of the surface area.
public: public:
Surface(GSRenderer* r, u8* temp); Surface(GSRenderer* r);
virtual ~Surface(); virtual ~Surface();
void UpdateAge(); void UpdateAge();
@ -111,10 +140,6 @@ public:
class Source : public Surface class Source : public Surface
{ {
public:
using HashType = u64;
private:
struct struct
{ {
GSVector4i* rect; GSVector4i* rect;
@ -122,7 +147,6 @@ public:
} m_write; } m_write;
void PreloadLevel(int level); void PreloadLevel(int level);
void PreloadSmallLevel(int level);
void Write(const GSVector4i& r, int layer); void Write(const GSVector4i& r, int layer);
void Flush(u32 count, int layer); void Flush(u32 count, int layer);
@ -149,10 +173,10 @@ public:
GSOffset::PageLooper m_pages; GSOffset::PageLooper m_pages;
public: public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, u8* temp, bool dummy_container = false); Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool dummy_container = false);
virtual ~Source(); virtual ~Source();
__fi bool CanPreload() const { return (GSConfig.PreloadTexture && CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH)); } __fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); }
void Update(const GSVector4i& rect, int layer = 0); void Update(const GSVector4i& rect, int layer = 0);
void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0); void UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer = 0);
@ -171,7 +195,7 @@ public:
bool m_dirty_alpha; bool m_dirty_alpha;
public: public:
Target(GSRenderer* r, const GIFRegTEX0& TEX0, u8* temp, const bool depth_supported, const int type); Target(GSRenderer* r, const GIFRegTEX0& TEX0, const bool depth_supported, const int type);
void UpdateValidity(const GSVector4i& rect); void UpdateValidity(const GSVector4i& rect);
@ -250,9 +274,10 @@ protected:
GSRenderer* m_renderer; GSRenderer* m_renderer;
PaletteMap m_palette_map; PaletteMap m_palette_map;
SourceMap m_src; SourceMap m_src;
std::unordered_map<HashCacheKey, HashCacheEntry, HashCacheKeyHash> m_hash_cache;
FastList<Target*> m_dst[2]; FastList<Target*> m_dst[2];
bool m_preload_frame; bool m_preload_frame;
u8* m_temp; static u8* m_temp;
bool m_can_convert_depth; bool m_can_convert_depth;
bool m_cpu_fb_conversion; bool m_cpu_fb_conversion;
static bool m_disable_partial_invalidation; static bool m_disable_partial_invalidation;
@ -264,6 +289,9 @@ protected:
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, bool mipmap = false); Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, bool mipmap = false);
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear); Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
static HashType HashTexture(GSRenderer* renderer, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
// TODO: virtual void Write(Source* s, const GSVector4i& r) = 0; // TODO: virtual void Write(Source* s, const GSVector4i& r) = 0;
// TODO: virtual void Write(Target* t, const GSVector4i& r) = 0; // TODO: virtual void Write(Target* t, const GSVector4i& r) = 0;

View File

@ -285,7 +285,6 @@ RendererTab::RendererTab(wxWindow* parent)
auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq); auto* paltex_prereq = m_ui.addCheckBox(hw_checks_box, "GPU Palette Conversion", "paltex", IDC_PALTEX, hw_prereq);
auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; }; auto aniso_prereq = [this, paltex_prereq]{ return m_is_hardware && paltex_prereq->GetValue() == false; };
m_ui.addCheckBox(hw_checks_box, "Preload Textures", "preload_texture", IDC_PRELOAD_TEXTURES, hw_prereq);
auto* hw_choice_grid = new wxFlexGridSizer(2, space, space); auto* hw_choice_grid = new wxFlexGridSizer(2, space, space);
@ -296,6 +295,7 @@ RendererTab::RendererTab(wxWindow* parent)
m_ui.addComboBoxAndLabel(hw_choice_grid, "Mipmapping:", "mipmap_hw", &theApp.m_gs_hw_mipmapping, IDC_MIPMAP_HW, hw_prereq); m_ui.addComboBoxAndLabel(hw_choice_grid, "Mipmapping:", "mipmap_hw", &theApp.m_gs_hw_mipmapping, IDC_MIPMAP_HW, hw_prereq);
m_ui.addComboBoxAndLabel(hw_choice_grid, "CRC Hack Level:", "crc_hack_level", &theApp.m_gs_crc_level, IDC_CRC_LEVEL, hw_prereq); m_ui.addComboBoxAndLabel(hw_choice_grid, "CRC Hack Level:", "crc_hack_level", &theApp.m_gs_crc_level, IDC_CRC_LEVEL, hw_prereq);
m_ui.addComboBoxAndLabel(hw_choice_grid, "Blending Accuracy:", "accurate_blending_unit", &theApp.m_gs_acc_blend_level, IDC_ACCURATE_BLEND_UNIT, hw_prereq); m_ui.addComboBoxAndLabel(hw_choice_grid, "Blending Accuracy:", "accurate_blending_unit", &theApp.m_gs_acc_blend_level, IDC_ACCURATE_BLEND_UNIT, hw_prereq);
m_ui.addComboBoxAndLabel(hw_choice_grid, "Texture Preloading:", "texture_preloading", &theApp.m_gs_texture_preloading, IDC_PRELOAD_TEXTURES, hw_prereq);
hardware_box->Add(hw_checks_box, wxSizerFlags().Centre()); hardware_box->Add(hw_checks_box, wxSizerFlags().Centre());
hardware_box->AddSpacer(space); hardware_box->AddSpacer(space);

View File

@ -359,6 +359,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const
OpEqu(AccurateBlendingUnit) && OpEqu(AccurateBlendingUnit) &&
OpEqu(CRCHack) && OpEqu(CRCHack) &&
OpEqu(TextureFiltering) && OpEqu(TextureFiltering) &&
OpEqu(TexturePreloading) &&
OpEqu(Dithering) && OpEqu(Dithering) &&
OpEqu(MaxAnisotropy) && OpEqu(MaxAnisotropy) &&
OpEqu(SWExtraThreads) && OpEqu(SWExtraThreads) &&
@ -510,7 +511,6 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
GSSettingBoolEx(SaveFrame, "savef"); GSSettingBoolEx(SaveFrame, "savef");
GSSettingBoolEx(SaveTexture, "savet"); GSSettingBoolEx(SaveTexture, "savet");
GSSettingBoolEx(SaveDepth, "savez"); GSSettingBoolEx(SaveDepth, "savez");
GSSettingBoolEx(PreloadTexture, "preload_texture");
GSSettingIntEnumEx(InterlaceMode, "interlace"); GSSettingIntEnumEx(InterlaceMode, "interlace");
@ -523,6 +523,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings()
GSSettingIntEnumEx(AccurateBlendingUnit, "accurate_blending_unit"); GSSettingIntEnumEx(AccurateBlendingUnit, "accurate_blending_unit");
GSSettingIntEnumEx(CRCHack, "crc_hack_level"); GSSettingIntEnumEx(CRCHack, "crc_hack_level");
GSSettingIntEnumEx(TextureFiltering, "filter"); GSSettingIntEnumEx(TextureFiltering, "filter");
GSSettingIntEnumEx(TexturePreloading, "texture_preloading");
GSSettingIntEx(Dithering, "dithering_ps2"); GSSettingIntEx(Dithering, "dithering_ps2");
GSSettingIntEx(MaxAnisotropy, "MaxAnisotropy"); GSSettingIntEx(MaxAnisotropy, "MaxAnisotropy");
GSSettingIntEx(SWExtraThreads, "extrathreads"); GSSettingIntEx(SWExtraThreads, "extrathreads");