diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 3d32b949c..81f606e84 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -1545,8 +1545,8 @@ bool GPU_HW::CompilePipelines(Error* error) // VRAM write replacement { - std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateCopyFragmentShader(), error); + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateVRAMReplacementBlitFragmentShader(), error); if (!fs) return false; @@ -2924,41 +2924,14 @@ void GPU_HW::LoadVertices() } } -bool GPU_HW::BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, - u32 width, u32 height) +bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height) { - if (!m_vram_replacement_texture || m_vram_replacement_texture->GetWidth() < tex->GetWidth() || - m_vram_replacement_texture->GetHeight() < tex->GetHeight() || g_gpu_device->GetFeatures().prefer_unused_textures) - { - g_gpu_device->RecycleTexture(std::move(m_vram_replacement_texture)); - - if (!(m_vram_replacement_texture = g_gpu_device->FetchTexture( - tex->GetWidth(), tex->GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, - GPUTexture::Flags::None, tex->GetPixels(), tex->GetPitch()))) - { - return false; - } - } - else - { - if (!m_vram_replacement_texture->Update(0, 0, tex->GetWidth(), tex->GetHeight(), tex->GetPixels(), tex->GetPitch())) - { - ERROR_LOG("Update {}x{} texture failed.", width, height); - return false; - } - } - GL_SCOPE_FMT("BlitVRAMReplacementTexture() {}x{} to {},{} => {},{} ({}x{})", tex->GetWidth(), tex->GetHeight(), dst_x, dst_y, dst_x + width, dst_y + height, width, height); - const float src_rect[4] = { - 0.0f, 0.0f, static_cast(tex->GetWidth()) / static_cast(m_vram_replacement_texture->GetWidth()), - static_cast(tex->GetHeight()) / static_cast(m_vram_replacement_texture->GetHeight())}; - - g_gpu_device->SetTextureSampler(0, m_vram_replacement_texture.get(), g_gpu_device->GetLinearSampler()); + g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler()); g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get()); g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height); - g_gpu_device->PushUniformBuffer(src_rect, sizeof(src_rect)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); @@ -3381,7 +3354,7 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b } else { - const GPUTextureCache::TextureReplacementImage* rtex = GPUTextureCache::GetVRAMReplacement(width, height, data); + GPUTexture* rtex = GPUTextureCache::GetVRAMReplacement(width, height, data); if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale)) { diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 53fb6da01..77595b5ba 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -228,8 +228,7 @@ private: void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds); - bool BlitVRAMReplacementTexture(const GPUTextureCache::TextureReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height); + bool BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); /// Expands a line into two triangles. void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth); @@ -259,7 +258,6 @@ private: std::unique_ptr m_vram_read_texture; std::unique_ptr m_vram_readback_texture; std::unique_ptr m_vram_readback_download_texture; - std::unique_ptr m_vram_replacement_texture; std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 64cdcb1ef..5179c5ed5 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1284,6 +1284,22 @@ float3 SampleVRAM24(uint2 icoords) return ss.str(); } +std::string GPU_HW_ShaderGen::GenerateVRAMReplacementBlitFragmentShader() const +{ + std::stringstream ss; + WriteHeader(ss); + DeclareTexture(ss, "samp0", 0); + DeclareFragmentEntryPoint(ss, 0, 1); + + ss << R"( +{ + o_col0 = SAMPLE_TEXTURE(samp0, v_tex0); +} +)"; + + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateWireframeGeometryShader() const { std::stringstream ss; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 8894e484b..e26228fb3 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -32,6 +32,7 @@ public: std::string GenerateVRAMUpdateDepthFragmentShader(bool msaa) const; std::string GenerateVRAMExtractFragmentShader(u32 resolution_scale, u32 multisamples, bool color_24bit, bool depth_buffer) const; + std::string GenerateVRAMReplacementBlitFragmentShader() const; std::string GenerateAdaptiveDownsampleVertexShader() const; std::string GenerateAdaptiveDownsampleMipFragmentShader() const; diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 3c06e6f3f..d76e2f9e1 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -16,6 +16,7 @@ #include "common/error.h" #include "common/file_system.h" #include "common/gsvector_formatter.h" +#include "common/heterogeneous_containers.h" #include "common/log.h" #include "common/path.h" #include "common/string_util.h" @@ -128,7 +129,7 @@ struct TextureReplacementSubImage { GSVector4i dst_rect; GSVector4i src_rect; - const TextureReplacementImage& image; + GPUTexture* texture; float scale_x; float scale_y; bool invert_alpha; @@ -229,7 +230,8 @@ struct DumpedTextureKeyHash } // namespace using HashCache = std::unordered_map; -using TextureCache = std::unordered_map; +using ReplacementImageCache = PreferUnorderedStringMap; +using GPUReplacementImageCache = PreferUnorderedStringMap, u32>>; using VRAMReplacementMap = std::unordered_map; using TextureReplacementMap = @@ -304,6 +306,8 @@ static void LoadTextureReplacementAliases(const ryml::ConstNodeRef& root, bool l bool load_texture_replacement_aliases); static const TextureReplacementImage* GetTextureReplacementImage(const std::string& path); +static GPUTexture* GetTextureReplacementGPUImage(const std::string& path); +static void CompactTextureReplacementGPUImages(); static void PreloadReplacementTextures(); static void PurgeUnreferencedTexturesFromCache(); @@ -529,7 +533,10 @@ struct GPUTextureCacheState TextureReplacementMap texture_page_texture_replacements; // TODO: Check the size, purge some when it gets too large. - TextureCache replacement_image_cache; + ReplacementImageCache replacement_image_cache; + GPUReplacementImageCache gpu_replacement_image_cache; + size_t gpu_replacement_image_cache_vram_usage = 0; + std::vector> gpu_replacement_image_cache_purge_list; std::unordered_set dumped_vram_writes; std::unordered_set dumped_textures; @@ -744,10 +751,18 @@ void GPUTextureCache::Shutdown() ClearHashCache(); DestroyPipelines(); s_state.replacement_texture_render_target.reset(); + s_state.gpu_replacement_image_cache_purge_list = {}; s_state.hash_cache_purge_list = {}; s_state.temp_vram_write_list = {}; s_state.track_vram_writes = false; + for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end();) + { + g_gpu_device->RecycleTexture(std::move(it->second.first)); + it = s_state.gpu_replacement_image_cache.erase(it); + } + s_state.gpu_replacement_image_cache_vram_usage = 0; + s_state.replacement_image_cache.clear(); s_state.vram_replacements.clear(); s_state.vram_write_texture_replacements.clear(); @@ -2150,6 +2165,8 @@ void GPUTextureCache::Compact() RemoveFromHashCache(s_state.hash_cache_purge_list[purge_index++].first); } } + + CompactTextureReplacementGPUImages(); } size_t GPUTextureCache::HashCacheKeyHash::operator()(const HashCacheKey& k) const @@ -2463,8 +2480,7 @@ void GPUTextureCache::SetGameID(std::string game_id) ReloadTextureReplacements(false); } -const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height, - const void* pixels) +GPUTexture* GPUTextureCache::GetVRAMReplacement(u32 width, u32 height, const void* pixels) { const VRAMReplacementName hash = GetVRAMWriteHash(width, height, pixels); @@ -2472,7 +2488,7 @@ const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetVRAMReplacem if (it == s_state.vram_replacements.end()) return nullptr; - return GetTextureReplacementImage(it->second); + return GetTextureReplacementGPUImage(it->second); } bool GPUTextureCache::ShouldDumpVRAMWrite(u32 width, u32 height) @@ -2675,12 +2691,7 @@ void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vectorsecond.first)) continue; - const TextureReplacementImage* image = GetTextureReplacementImage(it->second.second); - if (!image) - continue; - const TextureReplacementName& name = it->second.first; - const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec()); const GSVector4i rect_in_write_space = name.GetDestRect(); const GSVector4i rect_in_page_space = rect_in_write_space.sub32(offset_to_page_v); @@ -2700,7 +2711,12 @@ void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vector(TEXTURE_PAGE_WIDTH)); DebugAssert(rect_in_page_space.height() <= static_cast(TEXTURE_PAGE_HEIGHT)); - replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y, + GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second); + if (!texture) + continue; + + const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec()); + replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), texture, scale.x, scale.y, name.IsSemitransparent()}); } } @@ -2755,12 +2771,12 @@ void GPUTextureCache::GetTexturePageTextureReplacements(std::vectorsecond.second); - if (!image) + GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second); + if (!texture) continue; - const GSVector2 scale = GSVector2(GSVector2i(image->GetWidth(), image->GetHeight())) / GSVector2(name.GetSizeVec()); - replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), *image, scale.x, scale.y, + const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec()); + replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), texture, scale.x, scale.y, name.IsSemitransparent()}); } } @@ -2986,6 +3002,95 @@ const GPUTextureCache::TextureReplacementImage* GPUTextureCache::GetTextureRepla return &it->second; } +GPUTexture* GPUTextureCache::GetTextureReplacementGPUImage(const std::string& path) +{ + // Already in cache? + const auto git = s_state.gpu_replacement_image_cache.find(path); + if (git != s_state.gpu_replacement_image_cache.end()) + { + git->second.second = System::GetFrameNumber(); + return git->second.first.get(); + } + + // Need to upload it. + Error error; + std::unique_ptr tex; + + // Check CPU cache first. + const auto it = s_state.replacement_image_cache.find(path); + if (it != s_state.replacement_image_cache.end()) + { + tex = g_gpu_device->FetchAndUploadTextureImage(it->second, GPUTexture::Flags::None, &error); + } + else + { + // Need to load it. + Image cpu_image; + if (cpu_image.LoadFromFile(path.c_str(), &error)) + tex = g_gpu_device->FetchAndUploadTextureImage(cpu_image, GPUTexture::Flags::None, &error); + } + + if (!tex) + { + ERROR_LOG("Failed to load/upload '{}': {}", Path::GetFileName(path), error.GetDescription()); + return nullptr; + } + + const size_t vram_usage = tex->GetVRAMUsage(); + s_state.gpu_replacement_image_cache_vram_usage += vram_usage; + + VERBOSE_LOG("Uploaded '{}': {}x{} {} {:.2f} KB", Path::GetFileName(path), tex->GetWidth(), tex->GetHeight(), + GPUTexture::GetFormatName(tex->GetFormat()), static_cast(vram_usage) / 1024.0f); + + return s_state.gpu_replacement_image_cache.emplace(path, std::make_pair(std::move(tex), System::GetFrameNumber())) + .first->second.first.get(); +} + +void GPUTextureCache::CompactTextureReplacementGPUImages() +{ + // Instead of compacting to exactly the maximum, let's go down to the maximum less 16MB. + // That way we can hopefully avoid compacting again for a few frames. + static constexpr size_t EXTRA_COMPACT_SIZE = 16 * 1024 * 1024; + + const size_t max_usage = static_cast(s_state.config.max_replacement_cache_vram_usage_mb) * 1048576; + if (s_state.gpu_replacement_image_cache_vram_usage <= max_usage) + return; + + VERBOSE_LOG("Compacting replacement GPU image cache, count = {}, size = {:.1f} MB", + s_state.gpu_replacement_image_cache.size(), + static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); + + const u32 frame_number = System::GetFrameNumber(); + s_state.gpu_replacement_image_cache_purge_list.reserve(s_state.gpu_replacement_image_cache.size()); + for (auto it = s_state.gpu_replacement_image_cache.begin(); it != s_state.gpu_replacement_image_cache.end(); ++it) + s_state.gpu_replacement_image_cache_purge_list.emplace_back(it, frame_number - it->second.second); + + // Reverse sort, put the oldest on the end. + std::sort(s_state.gpu_replacement_image_cache_purge_list.begin(), + s_state.gpu_replacement_image_cache_purge_list.end(), + [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; }); + + // See first comment above. + const size_t target_size = (max_usage < EXTRA_COMPACT_SIZE) ? max_usage : (max_usage - EXTRA_COMPACT_SIZE); + while (s_state.gpu_replacement_image_cache_vram_usage > target_size && + !s_state.gpu_replacement_image_cache_purge_list.empty()) + { + GPUReplacementImageCache::iterator iter = s_state.gpu_replacement_image_cache_purge_list.back().first; + s_state.gpu_replacement_image_cache_purge_list.pop_back(); + + std::unique_ptr tex = std::move(iter->second.first); + s_state.gpu_replacement_image_cache.erase(iter); + s_state.gpu_replacement_image_cache_vram_usage -= tex->GetVRAMUsage(); + g_gpu_device->RecycleTexture(std::move(tex)); + } + + s_state.gpu_replacement_image_cache_purge_list.clear(); + + VERBOSE_LOG("Finished compacting replacement GPU image cache, count = {}, size = {:.1f} MB", + s_state.gpu_replacement_image_cache.size(), + static_cast(s_state.gpu_replacement_image_cache_vram_usage) / 1048576.0f); +} + void GPUTextureCache::PreloadReplacementTextures() { static constexpr float UPDATE_INTERVAL = 1.0f; @@ -3203,31 +3308,35 @@ void GPUTextureCache::ReloadTextureReplacements(bool show_info) void GPUTextureCache::PurgeUnreferencedTexturesFromCache() { - TextureCache old_map = std::move(s_state.replacement_image_cache); - s_state.replacement_image_cache = TextureCache(); + ReplacementImageCache old_map = std::move(s_state.replacement_image_cache); + GPUReplacementImageCache old_gpu_map = std::move(s_state.gpu_replacement_image_cache); + s_state.replacement_image_cache = ReplacementImageCache(); + s_state.gpu_replacement_image_cache = GPUReplacementImageCache(); - for (const auto& it : s_state.vram_replacements) - { - const auto it2 = old_map.find(it.second); + const auto reinsert_texture = [&old_map, &old_gpu_map](const std::string& name) { + const auto it2 = old_map.find(name); if (it2 != old_map.end()) { - s_state.replacement_image_cache.emplace(it.second, std::move(it2->second)); + s_state.replacement_image_cache.emplace(name, std::move(it2->second)); old_map.erase(it2); } - } - for (const auto& map : {s_state.vram_write_texture_replacements, s_state.texture_page_texture_replacements}) - { - for (const auto& it : map) + const auto it3 = old_gpu_map.find(name); + if (it3 != old_gpu_map.end()) { - const auto it2 = old_map.find(it.second.second); - if (it2 != old_map.end()) - { - s_state.replacement_image_cache.emplace(it.second.second, std::move(it2->second)); - old_map.erase(it2); - } + s_state.gpu_replacement_image_cache.emplace(name, std::move(it3->second)); + old_gpu_map.erase(it3); } - } + }; + + for (const auto& it : s_state.vram_replacements) + reinsert_texture(it.second); + + for (const auto& it : s_state.vram_write_texture_replacements) + reinsert_texture(it.second.second); + + for (const auto& it : s_state.texture_page_texture_replacements) + reinsert_texture(it.second.second); } void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, HashType pal_hash, @@ -3317,22 +3426,15 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, for (const TextureReplacementSubImage& si : subimages) { - std::unique_ptr temp_texture = - g_gpu_device->FetchAndUploadTextureImage(si.image, GPUTexture::Flags::None); - if (!temp_texture) - continue; - const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v); - texture_size = GSVector2(GSVector2i(temp_texture->GetWidth(), temp_texture->GetHeight())); + texture_size = GSVector2(si.texture->GetSizeVec()); GSVector2::store(&uniforms[0], texture_size); GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size); g_gpu_device->SetViewportAndScissor(dst_rect); - g_gpu_device->SetTextureSampler(0, temp_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(0, si.texture, g_gpu_device->GetNearestSampler()); g_gpu_device->SetPipeline(si.invert_alpha ? s_state.replacement_semitransparent_draw_pipeline.get() : s_state.replacement_draw_pipeline.get()); g_gpu_device->Draw(3, 0); - - g_gpu_device->RecycleTexture(std::move(temp_texture)); } g_gpu_device->CopyTextureRegion(replacement_tex.get(), 0, 0, 0, 0, s_state.replacement_texture_render_target.get(), 0, diff --git a/src/core/gpu_hw_texture_cache.h b/src/core/gpu_hw_texture_cache.h index 1c071c920..07482da7b 100644 --- a/src/core/gpu_hw_texture_cache.h +++ b/src/core/gpu_hw_texture_cache.h @@ -128,7 +128,7 @@ void SetGameID(std::string game_id); void ReloadTextureReplacements(bool show_info); // VRAM Write Replacements -const TextureReplacementImage* GetVRAMReplacement(u32 width, u32 height, const void* pixels); +GPUTexture* GetVRAMReplacement(u32 width, u32 height, const void* pixels); void DumpVRAMWrite(u32 width, u32 height, const void* pixels); bool ShouldDumpVRAMWrite(u32 width, u32 height); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index cbecae52e..d13091974 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -454,6 +454,9 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro texture_replacements.config.replacement_scale_linear_filter = si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", false); + texture_replacements.config.max_replacement_cache_vram_usage_mb = + si.GetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", 512); + texture_replacements.config.max_vram_write_splits = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u); texture_replacements.config.max_vram_write_coalesce_width = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceWidth", 0u); @@ -714,6 +717,9 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", texture_replacements.config.replacement_scale_linear_filter); + si.SetUIntValue("TextureReplacements", "MaxReplacementCacheVRAMUsage", + texture_replacements.config.max_replacement_cache_vram_usage_mb); + si.SetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", texture_replacements.config.max_vram_write_splits); si.SetUIntValue("TextureReplacements", "MaxVRAMWriteCoalesceWidth", texture_replacements.config.max_vram_write_coalesce_width); diff --git a/src/core/settings.h b/src/core/settings.h index 9dbbefbfc..9bdb63f2f 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -245,6 +245,8 @@ struct Settings bool convert_copies_to_writes : 1 = false; bool replacement_scale_linear_filter = false; + u32 max_replacement_cache_vram_usage_mb = 512; + u32 max_vram_write_splits = 0; u32 max_vram_write_coalesce_width = 0; u32 max_vram_write_coalesce_height = 0; diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index c264cb73b..386034ba8 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -122,6 +122,7 @@ public: ALWAYS_INLINE Format GetFormat() const { return m_format; } ALWAYS_INLINE Flags GetFlags() const { return m_flags; } ALWAYS_INLINE bool HasFlag(Flags flag) const { return ((static_cast(m_flags) & static_cast(flag)) != 0); } + ALWAYS_INLINE GSVector2i GetSizeVec() const { return GSVector2i(m_width, m_height); } ALWAYS_INLINE GSVector4i GetRect() const { return GSVector4i(0, 0, static_cast(m_width), static_cast(m_height));