diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp
index b2b087af2f..0401efabe4 100644
--- a/pcsx2/GS/GS.cpp
+++ b/pcsx2/GS/GS.cpp
@@ -918,6 +918,69 @@ void GSFreeWrappedMemory(void* ptr, size_t size, size_t repeat)
 
 #endif
 
+// Returns {minimum alpha, maximum alpha} over `height` rows of `width` 32-bit pixels, with rows `stride` bytes apart.
+// The min/max are accumulated on whole 32-bit pixels and the top byte (>> 24) is extracted once at the end;
+// the top byte dominates an unsigned 32-bit comparison, so the result is the min/max of that byte over all pixels.
+std::pair GSGetRGBA8AlphaMinMax(const void* data, u32 width, u32 height, u32 stride)
+{
+	GSVector4i minc = GSVector4i::xffffffff();
+	GSVector4i maxc = GSVector4i::zero();
+
+	const u8* ptr = static_cast(data);
+	if ((width % 4) == 0)
+	{
+		for (u32 r = 0; r < height; r++)
+		{
+			const u8* rptr = ptr;
+			for (u32 c = 0; c < width; c += 4)
+			{
+				const GSVector4i v = GSVector4i::load(rptr);
+				rptr += sizeof(GSVector4i);
+				minc = minc.min_u32(v);
+				maxc = maxc.max_u32(v);
+			}
+
+			ptr += stride;
+		}
+	}
+	else
+	{
+		const u32 aligned_width = Common::AlignDownPow2(width, 4);
+		// Indexed by (width & 3) - 1, i.e. row [n - 1] is used when the last vector of a row holds n valid pixels.
+		// [n - 1][0] is ANDed in before max (lanes past the row end become 0), [n - 1][1] is ORed in before min (they become 0xFFFFFFFF).
+		// NOTE(review): assumes GSVector4i::cxpr(x, y, z, w) fills lanes in memory order (x = first pixel) - confirm.
+		static constexpr const GSVector4i masks[3][2] = {
+			{GSVector4i::cxpr(0xFFFFFFFF, 0, 0, 0), GSVector4i::cxpr(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)},
+			{GSVector4i::cxpr(0xFFFFFFFF, 0xFFFFFFFF, 0, 0), GSVector4i::cxpr(0, 0, 0xFFFFFFFF, 0xFFFFFFFF)},
+			{GSVector4i::cxpr(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0), GSVector4i::cxpr(0, 0, 0, 0xFFFFFFFF)},
+		};
+		const GSVector4i last_mask_and = masks[(width & 3) - 1][0];
+		const GSVector4i last_mask_or = masks[(width & 3) - 1][1];
+
+		for (u32 r = 0; r < height; r++)
+		{
+			const u8* rptr = ptr;
+			for (u32 c = 0; c < aligned_width; c += 4)
+			{
+				const GSVector4i v = GSVector4i::load(rptr);
+				rptr += sizeof(GSVector4i);
+				minc = minc.min_u32(v);
+				maxc = maxc.max_u32(v);
+			}
+
+			// Partial final vector: neutralise the lanes past the end of the row before accumulating.
+			const GSVector4i v = GSVector4i::load(rptr);
+			minc = minc.min_u32(v | last_mask_or);
+			maxc = maxc.max_u32(v & last_mask_and);
+
+			ptr += stride;
+		}
+	}
+
+	return std::make_pair(static_cast(minc.minv_u32() >> 24),
+		static_cast(maxc.maxv_u32() >> 24));
+}
+
 static void HotkeyAdjustUpscaleMultiplier(s32 delta)
 {
 	const u32 new_multiplier = static_cast(std::clamp(static_cast(EmuConfig.GS.UpscaleMultiplier) + delta, 1, 8));
diff --git a/pcsx2/GS/GSExtra.h 
b/pcsx2/GS/GSExtra.h index 4678c21943..122c85c573 100644 --- a/pcsx2/GS/GSExtra.h +++ b/pcsx2/GS/GSExtra.h @@ -19,6 +19,8 @@ #include "pcsx2/Config.h" #include "common/Align.h" +#include + /// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster template __forceinline bool BitEqual(const T& a, const T& b) @@ -129,6 +131,9 @@ __fi static T VectorAlign(T value) return Common::AlignUpPow2(value, VECTOR_ALIGNMENT); } +/// Returns the minimum and maximum alpha values, as a {min, max} pair, across a range of RGBA8 data. Assumes stride is 16 byte aligned. +std::pair GSGetRGBA8AlphaMinMax(const void* data, u32 width, u32 height, u32 stride); + // clang-format off #ifdef _MSC_VER diff --git a/pcsx2/GS/GSVector4i.h b/pcsx2/GS/GSVector4i.h index 1121009cad..c34e62365f 100644 --- a/pcsx2/GS/GSVector4i.h +++ b/pcsx2/GS/GSVector4i.h @@ -394,6 +394,30 @@ public: return GSVector4i(_mm_max_epu32(m, a)); } + __forceinline u32 minv_s32() const + { + const __m128i vmin = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + return std::min(_mm_extract_epi32(vmin, 0), _mm_extract_epi32(vmin, 1)); + } + + __forceinline u32 minv_u32() const + { + const __m128i vmin = _mm_min_epu32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + return std::min(_mm_extract_epi32(vmin, 0), _mm_extract_epi32(vmin, 1)); + } + + __forceinline u32 maxv_s32() const + { + const __m128i vmax = _mm_max_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + return std::max(_mm_extract_epi32(vmax, 0), _mm_extract_epi32(vmax, 1)); + } + + __forceinline u32 maxv_u32() const + { + const __m128i vmax = _mm_max_epu32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2))); + return std::max(_mm_extract_epi32(vmax, 0), _mm_extract_epi32(vmax, 1)); + } + __forceinline static int min_i16(int a, int b) { return store(load(a).min_i16(load(b))); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 2b2c883f48..154f1eed0f 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ 
b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2117,8 +2117,8 @@ void GSRendererHW::Draw() return; } - if (src->m_target) - CalcAlphaMinMax(src->m_from_target->m_alpha_min, src->m_from_target->m_alpha_max); + if(GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0) + CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second); } // Estimate size based on the scissor rectangle and height cache. diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 5b2417a064..5fa95991f2 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -3364,6 +3364,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_target = true; src->m_from_target = dst; src->m_from_target_TEX0 = dst->m_TEX0; + src->m_alpha_minmax.first = dst->m_alpha_min; + src->m_alpha_minmax.second = dst->m_alpha_max; if (psm.pal > 0) { @@ -3404,6 +3406,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_from_target_TEX0 = dst->m_TEX0; src->m_valid_rect = dst->m_valid; src->m_end_block = dst->m_end_block; + src->m_alpha_minmax.first = dst->m_alpha_min; + src->m_alpha_minmax.second = dst->m_alpha_max; dst->Update(); @@ -3615,6 +3619,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod, region)) != nullptr) { src->m_texture = src->m_from_hash_cache->texture; + src->m_alpha_minmax = src->m_from_hash_cache->alpha_minmax; + if (gpu_clut) AttachPaletteToSource(src, gpu_clut); else if (psm.pal > 0) @@ -4001,12 +4007,14 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 if (replace) { bool replacement_texture_pending = false; - GSTexture* replacement_tex = GSTextureReplacements::LookupReplacementTexture(key, lod != nullptr, &replacement_texture_pending); + std::pair alpha_minmax; + GSTexture* replacement_tex = 
GSTextureReplacements::LookupReplacementTexture(key, lod != nullptr, + &replacement_texture_pending, &alpha_minmax); if (replacement_tex) { // found a replacement texture! insert it into the hash cache, and clear paltex (since it's not indexed) paltex = false; - const HashCacheEntry entry{ replacement_tex, 1u, 0u, true }; + const HashCacheEntry entry{ replacement_tex, 1u, 0u, alpha_minmax, true }; m_hash_cache_replacement_memory_usage += entry.texture->GetMemUsage(); return &m_hash_cache.emplace(key, entry).first->second; } @@ -4058,7 +4066,9 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 } // upload base level - PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0); + const bool is_direct = (GSLocalMemory::m_psm[TEX0.PSM].pal == 0); + std::pair alpha_minmax = {0u, 255u}; + PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0, is_direct ? &alpha_minmax : nullptr); // upload mips if present if (lod) @@ -4067,8 +4077,15 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0 const int nmips = lod->y - lod->x + 1; for (int mip = 1; mip < nmips; mip++) { - const GIFRegTEX0 MIP_TEX0{ g_gs_renderer->GetTex0Layer(basemip + mip) }; - PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip); + const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)}; + std::pair mip_alpha_minmax; + PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip, + is_direct ? 
&mip_alpha_minmax : nullptr);
+			if (is_direct) // mip_alpha_minmax is only written when a non-null pointer was passed above, i.e. for direct formats
+			{
+				alpha_minmax.first = std::min(alpha_minmax.first, mip_alpha_minmax.first);
+				alpha_minmax.second = std::max(alpha_minmax.second, mip_alpha_minmax.second);
+			}
 		}
 	}
 
@@ -4077,7 +4094,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
 	key.RemoveCLUTHash();
 
 	// insert into the cache cache, and we're done
-	const HashCacheEntry entry{ tex, 1u, 0u, false };
+	const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, false};
 	m_hash_cache_memory_usage += tex->GetMemUsage();
 	return &m_hash_cache.emplace(key, entry).first->second;
 }
@@ -4466,6 +4483,11 @@ GSTextureCache::Source::~Source()
 	}
 }
 
+bool GSTextureCache::Source::IsPaletteFormat() const
+{
+	return (GSLocalMemory::m_psm[m_TEX0.PSM].pal > 0);
+}
+
 void GSTextureCache::Source::SetPages()
 {
 	const int tw = 1 << m_TEX0.TW;
@@ -4722,7 +4744,19 @@ void GSTextureCache::Source::PreloadLevel(int level)
 	m_layer_hash[level] = hash;
 
 	// And upload the texture.
-	PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level);
+	if (IsPaletteFormat())
+	{
+		PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr,
+			m_texture, level, nullptr);
+	}
+	else
+	{
+		std::pair mip_alpha_minmax;
+		PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr,
+			m_texture, level, &mip_alpha_minmax);
+		m_alpha_minmax.first = std::min(m_alpha_minmax.first, mip_alpha_minmax.first);
+		m_alpha_minmax.second = std::max(m_alpha_minmax.second, mip_alpha_minmax.second); // widen the upper bound; min here would shrink it
+	}
 }
 
 bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key)
@@ -5084,12 +5118,17 @@ void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_text
 {
 	s->m_palette_obj = m_palette_map.LookupPalette(pal, need_gs_texture);
 	s->m_palette = need_gs_texture ? 
s->m_palette_obj->GetPaletteGSTexture() : nullptr; + s->m_alpha_minmax = s->m_palette_obj->GetAlphaMinMax(); } void GSTextureCache::AttachPaletteToSource(Source* s, GSTexture* gpu_clut) { s->m_palette_obj = nullptr; s->m_palette = gpu_clut; + + // Unknown. + s->m_alpha_minmax.first = 0; + s->m_alpha_minmax.second = 255; } GSTextureCache::SurfaceOffset GSTextureCache::ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t) @@ -5287,7 +5326,7 @@ void GSTextureCache::InvalidateTemporarySource() m_temporary_source = nullptr; } -void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex) +void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax) { // When we insert we update memory usage. Old texture gets removed below. m_hash_cache_replacement_memory_usage += tex->GetMemUsage(); @@ -5297,13 +5336,14 @@ void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* { // We must've got evicted before we finished loading. No matter, add it in there anyway; // if it's not used again, it'll get tossed out later. - const HashCacheEntry entry{tex, 1u, 0u, true}; + const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, true}; m_hash_cache.emplace(key, entry); return; } // Reset age so we don't get thrown out too early. it->second.age = 0; + it->second.alpha_minmax = alpha_minmax; // Update memory usage, swap the textures, and recycle the old one for reuse. 
if (!it->second.is_replacement) @@ -5329,6 +5369,8 @@ GSTextureCache::Palette::Palette(u16 pal, bool need_gs_texture) { InitializeTexture(); } + + m_alpha_minmax = GSGetRGBA8AlphaMinMax(m_clut, pal, 1, 0); } GSTextureCache::Palette::~Palette() @@ -5719,7 +5761,8 @@ GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, con return FinishBlockHash(hash_st); } -void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level) +void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, + bool paltex, GSTexture* tex, u32 level, std::pair* alpha_minmax) { // m_TEX0 is adjusted for mips (messy, should be changed). const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; @@ -5743,7 +5786,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE // If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer. const GSVector4i unoffset_rect(0, 0, tw, th); GSTexture::GSMap map; - if (rect.eq(block_rect) && tex->Map(map, &unoffset_rect, level)) + if (rect.eq(block_rect) && !alpha_minmax && tex->Map(map, &unoffset_rect, level)) { rtx(mem, off, block_rect, map.bits, map.pitch, TEXA); tex->Unmap(); @@ -5757,6 +5800,11 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE const u8* ptr = buff + (pitch * static_cast(rect.top - block_rect.top)) + (static_cast(rect.left - block_rect.left) << (paltex ? 
0 : 2)); + if (alpha_minmax) + { + pxAssert(GSLocalMemory::m_psm[TEX0.PSM].pal == 0); + *alpha_minmax = GSGetRGBA8AlphaMinMax(buff, unoffset_rect.width(), unoffset_rect.height(), pitch); + } tex->Update(unoffset_rect, ptr, pitch, level); } } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index b200897a2b..53b8fde57d 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -15,12 +15,13 @@ #pragma once -#include - #include "GS/Renderers/Common/GSRenderer.h" #include "GS/Renderers/Common/GSFastList.h" #include "GS/Renderers/Common/GSDirtyRect.h" + #include +#include +#include class GSHwHack; @@ -124,6 +125,7 @@ public: GSTexture* texture; u32 refcount; u16 age; + std::pair alpha_minmax; bool is_replacement; }; @@ -174,13 +176,16 @@ public: { private: u32* m_clut; - u16 m_pal; GSTexture* m_tex_palette; + u16 m_pal; + std::pair m_alpha_minmax; public: Palette(u16 pal, bool need_gs_texture); ~Palette(); + __fi std::pair GetAlphaMinMax() const { return m_alpha_minmax; } + // Disable copy constructor and copy operator Palette(const Palette&) = delete; Palette& operator=(const Palette&) = delete; @@ -277,6 +282,7 @@ public: u8 m_complete_layers = 0; bool m_target = false; bool m_repeating = false; + std::pair m_alpha_minmax = {0u, 255u}; std::vector* m_p2t = nullptr; // Keep a trace of the target origin. There is no guarantee that pointer will // still be valid on future. 
However it ought to be good when the source is created @@ -296,10 +302,12 @@ public: __fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); } __fi bool IsFromTarget() const { return m_target; } + bool IsPaletteFormat() const; __fi const SourceRegion& GetRegion() const { return m_region; } __fi GSVector2i GetRegionSize() const { return m_region.GetSize(m_unscaled_size.x, m_unscaled_size.y); } __fi GSVector4i GetRegionRect() const { return m_region.GetRect(m_unscaled_size.x, m_unscaled_size.y); } + __fi const std::pair GetAlphaMinMax() const { return m_alpha_minmax; } void SetPages(); @@ -422,7 +430,7 @@ protected: void RemoveFromHashCache(HashCacheMap::iterator it); void AgeHashCache(); - static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level); + static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level, std::pair* alpha_minmax); static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region); // TODO: virtual void Write(Source* s, const GSVector4i& r) = 0; @@ -514,7 +522,7 @@ public: void InvalidateTemporarySource(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. 
- void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex); + void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax); }; extern std::unique_ptr g_texture_cache; diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp index 781090a678..451ea453b8 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.cpp @@ -433,7 +433,8 @@ bool GSTextureReplacements::HasReplacementTextureWithOtherPalette(const GSTextur return s_replacement_textures_without_clut_hash.find(name) != s_replacement_textures_without_clut_hash.end(); } -GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending) +GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, + bool* pending, std::pair* alpha_minmax) { const TextureName name(CreateTextureName(hash, 0)); *pending = false; @@ -450,6 +451,7 @@ GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache: if (it != s_replacement_texture_cache.end()) { // replacement is cached, can immediately upload to host GPU + *alpha_minmax = it->second.alpha_minmax; return CreateReplacementTexture(it->second, mipmap); } } @@ -476,6 +478,7 @@ GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache: const ReplacementTexture& rtex = s_replacement_texture_cache.emplace(name, std::move(replacement.value())).first->second; // and upload to gpu + *alpha_minmax = rtex.alpha_minmax; return CreateReplacementTexture(rtex, mipmap); } } @@ -490,6 +493,8 @@ std::optional GSTextureReplacements:: if (!loader(filename.c_str(), &rtex, only_base_image)) return std::nullopt; + rtex.alpha_minmax = GSGetRGBA8AlphaMinMax(rtex.data.data(), rtex.width, rtex.height, rtex.pitch); + return rtex; } @@ -632,7 +637,7 @@ void 
GSTextureReplacements::ProcessAsyncLoadedTextures() // upload and inject into TC GSTexture* tex = CreateReplacementTexture(it->second, mipmap); if (tex) - g_texture_cache->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex); + g_texture_cache->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex, it->second.alpha_minmax); } s_async_loaded_textures.clear(); } diff --git a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h index 6761870e37..c9c7f2d5ea 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureReplacements.h +++ b/pcsx2/GS/Renderers/HW/GSTextureReplacements.h @@ -17,6 +17,8 @@ #include "GS/Renderers/HW/GSTextureCache.h" +#include + namespace GSTextureReplacements { struct ReplacementTexture @@ -24,6 +26,7 @@ namespace GSTextureReplacements u32 width; u32 height; GSTexture::Format format; + std::pair alpha_minmax; u32 pitch; std::vector data; @@ -48,7 +51,7 @@ namespace GSTextureReplacements bool HasAnyReplacementTextures(); bool HasReplacementTextureWithOtherPalette(const GSTextureCache::HashCacheKey& hash); - GSTexture* LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending); + GSTexture* LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending, std::pair* alpha_minmax); GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, bool mipmap); void ProcessAsyncLoadedTextures();