GS/HW: Calculate alpha values for sources

This commit is contained in:
Stenzek 2023-07-09 04:39:30 +01:00 committed by refractionpcsx2
parent ef7ce4dfc8
commit a371c5b7d0
8 changed files with 170 additions and 21 deletions

View File

@ -918,6 +918,62 @@ void GSFreeWrappedMemory(void* ptr, size_t size, size_t repeat)
#endif
/// Scans a rectangle of 32-bit pixels and returns {minimum alpha, maximum alpha}.
///
/// @param data   Start of the first row. Rows are read 16 bytes (four pixels) at a time.
/// @param width  Number of pixels per row that contribute to the result.
/// @param height Number of rows.
/// @param stride Distance in bytes between the starts of consecutive rows; expected to be
///               16-byte aligned so that every vector load stays aligned.
/// @return `first` is the smallest and `second` the largest value found in bits 24-31 of the
///         scanned pixels. If nothing is scanned the accumulators keep their seeds, i.e. {255, 0}.
///
/// When `width` is not a multiple of four, the last vector of each row is still loaded in full,
/// so the row storage must be readable up to the next 16-byte boundary.
std::pair<u8, u8> GSGetRGBA8AlphaMinMax(const void* data, u32 width, u32 height, u32 stride)
{
	// Alpha lives in the top byte of each 32-bit lane, so the unsigned min/max of whole pixels
	// always carries the min/max alpha in its top byte.
	GSVector4i minc = GSVector4i::xffffffff();
	GSVector4i maxc = GSVector4i::zero();

	const u8* ptr = static_cast<const u8*>(data);
	if ((width % 4) == 0)
	{
		for (u32 r = 0; r < height; r++)
		{
			const u8* rptr = ptr;
			for (u32 c = 0; c < width; c += 4)
			{
				const GSVector4i v = GSVector4i::load<true>(rptr);
				rptr += sizeof(GSVector4i);
				minc = minc.min_u32(v);
				maxc = maxc.max_u32(v);
			}

			ptr += stride;
		}
	}
	else
	{
		const u32 aligned_width = Common::AlignDownPow2(width, 4);

		// Masks for the partial vector at the end of each row, indexed by
		// (number of valid pixels - 1). [0] keeps the valid lanes (used for max, invalid lanes
		// become 0), [1] saturates the invalid lanes (used for min, invalid lanes become ~0).
		// Lane 0 is the first pixel in memory, so N valid pixels occupy lanes 0..N-1.
		// NOTE(review): relies on cxpr(x, y, z, w) placing x in lane 0 — confirm against GSVector4i.
		static constexpr const GSVector4i masks[3][2] = {
			{GSVector4i::cxpr(0xFFFFFFFF, 0, 0, 0), GSVector4i::cxpr(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)},
			{GSVector4i::cxpr(0xFFFFFFFF, 0xFFFFFFFF, 0, 0), GSVector4i::cxpr(0, 0, 0xFFFFFFFF, 0xFFFFFFFF)},
			{GSVector4i::cxpr(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0), GSVector4i::cxpr(0, 0, 0, 0xFFFFFFFF)},
		};
		const GSVector4i last_mask_and = masks[(width & 3) - 1][0];
		const GSVector4i last_mask_or = masks[(width & 3) - 1][1];

		for (u32 r = 0; r < height; r++)
		{
			const u8* rptr = ptr;
			for (u32 c = 0; c < aligned_width; c += 4)
			{
				const GSVector4i v = GSVector4i::load<true>(rptr);
				rptr += sizeof(GSVector4i);
				minc = minc.min_u32(v);
				maxc = maxc.max_u32(v);
			}

			// Trailing 1-3 pixels: neutralise the lanes that lie past the end of the row.
			const GSVector4i v = GSVector4i::load<true>(rptr);
			minc = minc.min_u32(v | last_mask_or);
			maxc = maxc.max_u32(v & last_mask_and);

			ptr += stride;
		}
	}

	// Horizontal reduction, then keep only the alpha byte.
	return std::make_pair<u8, u8>(static_cast<u8>(minc.minv_u32() >> 24),
		static_cast<u8>(maxc.maxv_u32() >> 24));
}
static void HotkeyAdjustUpscaleMultiplier(s32 delta)
{
const u32 new_multiplier = static_cast<u32>(std::clamp(static_cast<s32>(EmuConfig.GS.UpscaleMultiplier) + delta, 1, 8));

View File

@ -19,6 +19,8 @@
#include "pcsx2/Config.h"
#include "common/Align.h"
#include <utility>
/// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster
template <typename T>
__forceinline bool BitEqual(const T& a, const T& b)
@ -129,6 +131,9 @@ __fi static T VectorAlign(T value)
return Common::AlignUpPow2(value, VECTOR_ALIGNMENT);
}
/// Returns the minimum and maximum alpha values (as {min, max}) across a range of RGBA8 data. Assumes stride is 16 byte aligned.
std::pair<u8, u8> GSGetRGBA8AlphaMinMax(const void* data, u32 width, u32 height, u32 stride);
// clang-format off
#ifdef _MSC_VER

View File

@ -394,6 +394,30 @@ public:
return GSVector4i(_mm_max_epu32(m, a));
}
/// Horizontal minimum of the four 32-bit lanes, compared as signed integers.
/// The signed result is converted to u32 by the return type.
__forceinline u32 minv_s32() const
{
	// Bring lanes 2 and 3 down onto lanes 0 and 1, reducing four candidates to two.
	const __m128i upper_half = _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2));
	const __m128i pairwise = _mm_min_epi32(m, upper_half);

	// Finish the reduction on the two surviving lanes in scalar code.
	const s32 lane0 = _mm_extract_epi32(pairwise, 0);
	const s32 lane1 = _mm_extract_epi32(pairwise, 1);
	return std::min<s32>(lane0, lane1);
}
/// Horizontal minimum of the four 32-bit lanes, compared as unsigned integers.
__forceinline u32 minv_u32() const
{
	// Bring lanes 2 and 3 down onto lanes 0 and 1, reducing four candidates to two.
	const __m128i upper_half = _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2));
	const __m128i pairwise = _mm_min_epu32(m, upper_half);

	// The extract intrinsic yields int; reinterpret as unsigned before the final compare.
	const u32 lane0 = static_cast<u32>(_mm_extract_epi32(pairwise, 0));
	const u32 lane1 = static_cast<u32>(_mm_extract_epi32(pairwise, 1));
	return std::min<u32>(lane0, lane1);
}
/// Horizontal maximum of the four 32-bit lanes, compared as signed integers.
/// The signed result is converted to u32 by the return type.
__forceinline u32 maxv_s32() const
{
	// Bring lanes 2 and 3 down onto lanes 0 and 1, reducing four candidates to two.
	const __m128i upper_half = _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2));
	const __m128i pairwise = _mm_max_epi32(m, upper_half);

	// Finish the reduction on the two surviving lanes in scalar code.
	const s32 lane0 = _mm_extract_epi32(pairwise, 0);
	const s32 lane1 = _mm_extract_epi32(pairwise, 1);
	return std::max<s32>(lane0, lane1);
}
/// Horizontal maximum of the four 32-bit lanes, compared as unsigned integers.
__forceinline u32 maxv_u32() const
{
	// Bring lanes 2 and 3 down onto lanes 0 and 1, reducing four candidates to two.
	const __m128i upper_half = _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 2, 3, 2));
	const __m128i pairwise = _mm_max_epu32(m, upper_half);

	// The extract intrinsic yields int; reinterpret as unsigned before the final compare.
	const u32 lane0 = static_cast<u32>(_mm_extract_epi32(pairwise, 0));
	const u32 lane1 = static_cast<u32>(_mm_extract_epi32(pairwise, 1));
	return std::max<u32>(lane0, lane1);
}
__forceinline static int min_i16(int a, int b)
{
return store(load(a).min_i16(load(b)));

View File

@ -2117,8 +2117,8 @@ void GSRendererHW::Draw()
return;
}
if (src->m_target)
CalcAlphaMinMax(src->m_from_target->m_alpha_min, src->m_from_target->m_alpha_max);
if(GSLocalMemory::m_psm[src->m_TEX0.PSM].pal == 0)
CalcAlphaMinMax(src->m_alpha_minmax.first, src->m_alpha_minmax.second);
}
// Estimate size based on the scissor rectangle and height cache.

View File

@ -3364,6 +3364,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_target = true;
src->m_from_target = dst;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_alpha_minmax.first = dst->m_alpha_min;
src->m_alpha_minmax.second = dst->m_alpha_max;
if (psm.pal > 0)
{
@ -3404,6 +3406,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_valid_rect = dst->m_valid;
src->m_end_block = dst->m_end_block;
src->m_alpha_minmax.first = dst->m_alpha_min;
src->m_alpha_minmax.second = dst->m_alpha_max;
dst->Update();
@ -3615,6 +3619,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod, region)) != nullptr)
{
src->m_texture = src->m_from_hash_cache->texture;
src->m_alpha_minmax = src->m_from_hash_cache->alpha_minmax;
if (gpu_clut)
AttachPaletteToSource(src, gpu_clut);
else if (psm.pal > 0)
@ -4001,12 +4007,14 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
if (replace)
{
bool replacement_texture_pending = false;
GSTexture* replacement_tex = GSTextureReplacements::LookupReplacementTexture(key, lod != nullptr, &replacement_texture_pending);
std::pair<u8, u8> alpha_minmax;
GSTexture* replacement_tex = GSTextureReplacements::LookupReplacementTexture(key, lod != nullptr,
&replacement_texture_pending, &alpha_minmax);
if (replacement_tex)
{
// found a replacement texture! insert it into the hash cache, and clear paltex (since it's not indexed)
paltex = false;
const HashCacheEntry entry{ replacement_tex, 1u, 0u, true };
const HashCacheEntry entry{ replacement_tex, 1u, 0u, alpha_minmax, true };
m_hash_cache_replacement_memory_usage += entry.texture->GetMemUsage();
return &m_hash_cache.emplace(key, entry).first->second;
}
@ -4058,7 +4066,9 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
}
// upload base level
PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0);
const bool is_direct = (GSLocalMemory::m_psm[TEX0.PSM].pal == 0);
std::pair<u8, u8> alpha_minmax = {0u, 255u};
PreloadTexture(TEX0, TEXA, region, g_gs_renderer->m_mem, paltex, tex, 0, is_direct ? &alpha_minmax : nullptr);
// upload mips if present
if (lod)
@ -4067,8 +4077,15 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
const int nmips = lod->y - lod->x + 1;
for (int mip = 1; mip < nmips; mip++)
{
const GIFRegTEX0 MIP_TEX0{ g_gs_renderer->GetTex0Layer(basemip + mip) };
PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip);
const GIFRegTEX0 MIP_TEX0{g_gs_renderer->GetTex0Layer(basemip + mip)};
// Only direct (non-paletted) formats ask PreloadTexture for an alpha range; for paletted
// formats a null pointer is passed and mip_alpha_minmax is never written.
std::pair<u8, u8> mip_alpha_minmax;
PreloadTexture(MIP_TEX0, TEXA, region.AdjustForMipmap(mip), g_gs_renderer->m_mem, paltex, tex, mip,
	is_direct ? &mip_alpha_minmax : nullptr);
if (is_direct)
{
	// Widen the texture's range so it covers every uploaded mip level.
	alpha_minmax.first = std::min(alpha_minmax.first, mip_alpha_minmax.first);
	alpha_minmax.second = std::max(alpha_minmax.second, mip_alpha_minmax.second);
}
}
}
@ -4077,7 +4094,7 @@ GSTextureCache::HashCacheEntry* GSTextureCache::LookupHashCache(const GIFRegTEX0
key.RemoveCLUTHash();
// insert into the cache cache, and we're done
const HashCacheEntry entry{ tex, 1u, 0u, false };
const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, false};
m_hash_cache_memory_usage += tex->GetMemUsage();
return &m_hash_cache.emplace(key, entry).first->second;
}
@ -4466,6 +4483,11 @@ GSTextureCache::Source::~Source()
}
}
bool GSTextureCache::Source::IsPaletteFormat() const
{
return (GSLocalMemory::m_psm[m_TEX0.PSM].pal > 0);
}
void GSTextureCache::Source::SetPages()
{
const int tw = 1 << m_TEX0.TW;
@ -4722,7 +4744,19 @@ void GSTextureCache::Source::PreloadLevel(int level)
m_layer_hash[level] = hash;
// And upload the texture.
PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr, m_texture, level);
if (IsPaletteFormat())
{
	// Paletted formats: upload only, no alpha range is requested from the texel data.
	PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr,
		m_texture, level, nullptr);
}
else
{
	// Direct formats: upload and widen the recorded alpha range so it also covers this level.
	std::pair<u8, u8> mip_alpha_minmax;
	PreloadTexture(m_TEX0, m_TEXA, m_region.AdjustForMipmap(level), g_gs_renderer->m_mem, m_palette != nullptr,
		m_texture, level, &mip_alpha_minmax);
	m_alpha_minmax.first = std::min(m_alpha_minmax.first, mip_alpha_minmax.first);
	// The upper bound must grow, not shrink: take the larger of the two maxima.
	m_alpha_minmax.second = std::max(m_alpha_minmax.second, mip_alpha_minmax.second);
}
}
bool GSTextureCache::Source::ClutMatch(const PaletteKey& palette_key)
@ -5084,12 +5118,17 @@ void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_text
{
s->m_palette_obj = m_palette_map.LookupPalette(pal, need_gs_texture);
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
s->m_alpha_minmax = s->m_palette_obj->GetAlphaMinMax();
}
/// Attaches a GPU-side CLUT texture to a source. No palette object is associated with the
/// source in this case, so the recorded alpha range is reset to the full 0-255 span.
void GSTextureCache::AttachPaletteToSource(Source* s, GSTexture* gpu_clut)
{
	// Alpha range is unknown for a GPU CLUT; fall back to the widest possible range.
	s->m_alpha_minmax = std::pair<u8, u8>(0, 255);

	s->m_palette = gpu_clut;
	s->m_palette_obj = nullptr;
}
GSTextureCache::SurfaceOffset GSTextureCache::ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t)
@ -5287,7 +5326,7 @@ void GSTextureCache::InvalidateTemporarySource()
m_temporary_source = nullptr;
}
void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex)
void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair<u8, u8>& alpha_minmax)
{
// When we insert we update memory usage. Old texture gets removed below.
m_hash_cache_replacement_memory_usage += tex->GetMemUsage();
@ -5297,13 +5336,14 @@ void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture*
{
// We must've got evicted before we finished loading. No matter, add it in there anyway;
// if it's not used again, it'll get tossed out later.
const HashCacheEntry entry{tex, 1u, 0u, true};
const HashCacheEntry entry{tex, 1u, 0u, alpha_minmax, true};
m_hash_cache.emplace(key, entry);
return;
}
// Reset age so we don't get thrown out too early.
it->second.age = 0;
it->second.alpha_minmax = alpha_minmax;
// Update memory usage, swap the textures, and recycle the old one for reuse.
if (!it->second.is_replacement)
@ -5329,6 +5369,8 @@ GSTextureCache::Palette::Palette(u16 pal, bool need_gs_texture)
{
InitializeTexture();
}
m_alpha_minmax = GSGetRGBA8AlphaMinMax(m_clut, pal, 1, 0);
}
GSTextureCache::Palette::~Palette()
@ -5719,7 +5761,8 @@ GSTextureCache::HashType GSTextureCache::HashTexture(const GIFRegTEX0& TEX0, con
return FinishBlockHash(hash_st);
}
void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level)
void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem,
bool paltex, GSTexture* tex, u32 level, std::pair<u8, u8>* alpha_minmax)
{
// m_TEX0 is adjusted for mips (messy, should be changed).
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
@ -5743,7 +5786,7 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
// If we can stream it directly to GPU memory, do so, otherwise go through a temp buffer.
const GSVector4i unoffset_rect(0, 0, tw, th);
GSTexture::GSMap map;
if (rect.eq(block_rect) && tex->Map(map, &unoffset_rect, level))
if (rect.eq(block_rect) && !alpha_minmax && tex->Map(map, &unoffset_rect, level))
{
rtx(mem, off, block_rect, map.bits, map.pitch, TEXA);
tex->Unmap();
@ -5757,6 +5800,11 @@ void GSTextureCache::PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
const u8* ptr = buff + (pitch * static_cast<u32>(rect.top - block_rect.top)) +
(static_cast<u32>(rect.left - block_rect.left) << (paltex ? 0 : 2));
if (alpha_minmax)
{
pxAssert(GSLocalMemory::m_psm[TEX0.PSM].pal == 0);
*alpha_minmax = GSGetRGBA8AlphaMinMax(buff, unoffset_rect.width(), unoffset_rect.height(), pitch);
}
tex->Update(unoffset_rect, ptr, pitch, level);
}
}

View File

@ -15,12 +15,13 @@
#pragma once
#include <limits>
#include "GS/Renderers/Common/GSRenderer.h"
#include "GS/Renderers/Common/GSFastList.h"
#include "GS/Renderers/Common/GSDirtyRect.h"
#include <unordered_set>
#include <utility>
#include <limits>
class GSHwHack;
@ -124,6 +125,7 @@ public:
GSTexture* texture;
u32 refcount;
u16 age;
std::pair<u8, u8> alpha_minmax;
bool is_replacement;
};
@ -174,13 +176,16 @@ public:
{
private:
u32* m_clut;
u16 m_pal;
GSTexture* m_tex_palette;
u16 m_pal;
std::pair<u8, u8> m_alpha_minmax;
public:
Palette(u16 pal, bool need_gs_texture);
~Palette();
/// Returns a copy of the {min, max} alpha pair stored for this palette.
__fi std::pair<u8, u8> GetAlphaMinMax() const
{
	return m_alpha_minmax;
}
// Disable copy constructor and copy operator
Palette(const Palette&) = delete;
Palette& operator=(const Palette&) = delete;
@ -277,6 +282,7 @@ public:
u8 m_complete_layers = 0;
bool m_target = false;
bool m_repeating = false;
std::pair<u8, u8> m_alpha_minmax = {0u, 255u};
std::vector<GSVector2i>* m_p2t = nullptr;
// Keep a trace of the target origin. There is no guarantee that pointer will
// still be valid on future. However it ought to be good when the source is created
@ -296,10 +302,12 @@ public:
__fi bool CanPreload() const { return CanPreloadTextureSize(m_TEX0.TW, m_TEX0.TH); }
__fi bool IsFromTarget() const { return m_target; }
bool IsPaletteFormat() const;
__fi const SourceRegion& GetRegion() const { return m_region; }
__fi GSVector2i GetRegionSize() const { return m_region.GetSize(m_unscaled_size.x, m_unscaled_size.y); }
__fi GSVector4i GetRegionRect() const { return m_region.GetRect(m_unscaled_size.x, m_unscaled_size.y); }
/// Returns a copy of the {min, max} alpha range currently recorded for this source.
/// Returned by plain value: a top-level const on a by-value return has no effect for callers.
__fi std::pair<u8, u8> GetAlphaMinMax() const { return m_alpha_minmax; }
void SetPages();
@ -422,7 +430,7 @@ protected:
void RemoveFromHashCache(HashCacheMap::iterator it);
void AgeHashCache();
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level, std::pair<u8, u8>* alpha_minmax);
static HashType HashTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, SourceRegion region);
// TODO: virtual void Write(Source* s, const GSVector4i& r) = 0;
@ -514,7 +522,7 @@ public:
void InvalidateTemporarySource();
/// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred.
void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex);
void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair<u8, u8>& alpha_minmax);
};
extern std::unique_ptr<GSTextureCache> g_texture_cache;

View File

@ -433,7 +433,8 @@ bool GSTextureReplacements::HasReplacementTextureWithOtherPalette(const GSTextur
return s_replacement_textures_without_clut_hash.find(name) != s_replacement_textures_without_clut_hash.end();
}
GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending)
GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap,
bool* pending, std::pair<u8, u8>* alpha_minmax)
{
const TextureName name(CreateTextureName(hash, 0));
*pending = false;
@ -450,6 +451,7 @@ GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache:
if (it != s_replacement_texture_cache.end())
{
// replacement is cached, can immediately upload to host GPU
*alpha_minmax = it->second.alpha_minmax;
return CreateReplacementTexture(it->second, mipmap);
}
}
@ -476,6 +478,7 @@ GSTexture* GSTextureReplacements::LookupReplacementTexture(const GSTextureCache:
const ReplacementTexture& rtex = s_replacement_texture_cache.emplace(name, std::move(replacement.value())).first->second;
// and upload to gpu
*alpha_minmax = rtex.alpha_minmax;
return CreateReplacementTexture(rtex, mipmap);
}
}
@ -490,6 +493,8 @@ std::optional<GSTextureReplacements::ReplacementTexture> GSTextureReplacements::
if (!loader(filename.c_str(), &rtex, only_base_image))
return std::nullopt;
rtex.alpha_minmax = GSGetRGBA8AlphaMinMax(rtex.data.data(), rtex.width, rtex.height, rtex.pitch);
return rtex;
}
@ -632,7 +637,7 @@ void GSTextureReplacements::ProcessAsyncLoadedTextures()
// upload and inject into TC
GSTexture* tex = CreateReplacementTexture(it->second, mipmap);
if (tex)
g_texture_cache->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex);
g_texture_cache->InjectHashCacheTexture(HashCacheKeyFromTextureName(name), tex, it->second.alpha_minmax);
}
s_async_loaded_textures.clear();
}

View File

@ -17,6 +17,8 @@
#include "GS/Renderers/HW/GSTextureCache.h"
#include <utility>
namespace GSTextureReplacements
{
struct ReplacementTexture
@ -24,6 +26,7 @@ namespace GSTextureReplacements
u32 width;
u32 height;
GSTexture::Format format;
std::pair<u8, u8> alpha_minmax;
u32 pitch;
std::vector<u8> data;
@ -48,7 +51,7 @@ namespace GSTextureReplacements
bool HasAnyReplacementTextures();
bool HasReplacementTextureWithOtherPalette(const GSTextureCache::HashCacheKey& hash);
GSTexture* LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending);
GSTexture* LookupReplacementTexture(const GSTextureCache::HashCacheKey& hash, bool mipmap, bool* pending, std::pair<u8, u8>* alpha_minmax);
GSTexture* CreateReplacementTexture(const ReplacementTexture& rtex, bool mipmap);
void ProcessAsyncLoadedTextures();