pcsx2/plugins/GSdx/Renderers/HW/GSTextureCache.cpp

2419 lines
70 KiB
C++
Raw Normal View History

/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSTextureCache.h"
#include "GSRendererHW.h"
#include "GSUtil.h"
bool GSTextureCache::m_disable_partial_invalidation = false;
bool GSTextureCache::m_wrap_gs_mem = false;
GSTextureCache::GSTextureCache(GSRenderer* r)
: m_renderer(r)
, m_palette_map(r)
{
2021-04-12 09:31:30 +00:00
if (theApp.GetConfigB("UserHacks"))
{
UserHacks_HalfPixelOffset = theApp.GetConfigI("UserHacks_HalfPixelOffset") == 1;
m_preload_frame = theApp.GetConfigB("preload_frame_with_gs_data");
m_disable_partial_invalidation = theApp.GetConfigB("UserHacks_DisablePartialInvalidation");
m_can_convert_depth = !theApp.GetConfigB("UserHacks_DisableDepthSupport");
m_cpu_fb_conversion = theApp.GetConfigB("UserHacks_CPU_FB_Conversion");
m_texture_inside_rt = theApp.GetConfigB("UserHacks_TextureInsideRt");
m_wrap_gs_mem = theApp.GetConfigB("wrap_gs_mem");
2021-04-12 09:31:30 +00:00
}
else
{
UserHacks_HalfPixelOffset = false;
m_preload_frame = false;
m_disable_partial_invalidation = false;
m_can_convert_depth = true;
m_cpu_fb_conversion = false;
m_texture_inside_rt = false;
m_wrap_gs_mem = false;
}
m_paltex = theApp.GetConfigB("paltex");
m_crc_hack_level = theApp.GetConfigT<CRCHackLevel>("crc_hack_level");
if (m_crc_hack_level == CRCHackLevel::Automatic)
m_crc_hack_level = GSUtil::GetRecommendedCRCHackLevel(theApp.GetCurrentRendererType());
// In theory 4MB is enough but 9MB is safer for overflow (8MB
// isn't enough in custom resolution)
// Test: onimusha 3 PAL 60Hz
m_temp = (uint8*)_aligned_malloc(9 * 1024 * 1024, 32);
m_texture_inside_rt_cache.reserve(m_texture_inside_rt_cache_size);
}
GSTextureCache::~GSTextureCache()
{
RemoveAll();
m_texture_inside_rt_cache.clear();
_aligned_free(m_temp);
}
void GSTextureCache::RemovePartial()
{
//m_src.RemoveAll();
for (int type = 0; type < 2; type++)
{
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[type])
delete t;
m_dst[type].clear();
}
}
void GSTextureCache::RemoveAll()
{
m_src.RemoveAll();
2021-04-12 09:31:30 +00:00
for (int type = 0; type < 2; type++)
{
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[type])
delete t;
m_dst[type].clear();
}
m_palette_map.Clear();
}
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette)
{
2021-04-12 09:31:30 +00:00
if (!m_can_convert_depth)
{
GL_CACHE("LookupDepthSource not supported (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM);
2021-04-12 09:31:30 +00:00
if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos)
{
// JackieChan and SVCChaos cause regressions when skipping the draw calls when depth is disabled/not supported.
// This way we make sure there are no regressions on D3D as well.
return LookupSource(TEX0, TEXA, r);
2021-04-12 09:31:30 +00:00
}
else
{
throw GSDXRecoverableError();
}
}
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
Source* src = NULL;
Target* dst = NULL;
// Check only current frame, I guess it is only used as a postprocessing effect
uint32 bp = TEX0.TBP0;
uint32 psm = TEX0.PSM;
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[DepthStencil])
{
if (t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth);
2021-04-12 09:31:30 +00:00
if (t->m_age == 0)
{
// Perfect Match
dst = t;
break;
2021-04-12 09:31:30 +00:00
}
else if (t->m_age == 1)
{
// Better than nothing (Full Spectrum Warrior)
dst = t;
}
}
}
2021-04-12 09:31:30 +00:00
if (!dst)
{
// Retry on the render target (Silent Hill 4)
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[RenderTarget])
{
// FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ???
2021-04-12 09:31:30 +00:00
if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth);
dst = t;
break;
}
}
}
2021-04-12 09:31:30 +00:00
if (dst)
{
GL_CACHE("TC depth: dst %s hit: %d (0x%x, %s)", to_string(dst->m_type),
2021-04-12 09:31:30 +00:00
dst->m_texture ? dst->m_texture->GetID() : 0,
TEX0.TBP0, psm_str(psm));
// Create a shared texture source
src = new Source(m_renderer, TEX0, TEXA, m_temp, true);
src->m_texture = dst->m_texture;
src->m_shared_texture = true;
src->m_target = true; // So renderer can check if a conversion is required
src->m_from_target = dst->m_texture; // avoid complex condition on the renderer
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_32_bits_fmt = dst->m_32_bits_fmt;
src->m_valid_rect = dst->m_valid;
src->m_end_block = dst->m_end_block;
// Insert the texture in the hash set to keep track of it. But don't bother with
// texture cache list. It means that a new Source is created everytime we need it.
// If it is too expensive, one could cut memory allocation in Source constructor for this
// use case.
2021-04-12 09:31:30 +00:00
if (palette)
{
AttachPaletteToSource(src, psm_s.pal, true);
}
m_src.m_surfaces.insert(src);
2021-04-12 09:31:30 +00:00
}
else
{
GL_CACHE("TC depth: ERROR miss (0x%x, %s)", TEX0.TBP0, psm_str(psm));
// Possible ? In this case we could call LookupSource
// Or just put a basic texture
// src->m_texture = m_renderer->m_dev->CreateTexture(tw, th);
// In all cases rendering will be broken
//
// Note: might worth to check previous frame
// Note: otherwise return NULL and skip the draw
2021-04-12 09:31:30 +00:00
if (m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos)
{
// JackieChan and SVCChaos cause regressions when skipping the draw calls so we reuse the old code for these two.
return LookupSource(TEX0, TEXA, r);
2021-04-12 09:31:30 +00:00
}
else
{
// Full Spectrum Warrior: first draw call of cut-scene rendering
// The game tries to emulate a texture shuffle with an old depth buffer
// (don't exists yet for us due to the cache)
// Rendering is nicer (less garbage) if we skip the draw call.
throw GSDXRecoverableError();
}
//ASSERT(0);
}
return src;
}
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
2014-04-14 18:25:02 +00:00
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
// Until DX is fixed
2021-04-12 09:31:30 +00:00
if (psm_s.pal > 0)
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
const uint32* clut = m_renderer->m_mem.m_clut;
Source* src = NULL;
auto& m = m_src.m_map[TEX0.TBP0 >> 5];
2021-04-12 09:31:30 +00:00
for (auto i = m.begin(); i != m.end(); ++i)
{
Source* s = *i;
if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
continue;
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
2021-04-12 09:31:30 +00:00
if (!s->m_target)
{
// We request a palette texture (psm_s.pal). If the texture was
// converted by the CPU (!s->m_palette), we need to ensure
// palette content is the same.
2021-04-12 09:31:30 +00:00
if (psm_s.pal > 0 && !s->m_palette && !s->ClutMatch({clut, psm_s.pal}))
continue;
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
// the CPU. We need to check that TEXA is identical
if (psm_s.pal == 0 && psm_s.fmt > 0 && s->m_TEXA.u64 != TEXA.u64)
continue;
}
m.MoveFront(i.Index());
src = s;
break;
}
Target* dst = NULL;
bool half_right = false;
int x_offset = 0;
int y_offset = 0;
#ifdef DISABLE_HW_TEXTURE_CACHE
2021-04-12 09:31:30 +00:00
if (0)
#else
2021-04-12 09:31:30 +00:00
if (src == NULL)
#endif
{
uint32 bp = TEX0.TBP0;
uint32 psm = TEX0.PSM;
uint32 bw = TEX0.TBW;
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
2021-04-12 09:31:30 +00:00
uint32 bp_end = psm_s.bn(tw - 1, th - 1, bp, bw); // Valid only for color formats
// Arc the Lad finds the wrong surface here when looking for a depth stencil.
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
// (Simply not doing this code at all makes a lot of previsouly missing stuff show (but breaks pretty much everything
// else.)
bool texture_inside_rt = ShallSearchTextureInsideRt();
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[RenderTarget])
{
if (t->m_used && t->m_dirty.empty())
{
// Typical bug (MGS3 blue cloud):
// 1/ RT used as 32 bits => alpha channel written
// 2/ RT used as 24 bits => no update of alpha channel
// 3/ Lookup of texture that used alpha channel as index, HasSharedBits will return false
// because of the previous draw call format
//
// Solution: consider the RT as 32 bits if the alpha was used in the past
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
2021-04-12 09:31:30 +00:00
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm))
{
// It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will be slow but
// 1/ it just works :)
// 2/ even with upscaling
// 3/ for both Direct3D and OpenGL
if (m_cpu_fb_conversion && (psm == PSM_PSMT4 || psm == PSM_PSMT8))
// Forces 4-bit and 8-bit frame buffer conversion to be done on the CPU instead of the GPU, but performance will be slower.
// There is no dedicated shader to handle 4-bit conversion (Stuntman has been confirmed to use 4-bit).
// Direct3D10/11 and OpenGL support 8-bit fb conversion but don't render some corner cases properly (Harry Potter games).
// The hack can fix glitches in some games.
Read(t, t->m_valid);
else
dst = t;
break;
2021-04-12 09:31:30 +00:00
}
else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM))
{
// Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
half_right = true;
dst = t;
break;
2021-04-12 09:31:30 +00:00
}
else if (texture_inside_rt && psm == PSM_PSMCT32 && t->m_TEX0.PSM == psm && t->m_TEX0.TBP0 < bp && t->m_end_block >= bp)
{
// Only PSMCT32 to limit false hits
// Check if it is possible to hit with valid <x,y> offset on the given Target
// Fixes Jak eyes rendering
bool valid_offset_may_exist = true;
// CACHE SEARCH: <x,y> offset
for (auto& el : m_texture_inside_rt_cache)
{
if (el.psm == psm && el.bp == bp && el.bp_end == bp_end && el.bw == bw &&
el.t_tex0_tbp0 == t->m_TEX0.TBP0 && el.m_end_block == t->m_end_block)
{
if (el.has_valid_offset)
{
// CACHE HIT: <x,y> offset found
dst = t;
x_offset = el.x_offset;
y_offset = el.y_offset;
}
else
{
// CACHE HIT: No valid <x,y> offset exists
valid_offset_may_exist = false;
}
break;
}
}
if (dst != nullptr)
break;
if (!valid_offset_may_exist)
continue;
// CACHE MISS
// SWEEP SEARCH: <x,y> offset
2021-04-12 09:31:30 +00:00
TexInsideRtCacheEntry entry = {psm, bp, bp_end, bw, t->m_TEX0.TBP0, t->m_end_block, false, 0, 0};
for (int candidate_x_offset = 0; candidate_x_offset < t->m_valid.z; ++candidate_x_offset)
{
for (int candidate_y_offset = 0; candidate_y_offset < t->m_valid.w; ++candidate_y_offset)
{
if (candidate_x_offset == 0 && candidate_y_offset == 0)
continue;
uint32 candidate_bp = psm_s.bn(candidate_x_offset, candidate_y_offset, t->m_TEX0.TBP0, bw);
if (bp == candidate_bp && bp_end <= t->m_end_block)
{
// SWEEP HIT: <x,y> offset found
dst = t;
x_offset = candidate_x_offset;
y_offset = candidate_y_offset;
// Add result to cache
while (m_texture_inside_rt_cache.size() > m_texture_inside_rt_cache_size)
{
GL_PERF("TC tex in rt: Size of cache %d too big, clearing it.", m_texture_inside_rt_cache.size());
m_texture_inside_rt_cache.clear();
}
entry.has_valid_offset = true;
entry.x_offset = x_offset;
entry.y_offset = y_offset;
m_texture_inside_rt_cache.emplace_back(entry);
GL_CACHE("TC tex in rt: Cached HIT element (size %d), BW: %d, PSM %s, rt 0x%x <%d,%d> + off <%d,%d> -> 0x%x <%d,%d> (END: 0x%x)",
m_texture_inside_rt_cache.size(), bw, psm_str(psm), t->m_TEX0.TBP0, t->m_valid.z, t->m_valid.w, x_offset, y_offset, bp, tw, th, bp_end);
break;
}
}
if (dst != nullptr)
break;
}
if (dst != nullptr)
break;
// SWEEP MISS: no valid <x,y> offset found
while (m_texture_inside_rt_cache.size() > m_texture_inside_rt_cache_size)
{
GL_PERF("TC tex in rt: Size of cache %d too big, clearing it.", m_texture_inside_rt_cache.size());
m_texture_inside_rt_cache.clear();
}
GL_CACHE("TC tex in rt: Cached MISS element (size %d), BW: %d, PSM %s, rt 0x%x <%d,%d> -/-> 0x%x <%d,%d> (END: 0x%x)",
m_texture_inside_rt_cache.size(), bw, psm_str(psm), t->m_TEX0.TBP0, t->m_valid.z, t->m_valid.w, bp, tw, th, bp_end);
m_texture_inside_rt_cache.emplace_back(entry);
}
}
}
// Pure depth texture format will be fetched by LookupDepthSource.
// However guess what, some games (GoW) read the depth as a standard
// color format (instead of a depth format). All pixels are scrambled
// (because color and depth don't have same location). They don't care
// pixel will be several draw calls later.
//
// Sigh... They don't help us.
2021-04-12 09:31:30 +00:00
if (dst == NULL && m_can_convert_depth)
{
// Let's try a trick to avoid to use wrongly a depth buffer
// Unfortunately, I don't have any Arc the Lad testcase
//
// 1/ Check only current frame, I guess it is only used as a postprocessing effect
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[DepthStencil])
{
if (!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled");
// Let's fetch a depth format texture. Rational, it will avoid the texture allocation and the
// rescaling of the current function.
2021-04-12 09:31:30 +00:00
if (psm_s.bpp > 8)
{
GIFRegTEX0 depth_TEX0;
depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u);
depth_TEX0.u32[1] = TEX0.u32[1];
return LookupDepthSource(depth_TEX0, TEXA, r);
2021-04-12 09:31:30 +00:00
}
else
{
return LookupDepthSource(TEX0, TEXA, r, true);
}
}
}
}
}
bool new_source = false;
2021-04-12 09:31:30 +00:00
if (src == NULL)
{
#ifdef ENABLE_OGL_DEBUG
2021-04-12 09:31:30 +00:00
if (dst)
{
2016-08-09 13:15:30 +00:00
GL_CACHE("TC: dst %s hit (%s): %d (0x%x, %s)", to_string(dst->m_type), half_right ? "half" : "full",
2021-04-12 09:31:30 +00:00
dst->m_texture ? dst->m_texture->GetID() : 0,
TEX0.TBP0, psm_str(TEX0.PSM));
}
else
{
2016-09-30 17:18:58 +00:00
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
}
#endif
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset);
new_source = true;
2021-04-12 09:31:30 +00:00
}
else
{
2016-10-05 22:11:35 +00:00
GL_CACHE("TC: src hit: %d (0x%x, 0x%x, %s)",
2021-04-12 09:31:30 +00:00
src->m_texture ? src->m_texture->GetID() : 0,
TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0,
psm_str(TEX0.PSM));
}
2021-04-12 09:31:30 +00:00
if (src->m_palette && !new_source && !src->ClutMatch({clut, psm_s.pal}))
{
AttachPaletteToSource(src, psm_s.pal, true);
}
src->Update(r);
m_src.m_used = true;
return src;
}
void GSTextureCache::ScaleTexture(GSTexture* texture)
{
if (!m_renderer->CanUpscale())
return;
float multiplier = static_cast<float>(m_renderer->GetUpscaleMultiplier());
bool custom_resolution = (multiplier == 0);
GSVector2 scale_factor(multiplier);
if (custom_resolution)
{
int width = m_renderer->GetDisplayRect().width();
int height = m_renderer->GetDisplayRect().height();
GSVector2i requested_resolution = m_renderer->GetCustomResolution();
scale_factor.x = static_cast<float>(requested_resolution.x) / width;
scale_factor.y = static_cast<float>(requested_resolution.y) / height;
}
texture->SetScale(scale_factor);
}
bool GSTextureCache::ShallSearchTextureInsideRt()
{
return m_texture_inside_rt || (m_renderer->m_game.flags & CRC::Flags::TextureInsideRt);
}
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, uint32 fbmask)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
uint32 bp = TEX0.TBP0;
Target* dst = NULL;
auto& list = m_dst[type];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i;
2021-04-12 09:31:30 +00:00
if (bp == t->m_TEX0.TBP0)
{
list.MoveFront(i.Index());
dst = t;
dst->m_32_bits_fmt |= (psm_s.bpp != 16);
dst->m_TEX0 = TEX0;
break;
}
}
2021-04-12 09:31:30 +00:00
if (dst)
{
GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, %s)", to_string(type), w, h, dst->m_texture->GetID(), bp, psm_str(TEX0.PSM));
dst->Update();
dst->m_dirty_alpha |= (psm_s.trbpp == 32 && (fbmask & 0xFF000000) != 0xFF000000) || (psm_s.trbpp == 16);
2021-04-12 09:31:30 +00:00
}
else if (m_can_convert_depth)
{
2015-05-15 13:12:49 +00:00
int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil;
// Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick
// some bad data.
Target* dst_match = nullptr;
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[rev_type])
{
if (bp == t->m_TEX0.TBP0)
{
if (t->m_age == 0)
{
dst_match = t;
break;
2021-04-12 09:31:30 +00:00
}
else if (t->m_age == 1)
{
dst_match = t;
}
}
}
2021-04-12 09:31:30 +00:00
if (dst_match)
{
GSVector4 sRect(0, 0, 1, 1);
GSVector4 dRect(0, 0, w, h);
dst = CreateTarget(TEX0, w, h, type);
dst->m_32_bits_fmt = dst_match->m_32_bits_fmt;
int shader;
bool fmt_16_bits = (psm_s.bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp == 16);
2021-04-12 09:31:30 +00:00
if (type == DepthStencil)
{
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + psm_s.fmt;
2021-04-12 09:31:30 +00:00
}
else
{
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, %s was %s)", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
shader = (fmt_16_bits) ? ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8;
}
m_renderer->m_dev->StretchRect(dst_match->m_texture, sRect, dst->m_texture, dRect, shader, false);
}
}
2021-04-12 09:31:30 +00:00
if (dst == NULL)
{
GL_CACHE("TC: Lookup Target(%s) %dx%d, miss (0x%x, %s)", to_string(type), w, h, bp, psm_str(TEX0.PSM));
2015-05-15 13:12:49 +00:00
dst = CreateTarget(TEX0, w, h, type);
// In theory new textures contain invalidated data. Still in theory a new target
// must contains the content of the GS memory.
// In practice, TC will wrongly invalidate some RT. For example due to write on the alpha
// channel but colors is still valid. Unfortunately TC doesn't support the upload of data
// in target.
//
// Cleaning the code here will likely break several games. However it might reduce
// the noise in draw call debugging. It is the main reason to enable it on debug build.
//
// From a performance point of view, it might cost a little on big upscaling
// but normally few RT are miss so it must remain reasonable.
bool supported_fmt = m_can_convert_depth || psm_s.depth == 0;
2021-04-12 09:31:30 +00:00
if (m_preload_frame && TEX0.TBW > 0 && supported_fmt)
{
GL_INS("Preloading the RT DATA");
// RT doesn't have height but if we use a too big value, we will read outside of the GS memory.
int page0 = TEX0.TBP0 >> 5;
int max_page = (MAX_PAGES - page0);
int max_h = 32 * max_page / TEX0.TBW;
// h is likely smaller than w (true most of the time). Reduce the upload size (speed)
max_h = std::min<int>(max_h, TEX0.TBW * 64);
dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, max_h), TEX0.PSM));
dst->Update();
2021-04-12 09:31:30 +00:00
}
else
{
#ifdef ENABLE_OGL_DEBUG
switch (type) {
2021-04-12 09:31:30 +00:00
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture); break;
default: break;
}
#endif
}
}
ScaleTexture(dst->m_texture);
2021-04-12 09:31:30 +00:00
if (used)
{
dst->m_used = true;
}
return dst;
}
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h)
{
uint32 bp = TEX0.TBP0;
Target* dst = NULL;
#if 0
// Dump the list of targets for debug
for(auto t : m_dst[RenderTarget]) {
GL_INS("TC: frame 0x%x -> 0x%x : %d (age %d)", t->m_TEX0.TBP0, t->m_end_block, t->m_texture->GetID(), t->m_age);
}
#endif
// Let's try to find a perfect frame that contains valid data
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[RenderTarget])
{
if (bp == t->m_TEX0.TBP0 && t->m_end_block >= bp)
{
dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM));
break;
}
}
// 2nd try ! Try to find a frame that include the bp
2021-04-12 09:31:30 +00:00
if (dst == NULL)
{
for (auto t : m_dst[RenderTarget])
{
if (t->m_TEX0.TBP0 < bp && bp <= t->m_end_block)
{
dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s)", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block, psm_str(TEX0.PSM));
break;
}
}
}
// 3rd try ! Try to find a frame that doesn't contain valid data (honestly I'm not sure we need to do it)
2021-04-12 09:31:30 +00:00
if (dst == NULL)
{
for (auto t : m_dst[RenderTarget])
{
if (bp == t->m_TEX0.TBP0)
{
dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, empty hit: %d (0x%x -> 0x%x %s)", w, h, dst->m_texture->GetID(), bp, t->m_end_block, psm_str(TEX0.PSM));
break;
}
}
}
#if 0
for(auto t : m_dst[RenderTarget])
{
if(bp == t->m_TEX0.TBP0)
{
dst = t;
GL_CACHE("TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x)", w, h, dst->m_texture->GetID(), bp, t->m_end_block);
2015-05-15 13:12:49 +00:00
break;
}
else
{
// HACK: try to find something close to the base pointer
if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0xe00UL && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0))
{
GL_CACHE("TC: Lookup Frame %dx%d, close hit: %d (0x%x, took 0x%x -> 0x%x)", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block);
dst = t;
}
}
}
#endif
2021-04-12 09:31:30 +00:00
if (dst == NULL)
{
GL_CACHE("TC: Lookup Frame %dx%d, miss (0x%x %s)", w, h, bp, psm_str(TEX0.PSM));
2015-05-15 13:12:49 +00:00
dst = CreateTarget(TEX0, w, h, RenderTarget);
ScaleTexture(dst->m_texture);
m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage
2021-04-12 09:31:30 +00:00
if (m_preload_frame)
{
// Load GS data into frame. Game can directly uploads a background or the full image in
// "CTRC" buffer. It will also avoid various black screen issue in gs dump.
//
// Code is more or less an equivalent of the SW renderer
//
// Option is hidden and not enabled by default to avoid any regression
dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, real_h), TEX0.PSM));
dst->Update();
}
}
else
{
dst->Update();
}
dst->m_used = true;
dst->m_dirty_alpha = false;
return dst;
}
// Goal: Depth And Target at the same address is not possible. On GS it is
// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target
// must invalidate the Target/Depth respectively
void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
{
if (!m_can_convert_depth)
return;
auto& list = m_dst[type];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i;
2021-04-12 09:31:30 +00:00
if (bp == t->m_TEX0.TBP0)
{
GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x)", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
list.erase(i);
delete t;
break;
}
}
}
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified
// Called each time you want to write to the GS memory
2015-05-15 18:40:09 +00:00
void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target)
{
2021-04-12 09:31:30 +00:00
if (!off)
return; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
2015-05-15 18:40:09 +00:00
uint32 bp = off->bp;
uint32 bw = off->bw;
uint32 psm = off->psm;
2021-04-12 09:31:30 +00:00
if (!target)
{
// Remove Source that have same BP as the render target (color&dss)
// rendering will dirty the copy
auto& list = m_src.m_map[bp >> 5];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end();)
{
Source* s = *i;
++i;
if (GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM) ||
GSUtil::HasSharedBits(bp, psm, s->m_from_target_TEX0.TBP0, s->m_TEX0.PSM))
{
m_src.RemoveAt(s);
}
}
uint32 bbp = bp + bw * 0x10;
2021-04-12 09:31:30 +00:00
if (bw >= 16 && bbp < 16384)
{
// Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
auto& list = m_src.m_map[bbp >> 5];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end();)
{
Source* s = *i;
++i;
2021-04-12 09:31:30 +00:00
if (GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM))
{
m_src.RemoveAt(s);
}
}
}
// Haunting ground write frame buffer 0x3000 and expect to write data to 0x3380
// Note: the game only does a 0 direct write. If some games expect some real data
// we are screwed.
2021-04-12 09:31:30 +00:00
if (m_renderer->m_game.title == CRC::HauntingGround)
{
uint32 end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats
auto type = RenderTarget;
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[type])
{
2021-04-12 09:31:30 +00:00
if (t->m_TEX0.TBP0 > bp && t->m_end_block <= end_block)
{
// Haunting ground expect to clean buffer B with a rendering into buffer A.
// Situation is quite messy as it would require to extract the data from the buffer A
// and to move in buffer B.
//
// Of course buffers don't share the same line width. You can't delete the buffer as next
// miss will load invalid data.
//
// So just clear the damn buffer and forget about it.
GL_CACHE("TC: Clear Sub Target(%s) %d (0x%x)", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
m_renderer->m_dev->ClearRenderTarget(t->m_texture, 0);
}
}
}
}
GSVector4i r;
uint32* pages = (uint32*)m_temp;
2015-05-15 18:40:09 +00:00
off->GetPages(rect, pages, &r);
bool found = false;
2021-04-12 09:31:30 +00:00
for (const uint32* p = pages; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
auto& list = m_src.m_map[page];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end();)
{
Source* s = *i;
++i;
2021-04-12 09:31:30 +00:00
if (GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{
bool b = bp == s->m_TEX0.TBP0;
2021-04-12 09:31:30 +00:00
if (!s->m_target)
{
2021-04-12 09:31:30 +00:00
if (m_disable_partial_invalidation && s->m_repeating)
{
m_src.RemoveAt(s);
}
else
{
uint32* RESTRICT valid = s->m_valid;
// Invalidate data of input texture
2021-04-12 09:31:30 +00:00
if (s->m_repeating)
{
// Note: very hot path on snowbling engine game
2021-04-12 09:31:30 +00:00
for (const GSVector2i& k : s->m_p2t[page])
{
valid[k.x] &= k.y;
}
}
else
{
valid[page] = 0;
}
s->m_complete = false;
found |= b;
}
}
else
{
// render target used as input texture
b |= bp == s->m_from_target_TEX0.TBP0;
if (!b)
b = s->Overlaps(bp, bw, psm, rect);
2021-04-12 09:31:30 +00:00
if (b)
{
m_src.RemoveAt(s);
}
}
}
}
}
2021-04-12 09:31:30 +00:00
if (!target)
return;
2021-04-12 09:31:30 +00:00
for (int type = 0; type < 2; type++)
{
auto& list = m_dst[type];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end();)
{
auto j = i++;
Target* t = *j;
// GH: (I think) this code is completely broken. Typical issue:
// EE write an alpha channel into 32 bits texture
// Results: the target is deleted (because HasCompatibleBits is false)
//
// Major issues are expected if the game try to reuse the target
// If we dirty the RT, it will likely upload partially invalid data.
// (The color on the previous example)
2021-04-12 09:31:30 +00:00
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
2021-04-12 09:31:30 +00:00
if (!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
GL_CACHE("TC: Dirty Target(%s) %d (0x%x) r(%d,%d,%d,%d)", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0, r.x, r.y, r.z, r.w);
t->m_dirty.push_back(GSDirtyRect(r, psm));
t->m_TEX0.TBW = bw;
}
else
{
list.erase(j);
GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
delete t;
continue;
}
2021-04-12 09:31:30 +00:00
}
else if (bp == t->m_TEX0.TBP0)
{
// EE writes the ALPHA channel. Mark it as invalid for
// the texture cache. Otherwise it will generate a wrong
// hit on the texture cache.
// Game: Conflict - Desert Storm (flickering)
t->m_dirty_alpha = false;
}
// GH: Try to detect texture write that will overlap with a target buffer
2021-04-12 09:31:30 +00:00
if (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
if (bp < t->m_TEX0.TBP0)
{
uint32 rowsize = bw * 8192;
uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256);
2021-04-12 09:31:30 +00:00
if (rowsize > 0 && offset % rowsize == 0)
{
int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
2021-04-12 09:31:30 +00:00
if (r.bottom > y)
{
GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
// TODO: do not add this rect above too
t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm));
t->m_TEX0.TBW = bw;
continue;
}
}
}
// FIXME: this code "fixes" black FMV issue with rule of rose.
#if 1
// Greg: I'm not sure the 'bw' equality is required but it won't hurt too much
//
// Ben 10 Alien Force : Vilgax Attacks uses a small temporary target for multiple textures (different bw)
// It is too complex to handle, and purpose of the code was to handle FMV (large bw). So let's skip small
// (128 pixels) target
2021-04-12 09:31:30 +00:00
if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
{
uint32 rowsize = bw * 8192u;
uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256);
2021-04-12 09:31:30 +00:00
if (rowsize > 0 && offset % rowsize == 0)
{
int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
GL_CACHE("TC: Dirty in the middle of Target(%s) %d (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0, t->m_end_block,
r.left, r.top + y, r.right, r.bottom + y, bw);
t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top + y, r.right, r.bottom + y), psm));
t->m_TEX0.TBW = bw;
continue;
}
}
#endif
}
}
}
}
// Goal: retrive the data from the GPU to the GS memory.
// Called each time you want to read from the GS memory
2015-05-15 18:40:09 +00:00
void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r)
{
2015-05-15 18:40:09 +00:00
uint32 bp = off->bp;
uint32 psm = off->psm;
//uint32 bw = off->bw;
// No depth handling please.
2021-04-12 09:31:30 +00:00
if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S)
{
GL_INS("ERROR: InvalidateLocalMem depth format isn't supported (%d,%d to %d,%d)", r.x, r.y, r.z, r.w);
2021-04-12 09:31:30 +00:00
if (m_can_convert_depth)
{
for (auto t : m_dst[DepthStencil])
{
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
Read(t, r.rintersect(t->m_valid));
}
}
}
return;
}
// This is a shorter but potentially slower version of the below, commented out code.
// It works for all the games mentioned below and fixes a couple of other ones as well
// (Busen0: Wizardry and Chaos Legion).
// Also in a few games the below code ran the Grandia3 case when it shouldn't :p
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[RenderTarget])
{
if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
{
2021-04-12 09:31:30 +00:00
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
// GH Note: Read will do a StretchRect and then will sizzle data to the GS memory
// t->m_valid will do the full target texture whereas r.intersect(t->m_valid) will be limited
// to the useful part for the transfer.
// 1/ Logically intersect must be enough, except if we miss some call to InvalidateLocalMem
// or it need the depth part too
// 2/ Read function is slow but I suspect the swizzle part to be costly. Maybe a compute shader
// that do the swizzle at the same time of the Stretching could save CPU computation.
// note: r.rintersect breaks Wizardry and Chaos Legion
// Read(t, t->m_valid) works in all tested games but is very slow in GUST titles ><
2021-04-12 09:31:30 +00:00
if (GSTextureCache::m_disable_partial_invalidation)
{
Read(t, r.rintersect(t->m_valid));
2021-04-12 09:31:30 +00:00
}
else
{
if (r.x == 0 && r.y == 0) // Full screen read?
Read(t, t->m_valid);
else // Block level read?
Read(t, r.rintersect(t->m_valid));
}
}
2021-04-12 09:31:30 +00:00
}
else
{
GL_INS("ERROR: InvalidateLocalMem target is a depth format");
}
}
//GSTextureCache::Target* rt2 = NULL;
//int ymin = INT_MAX;
//for(auto i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
//{
// auto j = i++;
// Target* t = *j;
// if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
// {
2021-04-12 09:31:30 +00:00
// if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
// {
2021-04-12 09:31:30 +00:00
// if (GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
// {
// Read(t, r.rintersect(t->m_valid));
// return;
// }
2021-04-12 09:31:30 +00:00
// else if (psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S))
// {
// // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit
// Read(t, GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid));
// return;
// }
// else
// {
// if (psm == PSM_PSMT4HH && t->m_TEX0.PSM == PSM_PSMCT32)
// {
// // Silent Hill Origins shadows: Read 8 bit using only the HIGH bits (4 bit) texture as 32 bit.
// Read(t, r.rintersect(t->m_valid));
// return;
// }
// else
// {
// //printf("Trashing render target. We have a %d type texture and we are trying to write into a %d type texture\n", t->m_TEX0.PSM, psm);
// m_dst[RenderTarget].erase(j);
// delete t;
// }
// }
// }
// // Grandia3, FFX, FFX-2 pause menus. t->m_TEX0.TBP0 magic number checks because otherwise kills xs2 videos
2021-04-12 09:31:30 +00:00
// if ((GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && (bp > t->m_TEX0.TBP0))
// && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584)))
// {
// //printf("first : %d-%d child : %d-%d\n", psm, bp, t->m_TEX0.PSM, t->m_TEX0.TBP0);
// uint32 rowsize = bw * 8192;
// uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256);
2021-04-12 09:31:30 +00:00
// if (rowsize > 0 && offset % rowsize == 0)
// {
// int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
2021-04-12 09:31:30 +00:00
// if (y < ymin && y < 512)
// {
// rt2 = t;
// ymin = y;
// }
// }
// }
// }
//}
2021-04-12 09:31:30 +00:00
//if (rt2)
//{
// Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin));
//}
// TODO: ds
}
// Hack: remove Target that are strictly included in current rt. Typically uses for FMV
// For example, game is rendered at 0x800->0x1000, fmv will be uploaded to 0x0->0x2800
// FIXME In theory, we ought to report the data from the sub rt to the main rt. But let's
// postpone it for later.
void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt)
{
if (!rt)
return;
auto& list = m_dst[RenderTarget];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end();)
{
Target* t = *i;
2021-04-12 09:31:30 +00:00
if ((t->m_TEX0.TBP0 > rt->m_TEX0.TBP0) && (t->m_end_block < rt->m_end_block) && (t->m_TEX0.TBW == rt->m_TEX0.TBW) && (t->m_TEX0.TBP0 < t->m_end_block))
{
GL_INS("InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x",
2021-04-12 09:31:30 +00:00
rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block);
i = list.erase(i);
delete t;
2021-04-12 09:31:30 +00:00
}
else
{
++i;
}
}
}
void GSTextureCache::IncAge()
{
int maxage = m_src.m_used ? 3 : 30;
// You can't use m_map[page] because Source* are duplicated on several pages.
2021-04-12 09:31:30 +00:00
for (auto i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end();)
{
Source* s = *i;
2021-04-12 09:31:30 +00:00
if (s->m_shared_texture)
{
// Shared textures are temporary only added in the hash set but not in the texture
// cache list therefore you can't use RemoveAt
i = m_src.m_surfaces.erase(i);
delete s;
2021-04-12 09:31:30 +00:00
}
else
{
++i;
2021-04-12 09:31:30 +00:00
if (++s->m_age > maxage)
{
m_src.RemoveAt(s);
}
}
}
m_src.m_used = false;
// Clearing of Rendertargets causes flickering in many scene transitions.
// Sigh, this seems to be used to invalidate surfaces. So set a huge maxage to avoid flicker,
// but still invalidate surfaces. (Disgaea 2 fmv when booting the game through the BIOS)
// Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions.
maxage = 400; // ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it
2021-04-12 09:31:30 +00:00
for (int type = 0; type < 2; type++)
{
auto& list = m_dst[type];
2021-04-12 09:31:30 +00:00
for (auto i = list.begin(); i != list.end();)
{
Target* t = *i;
// This variable is used to detect the texture shuffle effect. There is a high
// probability that game will do it on the current RT.
// Variable is cleared here to avoid issue with game that uses a 16 bits
// render target
2021-04-12 09:31:30 +00:00
if (t->m_age > 0)
{
// GoW2 uses the effect at the start of the frame
t->m_32_bits_fmt = false;
}
2021-04-12 09:31:30 +00:00
if (++t->m_age > maxage)
{
i = list.erase(i);
GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type),
2021-04-12 09:31:30 +00:00
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
delete t;
2021-04-12 09:31:30 +00:00
}
else
{
++i;
}
}
}
}
//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
Source* src = new Source(m_renderer, TEX0, TEXA, m_temp);
int tw = 1 << TEX0.TW;
int th = 1 << TEX0.TH;
2014-04-14 18:25:02 +00:00
//int tp = TEX0.TBW << 6;
bool hack = false;
if (dst && (x_offset != 0 || y_offset != 0))
{
GSVector2 scale = dst->m_texture->GetScale();
int x = (int)(scale.x * x_offset);
int y = (int)(scale.y * y_offset);
int w = (int)(scale.x * tw);
int h = (int)(scale.y * th);
GSTexture* sTex = dst->m_texture;
GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h);
GSVector4i area(x, y, x + w, y + h);
m_renderer->m_dev->CopyRect(sTex, dTex, area);
// Keep a trace of origin of the texture
src->m_texture = dTex;
src->m_target = true;
src->m_from_target = dst->m_texture;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_texture->SetScale(scale);
src->m_end_block = dst->m_end_block;
2021-04-12 09:31:30 +00:00
if (psm.pal > 0)
{
// Attach palette for GPU texture conversion
AttachPaletteToSource(src, psm.pal, true);
}
}
else if (dst && static_cast<GSRendererHW*>(m_renderer)->IsDummyTexture())
{
// This shortcut is a temporary solution. It isn't a good solution
// as it won't work with Channel Shuffle/Texture Shuffle pattern
// (we need texture cache result to detect those effects).
// Instead a better solution would be to defer the copy/StrechRect later
// in the rendering.
// Still this poor solution is enough for a huge speed up in a couple of games
//
// Be aware that you can't use StrechRect between BeginScene/EndScene.
// So it could be tricky to put in the middle of the DrawPrims
// Texture is created to keep code compatibility
GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(tw, th);
// Keep a trace of origin of the texture
src->m_texture = dTex;
src->m_target = true;
src->m_from_target = dst->m_texture;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_end_block = dst->m_end_block;
// Even if we sample the framebuffer directly we might need the palette
// to handle the format conversion on GPU
if (psm.pal > 0)
AttachPaletteToSource(src, psm.pal, true);
}
else if (dst)
{
// TODO: clean up this mess
int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY;
bool is_8bits = TEX0.PSM == PSM_PSMT8;
2021-04-12 09:31:30 +00:00
if (is_8bits)
{
GL_INS("Reading RT as a packed-indexed 8 bits format");
shader = ShaderConvert_RGBA_TO_8I;
}
#ifdef ENABLE_OGL_DEBUG
2021-04-12 09:31:30 +00:00
if (TEX0.PSM == PSM_PSMT4)
{
GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported");
}
#endif
2021-04-12 09:31:30 +00:00
if (GSLocalMemory::m_psm[TEX0.PSM].bpp > 8)
{
src->m_32_bits_fmt = dst->m_32_bits_fmt;
}
// Keep a trace of origin of the texture
src->m_target = true;
src->m_from_target = dst->m_texture;
src->m_from_target_TEX0 = dst->m_TEX0;
src->m_valid_rect = dst->m_valid;
src->m_end_block = dst->m_end_block;
dst->Update();
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(dst->m_texture->GetScale().x * tw);
int h = (int)(dst->m_texture->GetScale().y * th);
2021-04-12 09:31:30 +00:00
if (is_8bits)
{
// Unscale 8 bits textures, quality won't be nice but format is really awful
w = tw;
h = th;
}
GSVector2i dstsize = dst->m_texture->GetSize();
// pitch conversion
2021-04-12 09:31:30 +00:00
if (dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM
{
// This is so broken :p
////Better not do the code below, "fixes" like every game that ever gets here..
////Edit: Ratchet and Clank needs this to show most of it's graphics at all.
////Someone else fix this please, I can't :p
////delete src; return NULL;
//// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
//ASSERT(dst->m_TEX0.TBW > TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
//src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y, false);
//GSVector4 size = GSVector4(dstsize).xyxy();
//GSVector4 scale = GSVector4(dst->m_texture->GetScale()).xyxy();
//int blockWidth = 64;
//int blockHeight = TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
//GSVector4i br(0, 0, blockWidth, blockHeight);
//int sw = (int)dst->m_TEX0.TBW << 6;
//int dw = (int)TEX0.TBW << 6;
//int dh = 1 << TEX0.TH;
2021-04-12 09:31:30 +00:00
//if (sw != 0)
//for (int dy = 0; dy < dh; dy += blockHeight)
//{
2021-04-12 09:31:30 +00:00
// for (int dx = 0; dx < dw; dx += blockWidth)
// {
2015-05-15 18:40:09 +00:00
// int off = dy * dw / blockHeight + dx;
2015-05-15 18:40:09 +00:00
// int sx = off % sw;
// int sy = off / sw;
2015-05-15 18:47:14 +00:00
// GSVector4 sRect = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
2015-05-15 18:49:25 +00:00
// GSVector4 dRect = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
2015-05-15 18:49:25 +00:00
// m_renderer->m_dev->StretchRect(dst->m_texture, sRect, src->m_texture, dRect);
// // TODO: this is quite a lot of StretchRect, do it with one Draw
// }
//}
}
2021-04-12 09:31:30 +00:00
else if (tw < 1024)
{
// FIXME: timesplitters blurs the render target by blending itself over a couple of times
hack = true;
2021-04-12 09:31:30 +00:00
//if (tw == 256 && th == 128 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00))
//{
// delete src;
// return NULL;
//}
}
// width/height conversion
GSVector2 scale = dst->m_texture->GetScale();
2015-05-15 18:49:25 +00:00
GSVector4 dRect(0, 0, w, h);
// Lengthy explanation of the rescaling code.
// Here an example in 2x:
// RT is 1280x1024 but only contains 512x448 valid data (so 256x224 pixels without upscaling)
//
// PS2 want to read it back as a 1024x1024 pixels (they don't care about the extra pixels)
// So in theory we need to shrink a 2048x2048 RT into a 1024x1024 texture. Obviously the RT is
// too small.
//
// So we will only limit the resize to the available data in RT.
// Therefore we will resize the RT from 1280x1024 to 1280x1024/2048x2048 % of the new texture
// size (which is 1280x1024) (i.e. 800x512)
// From the rendering point of view. UV coordinate will be normalized on the real GS texture size
// This way it can be used on an upscaled texture without extra scaling factor (only requirement is
// to have same proportion)
//
// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
// At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
// I think it is the purpose of the UserHacks_HalfPixelOffset below. However implementation is less
// than ideal.
// 1/ It suppose games have an half pixel offset on texture coordinate which could be wrong
// 2/ It doesn't support rescaling of the RT (tw = 1024)
// Maybe it will be more easy to just round the UV value in the Vertex Shader
2021-04-12 09:31:30 +00:00
if (!is_8bits)
{
// 8 bits handling is special due to unscaling. It is better to not execute this code
if (w > dstsize.x)
{
scale.x = (float)dstsize.x / tw;
dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x;
w = dstsize.x;
}
if (h > dstsize.y)
{
scale.y = (float)dstsize.y / th;
dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y;
h = dstsize.y;
}
}
2015-05-15 18:47:14 +00:00
GSVector4 sRect(0, 0, w, h);
// Don't be fooled by the name. 'dst' is the old target (hence the input)
// 'src' is the new texture cache entry (hence the output)
GSTexture* sTex = dst->m_texture;
GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h);
src->m_texture = dTex;
// GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format
// However it is different here. We want to reuse a Render Target as a texture.
// Because the texture is already on the GPU, CPU can't convert it.
2021-04-12 09:31:30 +00:00
if (psm.pal > 0)
{
AttachPaletteToSource(src, psm.pal, true);
}
// Disable linear filtering for various GS post-processing effect
// 1/ Palette is used to interpret the alpha channel of the RT as an index.
// Star Ocean 3 uses it to emulate a stencil buffer.
// 2/ Z formats are a bad idea to interpolate (discontinuties).
// 3/ 16 bits buffer is used to move data from a channel to another.
//
// I keep linear filtering for standard color even if I'm not sure that it is
// working correctly.
// Indeed, texture is reduced so you need to read all covered pixels (9 in 3x)
// to correctly interpolate the value. Linear interpolation is likely acceptable
// only in 2x scaling
//
// Src texture will still be bilinear interpolated so I'm really not sure
// that we need to do it here too.
//
// Future note: instead to do
// RT 2048x2048 -> T 1024x1024 -> RT 2048x2048
// We can maybe sample directly a bigger texture
// RT 2048x2048 -> T 2048x2048 -> RT 2048x2048
// Pro: better quality. Copy instead of StretchRect (must be faster)
// Cons: consume more memory
//
// In distant future: investigate to reuse the RT directly without any
// copy. Likely a speed boost and memory usage reduction.
bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24);
if ((sRect == dRect).alltrue() && !shader)
{
2021-04-12 09:31:30 +00:00
if (half_right)
{
// You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT
// which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture
// so the only reliable way to find the real size of the target is to use the TBW value.
float real_width = dst->m_TEX0.TBW * 64u * dst->m_texture->GetScale().x;
2021-04-12 09:31:30 +00:00
m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i((int)(real_width / 2.0f), 0, (int)real_width, h));
}
else
{
m_renderer->m_dev->CopyRect(sTex, dTex, GSVector4i(0, 0, w, h)); // <= likely wrong dstsize.x could be bigger than w
}
}
else
{
// Different size or not the same format
2015-05-15 18:47:14 +00:00
sRect.z /= sTex->GetWidth();
sRect.w /= sTex->GetHeight();
2021-04-12 09:31:30 +00:00
if (half_right)
{
sRect.x = sRect.z / 2.0f;
}
m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, shader, linear);
}
2021-04-12 09:31:30 +00:00
if (src->m_texture)
src->m_texture->SetScale(scale);
else
ASSERT(0);
// Offset hack. Can be enabled via GSdx options.
// The offset will be used in Draw().
float modx = 0.0f;
float mody = 0.0f;
2021-04-12 09:31:30 +00:00
if (UserHacks_HalfPixelOffset && hack)
{
switch(m_renderer->GetUpscaleMultiplier())
{
2021-04-12 09:31:30 +00:00
case 0: //Custom Resolution
{
const float offset = 0.2f;
modx = dst->m_texture->GetScale().x + offset;
mody = dst->m_texture->GetScale().y + offset;
dst->m_texture->LikelyOffset = true;
break;
}
case 2: modx = 2.2f; mody = 2.2f; dst->m_texture->LikelyOffset = true; break;
case 3: modx = 3.1f; mody = 3.1f; dst->m_texture->LikelyOffset = true; break;
case 4: modx = 4.2f; mody = 4.2f; dst->m_texture->LikelyOffset = true; break;
case 5: modx = 5.3f; mody = 5.3f; dst->m_texture->LikelyOffset = true; break;
case 6: modx = 6.2f; mody = 6.2f; dst->m_texture->LikelyOffset = true; break;
case 8: modx = 8.2f; mody = 8.2f; dst->m_texture->LikelyOffset = true; break;
default: modx = 0.0f; mody = 0.0f; dst->m_texture->LikelyOffset = false; break;
}
}
dst->m_texture->OffsetHack_modx = modx;
dst->m_texture->OffsetHack_mody = mody;
}
else
{
if (m_paltex && psm.pal > 0)
{
src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat());
AttachPaletteToSource(src, psm.pal, true);
}
2021-04-12 09:31:30 +00:00
else
{
src->m_texture = m_renderer->m_dev->CreateTexture(tw, th);
2021-04-12 09:31:30 +00:00
if (psm.pal > 0)
{
AttachPaletteToSource(src, psm.pal, false);
}
}
}
ASSERT(src->m_texture);
m_src.Add(src, TEX0, m_renderer->m_context->offset.tex);
return src;
}
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type)
{
ASSERT(type == RenderTarget || type == DepthStencil);
Target* t = new Target(m_renderer, TEX0, m_temp, m_can_convert_depth);
// FIXME: initial data should be unswizzled from local mem in Update() if dirty
t->m_type = type;
2021-04-12 09:31:30 +00:00
if (type == RenderTarget)
{
t->m_texture = m_renderer->m_dev->CreateSparseRenderTarget(w, h);
t->m_used = true; // FIXME
}
2021-04-12 09:31:30 +00:00
else if (type == DepthStencil)
{
t->m_texture = m_renderer->m_dev->CreateSparseDepthStencil(w, h);
}
m_dst[type].push_front(t);
return t;
}
void GSTextureCache::PrintMemoryUsage()
{
#ifdef ENABLE_OGL_DEBUG
2021-04-12 09:31:30 +00:00
uint32 tex = 0;
uint32 tex_rt = 0;
2021-04-12 09:31:30 +00:00
uint32 rt = 0;
uint32 dss = 0;
for (auto s : m_src.m_surfaces)
{
if (s && !s->m_shared_texture)
{
if (s->m_target)
tex_rt += s->m_texture->GetMemUsage();
else
2021-04-12 09:31:30 +00:00
tex += s->m_texture->GetMemUsage();
}
}
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[RenderTarget])
{
if (t)
rt += t->m_texture->GetMemUsage();
}
2021-04-12 09:31:30 +00:00
for (auto t : m_dst[DepthStencil])
{
if (t)
dss += t->m_texture->GetMemUsage();
}
GL_PERF("MEM: RO Tex %dMB. RW Tex %dMB. Target %dMB. Depth %dMB", tex >> 20u, tex_rt >> 20u, rt >> 20u, dss >> 20u);
#endif
}
// GSTextureCache::Surface
GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp)
: m_renderer(r)
, m_texture(NULL)
, m_age(0)
, m_temp(temp)
, m_32_bits_fmt(false)
, m_shared_texture(false)
, m_end_block(0)
{
m_TEX0.TBP0 = 0x3fff;
}
GSTextureCache::Surface::~Surface()
{
// Shared textures are pointers copy. Therefore no allocation
// to recycle.
if (!m_shared_texture)
m_renderer->m_dev->Recycle(m_texture);
}
void GSTextureCache::Surface::UpdateAge()
{
m_age = 0;
}
bool GSTextureCache::Surface::Inside(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect)
{
// Valid only for color formats.
uint32 const end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw);
return bp >= m_TEX0.TBP0 && end_block <= m_end_block;
}
bool GSTextureCache::Surface::Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect)
{
// Valid only for color formats.
uint32 const end_block = GSLocalMemory::m_psm[psm].bn(rect.z - 1, rect.w - 1, bp, bw);
return (m_TEX0.TBP0 <= bp && bp <= m_end_block)
2021-04-12 09:31:30 +00:00
|| (m_TEX0.TBP0 <= end_block && end_block <= m_end_block);
}
// GSTextureCache::Source
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container)
: Surface(r, temp)
, m_palette_obj(nullptr)
, m_palette(nullptr)
, m_valid_rect(0, 0)
, m_target(false)
, m_complete(false)
, m_p2t(NULL)
, m_from_target(NULL)
, m_from_target_TEX0(TEX0)
{
m_TEX0 = TEX0;
m_TEXA = TEXA;
2021-04-12 09:31:30 +00:00
if (dummy_container)
{
// Dummy container only contain a m_texture that is a pointer to another source.
m_write.rect = NULL;
m_write.count = 0;
m_repeating = false;
2021-04-12 09:31:30 +00:00
}
else
{
memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
memset(m_valid, 0, sizeof(m_valid));
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
m_write.count = 0;
m_repeating = m_TEX0.IsRepeating();
2021-04-12 09:31:30 +00:00
if (m_repeating)
{
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
}
GSOffset* off = m_renderer->m_context->offset.tex;
m_pages_as_bit = off->GetPagesAsBits(m_TEX0);
}
}
GSTextureCache::Source::~Source()
{
_aligned_free(m_write.rect);
}
void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
{
Surface::UpdateAge();
2021-04-12 09:31:30 +00:00
if (layer == 0 && (m_complete || m_target))
{
return;
}
2016-05-05 13:32:21 +00:00
const GSVector2i& bs = GSLocalMemory::m_psm[m_TEX0.PSM].bs;
int tw = std::max<int>(1 << m_TEX0.TW, bs.x);
int th = std::max<int>(1 << m_TEX0.TH, bs.y);
GSVector4i r = rect.ralign<Align_Outside>(bs);
2021-04-12 09:31:30 +00:00
if (layer == 0 && r.eq(GSVector4i(0, 0, tw, th)))
{
m_complete = true; // lame, but better than nothing
}
2015-05-15 18:40:09 +00:00
const GSOffset* off = m_renderer->m_context->offset.tex;
uint32 blocks = 0;
2021-04-12 09:31:30 +00:00
if (m_repeating)
{
2021-04-12 09:31:30 +00:00
for (int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = off->block.row[y >> 3u];
2021-04-12 09:31:30 +00:00
for (int x = r.left, i = (y << 7) + x; x < r.right; x += bs.x, i += bs.x)
{
uint32 block = base + off->block.col[x >> 3u];
2021-04-12 09:31:30 +00:00
if (block < MAX_BLOCKS || m_wrap_gs_mem)
{
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
2021-04-12 09:31:30 +00:00
if ((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
Write(GSVector4i(x, y, x + bs.x, y + bs.y), layer);
blocks++;
}
}
}
}
}
else
{
2021-04-12 09:31:30 +00:00
for (int y = r.top; y < r.bottom; y += bs.y)
{
uint32 base = off->block.row[y >> 3u];
2021-04-12 09:31:30 +00:00
for (int x = r.left; x < r.right; x += bs.x)
{
uint32 block = base + off->block.col[x >> 3u];
2021-04-12 09:31:30 +00:00
if (block < MAX_BLOCKS || m_wrap_gs_mem)
{
block %= MAX_BLOCKS;
uint32 row = block >> 5u;
uint32 col = 1 << (block & 31u);
2021-04-12 09:31:30 +00:00
if ((m_valid[row] & col) == 0)
{
m_valid[row] |= col;
Write(GSVector4i(x, y, x + bs.x, y + bs.y), layer);
blocks++;
}
}
}
}
}
2021-04-12 09:31:30 +00:00
if (blocks > 0)
{
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * blocks << (m_palette ? 2 : 0));
Flush(m_write.count, layer);
}
}
void GSTextureCache::Source::UpdateLayer(const GIFRegTEX0& TEX0, const GSVector4i& rect, int layer)
{
if (layer > 6)
return;
if (m_target) // Yeah keep dreaming
return;
if (TEX0 == m_layer_TEX0[layer])
return;
GIFRegTEX0 old_TEX0 = m_TEX0;
m_layer_TEX0[layer] = TEX0;
m_TEX0 = TEX0;
Update(rect, layer);
m_TEX0 = old_TEX0;
}
void GSTextureCache::Source::Write(const GSVector4i& r, int layer)
{
m_write.rect[m_write.count++] = r;
2021-04-12 09:31:30 +00:00
while (m_write.count >= 2)
{
GSVector4i& a = m_write.rect[m_write.count - 2];
GSVector4i& b = m_write.rect[m_write.count - 1];
2021-04-12 09:31:30 +00:00
if ((a == b.zyxw()).mask() == 0xfff0)
{
a.right = b.right; // extend right
m_write.count--;
}
2021-04-12 09:31:30 +00:00
else if ((a == b.xwzy()).mask() == 0xff0f)
{
a.bottom = b.bottom; // extend down
m_write.count--;
}
else
{
break;
}
}
2021-04-12 09:31:30 +00:00
if (m_write.count > 2)
{
Flush(1, layer);
}
}
void GSTextureCache::Source::Flush(uint32 count, int layer)
{
// This function as written will not work for paletted formats copied from framebuffers
// because they are 8 or 4 bit formats on the GS and the GS local memory module reads
// these into an 8 bit format while the D3D surfaces are 32 bit.
// However the function is never called for these cases. This is just for information
// should someone wish to use this function for these cases later.
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
GSVector4i tr(0, 0, tw, th);
2017-05-26 16:30:44 +00:00
int pitch = std::max(tw, psm.bs.x) * sizeof(uint32);
GSLocalMemory& mem = m_renderer->m_mem;
2015-05-15 18:40:09 +00:00
const GSOffset* off = m_renderer->m_context->offset.tex;
GSLocalMemory::readTexture rtx = psm.rtx;
2021-04-12 09:31:30 +00:00
if (m_palette)
{
pitch >>= 2;
rtx = psm.rtxP;
}
uint8* buff = m_temp;
2021-04-12 09:31:30 +00:00
for (uint32 i = 0; i < count; i++)
{
GSVector4i r = m_write.rect[i];
2021-04-12 09:31:30 +00:00
if ((r > tr).mask() & 0xff00)
{
2015-05-15 18:40:09 +00:00
(mem.*rtx)(off, r, buff, pitch, m_TEXA);
m_texture->Update(r.rintersect(tr), buff, pitch, layer);
}
else
{
GSTexture::GSMap m;
2021-04-12 09:31:30 +00:00
if (m_texture->Map(m, &r, layer))
{
(mem.*rtx)(off, r, m.bits, m.pitch, m_TEXA);
m_texture->Unmap();
}
else
{
(mem.*rtx)(off, r, buff, pitch, m_TEXA);
m_texture->Update(r, buff, pitch, layer);
}
}
}
2021-04-12 09:31:30 +00:00
if (count < m_write.count)
{
// Warning src and destination overlap. Memmove must be used instead of memcpy
memmove(&m_write.rect[0], &m_write.rect[count], (m_write.count - count) * sizeof(m_write.rect[0]));
}
m_write.count -= count;
}
2021-04-12 09:31:30 +00:00
bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key)
{
return PaletteKeyEqual()(palette_key, m_palette_obj->GetPaletteKey());
}
// GSTextureCache::Target
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported)
: Surface(r, temp)
, m_type(-1)
, m_used(false)
, m_depth_supported(depth_supported)
{
m_TEX0 = TEX0;
m_32_bits_fmt |= (GSLocalMemory::m_psm[TEX0.PSM].trbpp != 16);
m_dirty_alpha = GSLocalMemory::m_psm[TEX0.PSM].trbpp != 24;
m_valid = GSVector4i::zero();
}
void GSTextureCache::Target::Update()
{
Surface::UpdateAge();
// FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :)
// GH: it must be doable
// 1/ rescale the new t to the good size
// 2/ copy each rectangle (rescale the rectangle) (use CopyRect or multiple vertex)
// Alternate
// 1/ uses multiple vertex rectangle
GSVector2i t_size = default_rt_size;
// Ensure buffer width is at least of the minimum required value.
// Probably not necessary but doesn't hurt to be on the safe side.
// I've seen some games use buffer sizes over 1024, which might bypass our default limit
int buffer_width = m_TEX0.TBW << 6;
t_size.x = std::max(buffer_width, t_size.x);
GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, t_size);
2021-04-12 09:31:30 +00:00
if (r.rempty())
return;
// No handling please
2021-04-12 09:31:30 +00:00
if ((m_type == DepthStencil) && !m_depth_supported)
{
// do the most likely thing a direct write would do, clear it
GL_INS("ERROR: Update DepthStencil dummy");
return;
2021-04-12 09:31:30 +00:00
}
else if (m_type == DepthStencil && m_renderer->m_game.title == CRC::FFX2)
{
GL_INS("ERROR: bad invalidation detected, depth buffer will be cleared");
// FFX2 menu. Invalidation of the depth is wrongly done and only the first
// page is invalidated. Technically a CRC hack will be better but I don't expect
// any games to only upload a single page of data for the depth.
//
// FFX2 menu got another bug. I'm not sure the top-left is properly written or not. It
// could be a gsdx transfer bug too due to unaligned-page transfer.
//
// So the quick and dirty solution is just to clean the depth buffer.
m_renderer->m_dev->ClearDepth(m_texture);
return;
}
int w = r.width();
int h = r.height();
GIFRegTEXA TEXA;
TEXA.AEM = 1;
TEXA.TA0 = 0;
TEXA.TA1 = 0x80;
GSTexture* t = m_renderer->m_dev->CreateTexture(w, h);
const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM);
GSTexture::GSMap m;
2021-04-12 09:31:30 +00:00
if (t->Map(m))
{
2021-04-12 09:31:30 +00:00
m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA);
t->Unmap();
}
else
{
int pitch = ((w + 3) & ~3) * 4;
m_renderer->m_mem.ReadTexture(off, r, m_temp, pitch, TEXA);
t->Update(r.rsize(), m_temp, pitch);
}
// m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4);
// Copy the new GS memory content into the destination texture.
2021-04-12 09:31:30 +00:00
if (m_type == RenderTarget)
{
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w);
m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy());
}
2021-04-12 09:31:30 +00:00
else if (m_type == DepthStencil)
{
GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0);
// FIXME linear or not?
m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), ShaderConvert_RGBA8_TO_FLOAT32);
}
m_renderer->m_dev->Recycle(t);
}
void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect)
{
m_valid = m_valid.runion(rect);
// Block of the bottom right texel of the validity rectangle, last valid block of the texture
2021-04-12 09:31:30 +00:00
m_end_block = GSLocalMemory::m_psm[m_TEX0.PSM].bn(m_valid.z - 1, m_valid.w - 1, m_TEX0.TBP0, m_TEX0.TBW); // Valid only for color formats
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
}
// GSTextureCache::SourceMap
void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset* off)
{
m_surfaces.insert(s);
2021-04-12 09:31:30 +00:00
if (s->m_target)
{
// TODO
// GH: I don't know why but it seems we only consider the first page for a render target
size_t page = TEX0.TBP0 >> 5;
s->m_erase_it[page] = m_map[page].InsertFront(s);
return;
}
// The source pointer will be stored/duplicated in all m_map[array of pages]
2021-04-12 09:31:30 +00:00
for (size_t i = 0; i < countof(m_pages); i++)
{
2021-04-12 09:31:30 +00:00
if (uint32 p = s->m_pages_as_bit[i])
{
auto* m = &m_map[i << 5];
auto* e = &s->m_erase_it[i << 5];
unsigned long j;
2021-04-12 09:31:30 +00:00
while (_BitScanForward(&j, p))
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
p ^= 1U << j;
e[j] = m[j].InsertFront(s);
}
}
}
}
void GSTextureCache::SourceMap::RemoveAll()
{
2021-04-12 09:31:30 +00:00
for (auto s : m_surfaces)
delete s;
m_surfaces.clear();
2021-04-12 09:31:30 +00:00
for (size_t i = 0; i < countof(m_map); i++)
{
m_map[i].clear();
}
}
void GSTextureCache::SourceMap::RemoveAt(Source* s)
{
m_surfaces.erase(s);
GL_CACHE("TC: Remove Src Texture: %d (0x%x)",
2021-04-12 09:31:30 +00:00
s->m_texture ? s->m_texture->GetID() : 0,
s->m_TEX0.TBP0);
if (s->m_target)
{
const size_t page = s->m_TEX0.TBP0 >> 5;
m_map[page].EraseIndex(s->m_erase_it[page]);
}
else
{
2021-04-12 09:31:30 +00:00
for (size_t i = 0; i < countof(m_pages); i++)
{
2021-04-12 09:31:30 +00:00
if (uint32 p = s->m_pages_as_bit[i])
{
auto* m = &m_map[i << 5];
const auto* e = &s->m_erase_it[i << 5];
unsigned long j;
2021-04-12 09:31:30 +00:00
while (_BitScanForward(&j, p))
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
p ^= 1U << j;
m[j].EraseIndex(e[j]);
}
}
}
}
delete s;
}
void GSTextureCache::AttachPaletteToSource(Source* s, uint16 pal, bool need_gs_texture)
{
s->m_palette_obj = m_palette_map.LookupPalette(pal, need_gs_texture);
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
}
// GSTextureCache::Palette
GSTextureCache::Palette::Palette(const GSRenderer* renderer, uint16 pal, bool need_gs_texture)
: m_pal(pal)
, m_tex_palette(nullptr)
, m_renderer(renderer)
{
uint16 palette_size = pal * sizeof(uint32);
m_clut = (uint32*)_aligned_malloc(palette_size, 64);
memcpy(m_clut, (const uint32*)m_renderer->m_mem.m_clut, palette_size);
2021-04-12 09:31:30 +00:00
if (need_gs_texture)
{
InitializeTexture();
}
}
2021-04-12 09:31:30 +00:00
GSTextureCache::Palette::~Palette()
{
m_renderer->m_dev->Recycle(m_tex_palette);
_aligned_free(m_clut);
}
2021-04-12 09:31:30 +00:00
GSTexture* GSTextureCache::Palette::GetPaletteGSTexture()
{
return m_tex_palette;
}
2021-04-12 09:31:30 +00:00
GSTextureCache::PaletteKey GSTextureCache::Palette::GetPaletteKey()
{
return {m_clut, m_pal};
}
2021-04-12 09:31:30 +00:00
void GSTextureCache::Palette::InitializeTexture()
{
if (!m_tex_palette)
{
// A palette texture is always created with dimensions 256x1 (also in the case that m_pal is 16, thus a 16x1 texture
// would be enough to store the CLUT data) because the coordinates that the shader uses for
// sampling such texture are always normalized by 255.
// This is because indexes are stored as normalized values of an RGBA texture (e.g. index 15 will be read as (15/255),
// and therefore will read texel 15/255 * texture size).
m_tex_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0]));
}
}
// GSTextureCache::PaletteKeyHash
// Hashes the content of the clut.
// The hashing function is implemented by taking two things into account:
// 1) The clut can be an array of 16 or 256 uint32 (depending on the pal parameter) and in order to speed up the computation of the hash
// the array is hashed in blocks of 16 uint32, so for clut of size 16 uint32 the hashing is computed in one pass and for clut of 256 uint32
// it is computed in 16 passes,
// 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function
// is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended.
2021-04-12 09:31:30 +00:00
std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey& key) const
{
uint16 pal = key.pal;
const uint32* clut = key.clut;
ASSERT((pal & 15) == 0);
size_t clut_hash = 3831179159;
2021-04-12 09:31:30 +00:00
for (uint16 i = 0; i < pal; i += 16)
{
clut_hash = (clut_hash + 1488000301) ^ (clut[i ] + 33644011);
clut_hash = (clut_hash + 3831179159) ^ (clut[i + 1] + 47627467);
clut_hash = (clut_hash + 3659574209) ^ (clut[i + 2] + 577038523);
clut_hash = (clut_hash + 33644011) ^ (clut[i + 3] + 3491555267);
clut_hash = (clut_hash + 777771959) ^ (clut[i + 4] + 3301075993);
clut_hash = (clut_hash + 4019618579) ^ (clut[i + 5] + 4186992613);
clut_hash = (clut_hash + 3465668953) ^ (clut[i + 6] + 3043435883);
clut_hash = (clut_hash + 3494478943) ^ (clut[i + 7] + 3441897883);
clut_hash = (clut_hash + 3432010979) ^ (clut[i + 8] + 2167922789);
clut_hash = (clut_hash + 1570862863) ^ (clut[i + 9] + 3401920591);
clut_hash = (clut_hash + 1002648679) ^ (clut[i + 10] + 1293530519);
clut_hash = (clut_hash + 551381741) ^ (clut[i + 11] + 2539834039);
clut_hash = (clut_hash + 3768974459) ^ (clut[i + 12] + 169943507);
clut_hash = (clut_hash + 862380703) ^ (clut[i + 13] + 2906932549);
clut_hash = (clut_hash + 3433082137) ^ (clut[i + 14] + 4234384109);
clut_hash = (clut_hash + 2679083843) ^ (clut[i + 15] + 2719605247);
}
return clut_hash;
};
// GSTextureCache::PaletteKeyEqual
2021-04-12 09:31:30 +00:00
bool GSTextureCache::PaletteKeyEqual::operator()(const PaletteKey& lhs, const PaletteKey& rhs) const
{
if (lhs.pal != rhs.pal)
{
return false;
}
return GSVector4i::compare64(lhs.clut, rhs.clut, lhs.pal * sizeof(lhs.clut[0]));
};
// GSTextureCache::PaletteMap
GSTextureCache::PaletteMap::PaletteMap(const GSRenderer* renderer)
: m_renderer(renderer)
{
2021-04-12 09:31:30 +00:00
for (auto& map : m_maps)
{
map.reserve(MAX_SIZE);
}
}
2021-04-12 09:31:30 +00:00
std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalette(uint16 pal, bool need_gs_texture)
{
ASSERT(pal == 16 || pal == 256);
// Choose which hash map search into:
// pal == 16 : index 0
// pal == 256 : index 1
auto& map = m_maps[pal == 16 ? 0 : 1];
const uint32* clut = (const uint32*)m_renderer->m_mem.m_clut;
// Create PaletteKey for searching into map (clut is actually not copied, so do not store this key into the map)
2021-04-12 09:31:30 +00:00
PaletteKey palette_key = {clut, pal};
auto it1 = map.find(palette_key);
2021-04-12 09:31:30 +00:00
if (it1 != map.end())
{
// Clut content match, HIT
2021-04-12 09:31:30 +00:00
if (need_gs_texture && !it1->second->GetPaletteGSTexture())
{
// Generate GSTexture and upload clut content if needed and not done yet
it1->second->InitializeTexture();
}
return it1->second;
}
// No palette with matching clut content, MISS
2021-04-12 09:31:30 +00:00
if (map.size() > MAX_SIZE)
{
// If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one
GL_INS("WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes.", pal * sizeof(uint32), map.size(), MAX_SIZE);
uint32 current_size = map.size();
2021-04-12 09:31:30 +00:00
for (auto it = map.begin(); it != map.end();)
{
// If the palette is unused, there is only one shared pointers holding a reference to the unused Palette object,
// and this shared pointer is the one stored in the map itself
2021-04-12 09:31:30 +00:00
if (it->second.use_count() <= 1)
{
// Palette is unused
it = map.erase(it); // Erase element from map
2021-04-12 09:31:30 +00:00
// The palette object should now be gone as the shared pointer to the object in the map is deleted
}
2021-04-12 09:31:30 +00:00
else
{
++it;
}
}
uint32 cleared_palette_count = current_size - (uint32)map.size();
2021-04-12 09:31:30 +00:00
if (cleared_palette_count == 0)
{
GL_INS("ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact.", pal * sizeof(uint32), map.size(), MAX_SIZE);
}
2021-04-12 09:31:30 +00:00
else
{
map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing
GL_INS("INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes.", pal * sizeof(uint32), map.size(), cleared_palette_count);
}
}
std::shared_ptr<Palette> palette = std::make_shared<Palette>(m_renderer, pal, need_gs_texture);
2021-04-12 09:31:30 +00:00
map.emplace(palette->GetPaletteKey(), palette);
GL_CACHE("TC, %u-bit PaletteMap (Size %u): Added new palette.", pal * sizeof(uint32), map.size());
2021-04-12 09:31:30 +00:00
return palette;
}
2021-04-12 09:31:30 +00:00
void GSTextureCache::PaletteMap::Clear()
{
for (auto& map : m_maps)
{
map.clear(); // Clear all the nodes of the map, deleting Palette objects managed by shared pointers as they should be unused elsewhere
map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing
}
}