2010-04-25 00:31:27 +00:00
/*
2009-02-09 21:15:56 +00:00
* Copyright ( C ) 2007 - 2009 Gabest
* http : //www.gabest.org
*
* This Program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 , or ( at your option )
* any later version .
2010-04-25 00:31:27 +00:00
*
2009-02-09 21:15:56 +00:00
* This Program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
2010-04-25 00:31:27 +00:00
*
2009-02-09 21:15:56 +00:00
* You should have received a copy of the GNU General Public License
* along with GNU Make ; see the file COPYING . If not , write to
2012-09-09 18:16:11 +00:00
* the Free Software Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 , USA .
2009-02-09 21:15:56 +00:00
* http : //www.gnu.org/copyleft/gpl.html
*
*/
2011-02-19 03:36:30 +00:00
# include "stdafx.h"
2018-11-16 18:41:37 +00:00
# include "Renderers/Common/GSTextureCache.h"
2016-12-05 21:55:53 +00:00
# include "GSUtil.h"
2009-05-22 01:22:52 +00:00
2016-03-20 17:05:36 +00:00
// Static flag, defaults to false. The constructor overwrites it with the
// UserHacks_DisablePartialInvalidation option when user hacks are enabled,
// and resets it to false otherwise.
bool GSTextureCache : : m_disable_partial_invalidation = false ;
2017-01-22 21:27:38 +00:00
// Static flag, defaults to false. The constructor overwrites it with the
// wrap_gs_mem option when user hacks are enabled, and resets it to false
// otherwise.
bool GSTextureCache : : m_wrap_gs_mem = false ;
2015-08-08 13:15:51 +00:00
2009-05-22 01:22:52 +00:00
// Builds the texture cache for renderer r.
//
// Every hack-related setting is read from the application configuration only
// when the master user-hacks switch is on; otherwise each one falls back to
// its neutral default. The palette-texture switch and the CRC hack level are
// always read. Finally the scratch buffer m_temp is allocated; it is released
// in the destructor.
GSTextureCache::GSTextureCache(GSRenderer* r)
	: m_renderer(r)
	, m_palette_map(r)
{
	const bool hacks_enabled = theApp.GetConfigB(" UserHacks ");

	// One line per setting: configured value when hacks are enabled, default otherwise.
	m_spritehack = hacks_enabled ? theApp.GetConfigI(" UserHacks_SpriteHack ") : 0;
	UserHacks_HalfPixelOffset = hacks_enabled ? (theApp.GetConfigI(" UserHacks_HalfPixelOffset ") == 1) : false;
	m_preload_frame = hacks_enabled ? theApp.GetConfigB(" preload_frame_with_gs_data ") : false;
	m_disable_partial_invalidation = hacks_enabled ? theApp.GetConfigB(" UserHacks_DisablePartialInvalidation ") : false;
	// Depth conversion is on unless the user explicitly disabled depth support.
	m_can_convert_depth = hacks_enabled ? !theApp.GetConfigB(" UserHacks_DisableDepthSupport ") : true;
	m_cpu_fb_conversion = hacks_enabled ? theApp.GetConfigB(" UserHacks_CPU_FB_Conversion ") : false;
	m_texture_inside_rt = hacks_enabled ? theApp.GetConfigB(" UserHacks_TextureInsideRt ") : false;
	m_wrap_gs_mem = hacks_enabled ? theApp.GetConfigB(" wrap_gs_mem ") : false;

	m_paltex = theApp.GetConfigB(" paltex ");

	// "Automatic" is resolved to the level recommended for the active renderer.
	m_crc_hack_level = theApp.GetConfigT<CRCHackLevel>(" crc_hack_level ");
	if (m_crc_hack_level == CRCHackLevel::Automatic)
	{
		m_crc_hack_level = GSUtil::GetRecommendedCRCHackLevel(theApp.GetCurrentRendererType());
	}

	// In theory 4MB is enough but 9MB is safer for overflow (8MB
	// isn't enough in custom resolution)
	// Test: onimusha 3 PAL 60Hz
	const int temp_buffer_size = 9 * 1024 * 1024;
	const int temp_buffer_alignment = 32;

	m_temp = static_cast<uint8*>(_aligned_malloc(temp_buffer_size, temp_buffer_alignment));
}
// Destructor: empties the cache through RemoveAll() and then releases the
// m_temp scratch buffer (allocated with _aligned_malloc in the constructor)
// with _aligned_free().
GSTextureCache : : ~ GSTextureCache ( )
{
RemoveAll ( ) ;
2011-02-19 03:36:30 +00:00
_aligned_free ( m_temp ) ;
2009-05-22 01:22:52 +00:00
}
2015-05-13 18:00:25 +00:00
void GSTextureCache : : RemovePartial ( )
{
//m_src.RemoveAll();
for ( int type = 0 ; type < 2 ; type + + )
{
2017-08-04 18:37:44 +00:00
for ( auto t : m_dst [ type ] ) delete t ;
2015-05-13 18:00:25 +00:00
m_dst [ type ] . clear ( ) ;
}
}
2009-05-22 01:22:52 +00:00
void GSTextureCache : : RemoveAll ( )
{
2009-06-27 03:32:33 +00:00
m_src . RemoveAll ( ) ;
2009-05-22 01:22:52 +00:00
2009-06-27 03:32:33 +00:00
for ( int type = 0 ; type < 2 ; type + + )
2009-05-22 01:22:52 +00:00
{
2017-08-04 18:37:44 +00:00
for ( auto t : m_dst [ type ] ) delete t ;
2009-05-22 01:22:52 +00:00
2009-06-27 03:32:33 +00:00
m_dst [ type ] . clear ( ) ;
2009-05-22 01:22:52 +00:00
}
2018-11-04 21:06:24 +00:00
m_palette_map . Clear ( ) ;
2009-05-22 01:22:52 +00:00
}
2016-04-29 19:00:32 +00:00
// Returns a Source that shares the texture of an existing target overlapping
// the memory described by TEX0 (depth stencils are searched first, then
// render targets).
//
// TEX0/TEXA : texture registers describing the requested texture.
// r         : region forwarded to LookupSource() on the legacy fallback path.
// palette   : when true a palette is attached to the created Source.
//
// Throws GSDXRecoverableError when depth conversion is disabled or when no
// target matches, except for the JackieChanAdv / SVCChaos titles, which fall
// back to LookupSource() instead.
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette)
{
	// Common exit for "no depth source available".
	// JackieChan and SVCChaos cause regressions when the draw call is skipped
	// (also when depth is disabled/not supported, so D3D is covered as well),
	// hence these two keep using the old color lookup. Every other game skips
	// the draw by raising a recoverable error.
	const auto fallback_or_skip = [&]() -> Source*
	{
		const bool legacy_title = m_renderer->m_game.title == CRC::JackieChanAdv || m_renderer->m_game.title == CRC::SVCChaos;

		if (!legacy_title)
		{
			throw GSDXRecoverableError();
		}

		return LookupSource(TEX0, TEXA, r);
	};

	if (!m_can_convert_depth)
	{
		GL_CACHE(" LookupDepthSource not supported (0x%x, F:0x%x) ", TEX0.TBP0, TEX0.PSM);

		return fallback_or_skip();
	}

	const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];

	// Check only current frame, I guess it is only used as a postprocessing effect
	const uint32 bp = TEX0.TBP0;
	const uint32 psm = TEX0.PSM;

	Target* dst = nullptr;

	for (auto candidate : m_dst[DepthStencil])
	{
		if (!candidate->m_used || !candidate->m_dirty.empty())
			continue;

		if (!GSUtil::HasSharedBits(bp, psm, candidate->m_TEX0.TBP0, candidate->m_TEX0.PSM))
			continue;

		ASSERT(GSLocalMemory::m_psm[candidate->m_TEX0.PSM].depth);

		if (candidate->m_age == 0)
		{
			// Perfect Match
			dst = candidate;
			break;
		}

		if (candidate->m_age == 1)
		{
			// Better than nothing (Full Spectrum Warrior); keep scanning for a perfect match
			dst = candidate;
		}
	}

	if (dst == nullptr)
	{
		// Retry on the render target (Silent Hill 4)
		for (auto candidate : m_dst[RenderTarget])
		{
			// FIXME: do I need to allow m_age == 1 as a potential match (as DepthStencil) ???
			if (candidate->m_age || !candidate->m_used || !candidate->m_dirty.empty())
				continue;

			if (!GSUtil::HasSharedBits(bp, psm, candidate->m_TEX0.TBP0, candidate->m_TEX0.PSM))
				continue;

			ASSERT(GSLocalMemory::m_psm[candidate->m_TEX0.PSM].depth);
			dst = candidate;
			break;
		}
	}

	if (dst == nullptr)
	{
		GL_CACHE(" TC depth: ERROR miss (0x%x, %s) ", TEX0.TBP0, psm_str(psm));

		// Possible ? In this case we could call LookupSource
		// Or just put a basic texture
		// src->m_texture = m_renderer->m_dev->CreateTexture(tw, th);
		// In all cases rendering will be broken
		//
		// Note: it might be worth checking the previous frame
		// Note: otherwise return NULL and skip the draw
		//
		// Full Spectrum Warrior: first draw call of cut-scene rendering
		// The game tries to emulate a texture shuffle with an old depth buffer
		// (which doesn't exist yet for us due to the cache)
		// Rendering is nicer (less garbage) if we skip the draw call.
		return fallback_or_skip();

		//ASSERT(0);
	}

	GL_CACHE(" TC depth: dst %s hit: %d (0x%x, %s) ", to_string(dst->m_type),
		dst->m_texture ? dst->m_texture->GetID() : 0,
		TEX0.TBP0, psm_str(psm));

	// Create a shared texture source
	Source* src = new Source(m_renderer, TEX0, TEXA, m_temp, true);
	src->m_texture = dst->m_texture;
	src->m_shared_texture = true;
	src->m_target = true; // So renderer can check if a conversion is required
	src->m_from_target = dst->m_texture; // avoid complex condition on the renderer
	src->m_32_bits_fmt = dst->m_32_bits_fmt;

	// Insert the texture in the hash set to keep track of it. But don't bother with
	// texture cache list. It means that a new Source is created every time we need it.
	// If it is too expensive, one could cut memory allocation in Source constructor for this
	// use case.
	if (palette)
	{
		AttachPaletteToSource(src, psm_s.pal, true);
	}

	m_src.m_surfaces.insert(src);

	return src;
}
2009-06-27 03:32:33 +00:00
// Finds or creates the Source matching TEX0/TEXA and updates its region r.
//
// Lookup order:
//  1. an already cached Source in the bucket m_src.m_map[TEX0.TBP0 >> 5];
//  2. a used, clean render target sharing bits with the request (full match,
//     "half right" match, or -- with m_texture_inside_rt -- a texture located
//     inside the target);
//  3. a current-frame depth stencil, in which case the request is handed to
//     LookupDepthSource() and its result is returned directly;
//  4. otherwise CreateSource() builds a new Source (from the target found in
//     step 2, if any).
//
// Exceptions raised by LookupDepthSource() propagate to the caller.
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
{
	const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
	//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;

	// Until DX is fixed
	if (psm_s.pal > 0)
	{
		m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
	}

	const uint32* clut = m_renderer->m_mem.m_clut;

	Source* src = nullptr;

	auto& bucket = m_src.m_map[TEX0.TBP0 >> 5];

	for (auto it = bucket.begin(); it != bucket.end(); ++it)
	{
		Source* cached = *it;

		// TBP0 TBW PSM TW TH must all be identical
		const auto diff_lo = TEX0.u32[0] ^ cached->m_TEX0.u32[0];
		const auto diff_hi = (TEX0.u32[1] ^ cached->m_TEX0.u32[1]) & 3;

		if ((diff_lo | diff_hi) != 0)
			continue;

		// Targets are converted (AEM & palette) on the fly by the GPU. They don't need extra checks
		if (!cached->m_target)
		{
			// We request a palette texture (psm_s.pal). If the texture was
			// converted by the CPU (!cached->m_palette), we need to ensure
			// palette content is the same.
			if (psm_s.pal > 0 && !cached->m_palette && !cached->ClutMatch({clut, psm_s.pal}))
				continue;

			// We request a 24/16 bit RGBA texture. Alpha expansion was done by
			// the CPU. We need to check that TEXA is identical
			if (psm_s.pal == 0 && psm_s.fmt > 0 && cached->m_TEXA.u64 != TEXA.u64)
				continue;
		}

		bucket.MoveFront(it.Index());

		src = cached;

		break;
	}

	Target* dst = nullptr;
	bool half_right = false;
	int x_offset = 0;
	int y_offset = 0;

#ifdef DISABLE_HW_TEXTURE_CACHE
	if (0)
#else
	if (src == nullptr)
#endif
	{
		const uint32 bp = TEX0.TBP0;
		const uint32 psm = TEX0.PSM;
		const uint32 bw = TEX0.TBW;
		const int tw = 1 << TEX0.TW;
		const int th = 1 << TEX0.TH;
		const uint32 bp_end = psm_s.bn(tw - 1, th - 1, bp, bw);

		// Arc the Lad finds the wrong surface here when looking for a depth stencil.
		// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.

		// (Simply not doing this code at all makes a lot of previously missing stuff show (but breaks pretty much everything
		// else.)

		for (auto rt : m_dst[RenderTarget])
		{
			if (!rt->m_used || !rt->m_dirty.empty())
				continue;

			// Typical bug (MGS3 blue cloud):
			// 1/ RT used as 32 bits => alpha channel written
			// 2/ RT used as 24 bits => no update of alpha channel
			// 3/ Lookup of texture that used alpha channel as index, HasSharedBits will return false
			//    because of the previous draw call format
			//
			// Solution: consider the RT as 32 bits if the alpha was used in the past
			const uint32 rt_psm = (rt->m_dirty_alpha) ? rt->m_TEX0.PSM & ~0x1 : rt->m_TEX0.PSM;

			if (GSUtil::HasSharedBits(bp, psm, rt->m_TEX0.TBP0, rt_psm))
			{
				// It is complex to convert the code in shader. As a reference, let's do it on the CPU, it will be slow but
				// 1/ it just works :)
				// 2/ even with upscaling
				// 3/ for both Direct3D and OpenGL
				//
				// The hack forces 4-bit and 8-bit frame buffer conversion to be done on the CPU instead of the GPU,
				// at the cost of performance.
				// There is no dedicated shader to handle 4-bit conversion (Stuntman has been confirmed to use 4-bit).
				// Direct3D10/11 and OpenGL support 8-bit fb conversion but don't render some corner cases properly (Harry Potter games).
				// The hack can fix glitches in some games.
				if (m_cpu_fb_conversion && (psm == PSM_PSMT4 || psm == PSM_PSMT8))
				{
					Read(rt, rt->m_valid);
				}
				else
				{
					dst = rt;
				}

				break;
			}

			if ((rt->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, rt->m_TEX0.TBP0 + rt->m_TEX0.TBW * 0x10, rt->m_TEX0.PSM))
			{
				// Detect half of the render target (fix snow engine game)
				// Target Page (8KB) have always a width of 64 pixels
				// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
				half_right = true;
				dst = rt;

				break;
			}

			if (m_texture_inside_rt && psm == PSM_PSMCT32 && bw == 1 && bp_end < rt->m_end_block && rt->m_TEX0.TBP0 < bp)
			{
				// Note bw == 1 until we find a generic formula below.
				// No break here: the scan continues after recording this candidate.
				dst = rt;

				const uint32 delta = bp - rt->m_TEX0.TBP0;
				const uint32 delta_p = delta / 32;
				const uint32 delta_b = delta % 32;

				// FIXME
				x_offset = (delta_p % bw) * psm_s.pgs.x;
				y_offset = (delta_p / bw) * psm_s.pgs.y;

				// Position (in blocks) of each of the 32 blocks of a page.
				static const int block32_offset_x[32] = {
					0, 1, 0, 1,
					2, 3, 2, 3,
					0, 1, 0, 1,
					2, 3, 2, 3,
					4, 5, 4, 5,
					6, 7, 6, 7,
					4, 5, 4, 5,
					6, 7, 6, 7,
				};

				static const int block32_offset_y[32] = {
					0, 0, 1, 1,
					0, 0, 1, 1,
					2, 2, 3, 3,
					2, 2, 3, 3,
					0, 0, 1, 1,
					0, 0, 1, 1,
					2, 2, 3, 3,
					2, 2, 3, 3,
				};

				x_offset += block32_offset_x[delta_b] * psm_s.bs.x;
				y_offset += block32_offset_y[delta_b] * psm_s.bs.y;

				GL_INS(" WARNING middle of framebuffer 0x%x => 0x%x. Offset %d,%d ", rt->m_TEX0.TBP0, rt->m_end_block, x_offset, y_offset);
			}
		}

		// Pure depth texture format will be fetched by LookupDepthSource.
		// However guess what, some games (GoW) read the depth as a standard
		// color format (instead of a depth format). All pixels are scrambled
		// (because color and depth don't have same location). They don't care
		// pixel will be several draw calls later.
		//
		// Sigh... They don't help us.
		if (dst == nullptr && m_can_convert_depth)
		{
			// Let's try a trick to avoid to use wrongly a depth buffer
			// Unfortunately, I don't have any Arc the Lad testcase
			//
			// 1/ Check only current frame, I guess it is only used as a postprocessing effect
			for (auto ds : m_dst[DepthStencil])
			{
				if (ds->m_age || !ds->m_used || !ds->m_dirty.empty())
					continue;

				if (!GSUtil::HasSharedBits(bp, psm, ds->m_TEX0.TBP0, ds->m_TEX0.PSM))
					continue;

				GL_INS(" TC: Warning depth format read as color format. Pixels will be scrambled ");

				// Let's fetch a depth format texture. Rationale: it will avoid the texture allocation and the
				// rescaling of the current function.
				if (psm_s.bpp > 8)
				{
					GIFRegTEX0 depth_TEX0;
					depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u);
					depth_TEX0.u32[1] = TEX0.u32[1];

					return LookupDepthSource(depth_TEX0, TEXA, r);
				}

				return LookupDepthSource(TEX0, TEXA, r, true);
			}
		}
	}

	// Remember whether the Source came from the cache before src is overwritten below.
	const bool cache_hit = (src != nullptr);

	if (!cache_hit)
	{
#ifdef ENABLE_OGL_DEBUG
		if (dst)
		{
			GL_CACHE(" TC: dst %s hit (%s): %d (0x%x, %s) ", to_string(dst->m_type), half_right ? " half " : " full ",
				dst->m_texture ? dst->m_texture->GetID() : 0,
				TEX0.TBP0, psm_str(TEX0.PSM));
		}
		else
		{
			GL_CACHE(" TC: src miss (0x%x, 0x%x, %s) ", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
		}
#endif
		src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset);
	}
	else
	{
		GL_CACHE(" TC: src hit: %d (0x%x, 0x%x, %s) ",
			src->m_texture ? src->m_texture->GetID() : 0,
			TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0,
			psm_str(TEX0.PSM));
	}

	// A cached Source that carries a palette gets a new one attached when the CLUT changed.
	if (cache_hit && src->m_palette && !src->ClutMatch({clut, psm_s.pal}))
	{
		AttachPaletteToSource(src, psm_s.pal, true);
	}

	src->Update(r);

	m_src.m_used = true;

	return src;
}
2009-06-20 20:28:36 +00:00
2016-10-13 09:08:47 +00:00
void GSTextureCache : : ScaleTexture ( GSTexture * texture )
{
if ( ! m_renderer - > CanUpscale ( ) )
return ;
float multiplier = static_cast < float > ( m_renderer - > GetUpscaleMultiplier ( ) ) ;
bool custom_resolution = ( multiplier = = 0 ) ;
GSVector2 scale_factor ( multiplier ) ;
if ( custom_resolution )
{
2017-01-23 04:36:37 +00:00
int width = m_renderer - > GetDisplayRect ( ) . width ( ) ;
int height = m_renderer - > GetDisplayRect ( ) . height ( ) ;
2016-10-13 09:08:47 +00:00
GSVector2i requested_resolution = m_renderer - > GetCustomResolution ( ) ;
scale_factor . x = static_cast < float > ( requested_resolution . x ) / width ;
scale_factor . y = static_cast < float > ( requested_resolution . y ) / height ;
}
texture - > SetScale ( scale_factor ) ;
}
2016-10-03 18:34:04 +00:00
// Returns the target of the given type whose base pointer equals TEX0.TBP0,
// creating it when it does not exist yet.
//
// TEX0   : frame/z-buffer description (base pointer, width, format).
// w, h   : size passed to CreateTarget() for a new target.
// type   : index into m_dst (RenderTarget or DepthStencil).
// used   : when true the returned target is flagged as used.
// fbmask : frame buffer mask; decides whether the alpha channel counts as written.
//
// On a miss, and when depth conversion is enabled, a recent target of the
// opposite type at the same address is converted into the new target through
// a shader StretchRect.
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, uint32 fbmask)
{
	const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
	const uint32 bp = TEX0.TBP0;

	Target* dst = nullptr;

	auto& targets = m_dst[type];

	for (auto it = targets.begin(); it != targets.end(); ++it)
	{
		Target* candidate = *it;

		if (bp != candidate->m_TEX0.TBP0)
			continue;

		targets.MoveFront(it.Index());

		dst = candidate;

		dst->m_32_bits_fmt |= (psm_s.bpp != 16);
		dst->m_TEX0 = TEX0;

		break;
	}

	if (dst != nullptr)
	{
		GL_CACHE(" TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, %s) ", to_string(type), w, h, dst->m_texture->GetID(), bp, psm_str(TEX0.PSM));

		dst->Update();

		// Alpha is considered written by a 32-bit format whose mask leaves at
		// least one alpha bit writable, and by any 16-bit format.
		const bool alpha_32 = psm_s.trbpp == 32 && (fbmask & 0xFF000000) != 0xFF000000;
		const bool alpha_16 = psm_s.trbpp == 16;

		dst->m_dirty_alpha |= alpha_32 || alpha_16;
	}
	else if (m_can_convert_depth)
	{
		const int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil;

		// Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick
		// some bad data.
		Target* dst_match = nullptr;

		for (auto other : m_dst[rev_type])
		{
			if (bp != other->m_TEX0.TBP0)
				continue;

			if (other->m_age == 0)
			{
				dst_match = other;
				break;
			}

			if (other->m_age == 1)
			{
				dst_match = other;
			}
		}

		if (dst_match != nullptr)
		{
			GSVector4 sRect(0, 0, 1, 1);
			GSVector4 dRect(0, 0, w, h);

			dst = CreateTarget(TEX0, w, h, type);
			dst->m_32_bits_fmt = dst_match->m_32_bits_fmt;

			int shader;
			const bool fmt_16_bits = (psm_s.bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp == 16);

			if (type == DepthStencil)
			{
				GL_CACHE(" TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, %s was %s) ", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
				shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + psm_s.fmt;
			}
			else
			{
				GL_CACHE(" TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, %s was %s) ", w, h, bp, psm_str(TEX0.PSM), psm_str(dst_match->m_TEX0.PSM));
				shader = (fmt_16_bits) ? ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8;
			}

			m_renderer->m_dev->StretchRect(dst_match->m_texture, sRect, dst->m_texture, dRect, shader, false);
		}
	}

	if (dst == nullptr)
	{
		GL_CACHE(" TC: Lookup Target(%s) %dx%d, miss (0x%x, %s) ", to_string(type), w, h, bp, psm_str(TEX0.PSM));

		dst = CreateTarget(TEX0, w, h, type);

		// In theory new textures contain invalidated data. Still in theory a new target
		// must contain the content of the GS memory.
		// In practice, TC will wrongly invalidate some RT. For example due to write on the alpha
		// channel but colors is still valid. Unfortunately TC doesn't support the upload of data
		// in target.
		//
		// Cleaning the code here will likely break several games. However it might reduce
		// the noise in draw call debugging. It is the main reason to enable it on debug build.
		//
		// From a performance point of view, it might cost a little on big upscaling
		// but normally few RT are miss so it must remain reasonable.
		const bool supported_fmt = m_can_convert_depth || psm_s.depth == 0;

		if (m_preload_frame && TEX0.TBW > 0 && supported_fmt)
		{
			GL_INS(" Preloading the RT DATA ");

			// RT doesn't have height but if we use a too big value, we will read outside of the GS memory.
			const int page0 = TEX0.TBP0 >> 5;
			const int max_page = (MAX_PAGES - page0);
			int max_h = 32 * max_page / TEX0.TBW;

			// h is likely smaller than w (true most of the time). Reduce the upload size (speed)
			max_h = std::min<int>(max_h, TEX0.TBW * 64);

			dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, max_h), TEX0.PSM));
			dst->Update();
		}
		else
		{
#ifdef ENABLE_OGL_DEBUG
			switch (type)
			{
				case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
				case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture); break;
				default: break;
			}
#endif
		}
	}

	ScaleTexture(dst->m_texture);

	if (used)
	{
		dst->m_used = true;
	}

	return dst;
}
2015-06-26 07:25:50 +00:00
// Frame lookup: returns the render target to use for the frame described by
// TEX0, creating (and clearing) one when nothing suitable is cached.
//
// Three passes are made over the render targets, in this order:
//  1. same base pointer and an end block beyond it ("perfect" hit);
//  2. a target whose block range strictly contains the base pointer;
//  3. same base pointer, regardless of the end block.
//
// w, h   : size passed to CreateTarget() on a miss.
// real_h : height of the dirty rectangle used to preload a new frame when
//          m_preload_frame is set.
//
// The returned target is always flagged as used and its dirty-alpha flag is reset.
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h)
{
	const uint32 bp = TEX0.TBP0;

	Target* dst = nullptr;

#if 0
	// Dump the list of targets for debug
	for (auto t : m_dst[RenderTarget]) {
		GL_INS(" TC: frame 0x%x -> 0x%x : %d (age %d) ", t->m_TEX0.TBP0, t->m_end_block, t->m_texture->GetID(), t->m_age);
	}
#endif

	// Let's try to find a perfect frame that contains valid data
	for (auto frame : m_dst[RenderTarget])
	{
		if (bp != frame->m_TEX0.TBP0 || frame->m_end_block <= bp)
			continue;

		dst = frame;

		GL_CACHE(" TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x %s) ", w, h, dst->m_texture->GetID(), bp, frame->m_end_block, psm_str(TEX0.PSM));

		break;
	}

	// 2nd try ! Try to find a frame that include the bp
	if (dst == nullptr)
	{
		for (auto frame : m_dst[RenderTarget])
		{
			if (frame->m_TEX0.TBP0 >= bp || bp >= frame->m_end_block)
				continue;

			dst = frame;

			GL_CACHE(" TC: Lookup Frame %dx%d, inclusive hit: %d (0x%x, took 0x%x -> 0x%x %s) ", w, h, frame->m_texture->GetID(), bp, frame->m_TEX0.TBP0, frame->m_end_block, psm_str(TEX0.PSM));

			break;
		}
	}

	// 3rd try ! Try to find a frame that doesn't contain valid data (honestly I'm not sure we need to do it)
	if (dst == nullptr)
	{
		for (auto frame : m_dst[RenderTarget])
		{
			if (bp != frame->m_TEX0.TBP0)
				continue;

			dst = frame;

			GL_CACHE(" TC: Lookup Frame %dx%d, empty hit: %d (0x%x -> 0x%x %s) ", w, h, dst->m_texture->GetID(), bp, frame->m_end_block, psm_str(TEX0.PSM));

			break;
		}
	}

#if 0
	// Previous lookup strategy, kept for reference.
	for (auto t : m_dst[RenderTarget])
	{
		if (bp == t->m_TEX0.TBP0)
		{
			dst = t;

			GL_CACHE(" TC: Lookup Frame %dx%d, perfect hit: %d (0x%x -> 0x%x) ", w, h, dst->m_texture->GetID(), bp, t->m_end_block);

			break;
		}
		else
		{
			// HACK: try to find something close to the base pointer

			if (t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0xe00UL && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0))
			{
				GL_CACHE(" TC: Lookup Frame %dx%d, close hit: %d (0x%x, took 0x%x -> 0x%x) ", w, h, t->m_texture->GetID(), bp, t->m_TEX0.TBP0, t->m_end_block);

				dst = t;
			}
		}
	}
#endif

	if (dst != nullptr)
	{
		dst->Update();
	}
	else
	{
		GL_CACHE(" TC: Lookup Frame %dx%d, miss (0x%x %s) ", w, h, bp, psm_str(TEX0.PSM));

		dst = CreateTarget(TEX0, w, h, RenderTarget);

		ScaleTexture(dst->m_texture);

		// New frame buffers after reset should be cleared, don't display memory garbage
		m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0);

		if (m_preload_frame)
		{
			// Load GS data into frame. Game can directly uploads a background or the full image in
			// "CTRC" buffer. It will also avoid various black screen issue in gs dump.
			//
			// Code is more or less an equivalent of the SW renderer
			//
			// Option is hidden and not enabled by default to avoid any regression
			dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, real_h), TEX0.PSM));
			dst->Update();
		}
	}

	dst->m_used = true;
	dst->m_dirty_alpha = false;

	return dst;
}
2015-06-05 21:37:06 +00:00
// Goal: Depth And Target at the same address is not possible. On GS it is
// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target
// must invalidate the Target/Depth respectively
void GSTextureCache : : InvalidateVideoMemType ( int type , uint32 bp )
{
2019-01-18 13:53:06 +00:00
if ( ! m_can_convert_depth )
2017-01-16 16:51:02 +00:00
return ;
2017-08-04 18:37:44 +00:00
auto & list = m_dst [ type ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; + + i )
2015-06-05 21:37:06 +00:00
{
Target * t = * i ;
if ( bp = = t - > m_TEX0 . TBP0 )
{
2015-06-13 06:32:11 +00:00
GL_CACHE ( " TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x) " , to_string ( type ) ,
2015-06-05 21:37:06 +00:00
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
t - > m_TEX0 . TBP0 ) ;
2017-08-04 18:37:44 +00:00
list . erase ( i ) ;
2015-06-05 21:37:06 +00:00
delete t ;
break ;
}
}
}
2014-10-24 18:49:30 +00:00
// Goal: invalidate data sent to the GPU when the source (GS memory) is modified
// Called each time you want to write to the GS memory
2015-05-15 18:40:09 +00:00
void GSTextureCache : : InvalidateVideoMem ( GSOffset * off , const GSVector4i & rect , bool target )
2009-05-22 01:22:52 +00:00
{
2015-05-15 18:40:09 +00:00
if ( ! off ) return ; // Fixme. Crashes Dual Hearts, maybe others as well. Was fine before r1549.
2010-04-25 00:31:27 +00:00
2015-05-15 18:40:09 +00:00
uint32 bp = off - > bp ;
uint32 bw = off - > bw ;
uint32 psm = off - > psm ;
2009-07-04 15:14:04 +00:00
2009-06-27 03:32:33 +00:00
if ( ! target )
2009-05-22 01:22:52 +00:00
{
2014-10-24 18:49:30 +00:00
// Remove Source that have same BP as the render target (color&dss)
// rendering will dirty the copy
2017-08-04 18:37:44 +00:00
auto & list = m_src . m_map [ bp > > 5 ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; )
2009-05-22 01:22:52 +00:00
{
2017-08-04 18:37:44 +00:00
Source * s = * i ;
+ + i ;
2009-06-27 03:32:33 +00:00
2009-07-04 15:14:04 +00:00
if ( GSUtil : : HasSharedBits ( bp , psm , s - > m_TEX0 . TBP0 , s - > m_TEX0 . PSM ) )
2009-05-22 01:22:52 +00:00
{
2009-06-27 03:32:33 +00:00
m_src . RemoveAt ( s ) ;
2009-05-22 01:22:52 +00:00
}
}
2015-05-13 18:01:25 +00:00
2015-07-11 12:34:29 +00:00
uint32 bbp = bp + bw * 0x10 ;
if ( bw > = 16 & & bbp < 16384 ) {
2015-06-03 06:40:49 +00:00
// Detect half of the render target (fix snow engine game)
// Target Page (8KB) have always a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
2017-08-04 18:37:44 +00:00
auto & list = m_src . m_map [ bbp > > 5 ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; )
2015-05-13 18:01:25 +00:00
{
2017-08-04 18:37:44 +00:00
Source * s = * i ;
+ + i ;
2015-05-13 18:01:25 +00:00
if ( GSUtil : : HasSharedBits ( bbp , psm , s - > m_TEX0 . TBP0 , s - > m_TEX0 . PSM ) )
{
m_src . RemoveAt ( s ) ;
}
}
}
2016-10-16 15:32:57 +00:00
// Haunting ground write frame buffer 0x3000 and expect to write data to 0x3380
// Note: the game only does a 0 direct write. If some games expect some real data
// we are screwed.
if ( m_renderer - > m_game . title = = CRC : : HauntingGround ) {
uint32 end_block = GSLocalMemory : : m_psm [ psm ] . bn ( rect . width ( ) , rect . height ( ) , bp , bw ) ;
auto type = RenderTarget ;
2017-08-04 18:37:44 +00:00
for ( auto t : m_dst [ type ] )
2016-10-16 15:32:57 +00:00
{
if ( t - > m_TEX0 . TBP0 > bp & & t - > m_end_block < end_block ) {
2016-10-19 17:29:09 +00:00
// Haunting ground expect to clean buffer B with a rendering into buffer A.
// Situation is quite messy as it would require to extract the data from the buffer A
// and to move in buffer B.
//
// Of course buffers don't share the same line width. You can't delete the buffer as next
// miss will load invalid data.
//
// So just clear the damn buffer and forget about it.
GL_CACHE ( " TC: Clear Sub Target(%s) %d (0x%x) " , to_string ( type ) ,
2016-10-16 15:32:57 +00:00
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
t - > m_TEX0 . TBP0 ) ;
2016-10-19 17:29:09 +00:00
m_renderer - > m_dev - > ClearRenderTarget ( t - > m_texture , 0 ) ;
2016-10-16 15:32:57 +00:00
}
}
}
2009-06-27 03:32:33 +00:00
}
2011-12-23 15:53:53 +00:00
GSVector4i r ;
2012-01-06 01:20:01 +00:00
uint32 * pages = ( uint32 * ) m_temp ;
2014-10-07 17:11:43 +00:00
2015-05-15 18:40:09 +00:00
off - > GetPages ( rect , pages , & r ) ;
2011-12-23 15:53:53 +00:00
2009-07-04 15:14:04 +00:00
bool found = false ;
2012-01-05 02:40:24 +00:00
for ( const uint32 * p = pages ; * p ! = GSOffset : : EOP ; p + + )
2009-06-27 03:32:33 +00:00
{
2011-12-23 15:53:53 +00:00
uint32 page = * p ;
2009-06-27 03:32:33 +00:00
2017-08-04 18:37:44 +00:00
auto & list = m_src . m_map [ page ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; )
2009-05-22 01:22:52 +00:00
{
2017-08-04 18:37:44 +00:00
Source * s = * i ;
+ + i ;
2011-12-23 15:53:53 +00:00
if ( GSUtil : : HasSharedBits ( psm , s - > m_TEX0 . PSM ) )
2009-05-22 01:22:52 +00:00
{
2011-12-23 15:53:53 +00:00
bool b = bp = = s - > m_TEX0 . TBP0 ;
2009-05-22 01:22:52 +00:00
2011-12-23 15:53:53 +00:00
if ( ! s - > m_target )
2009-05-22 01:22:52 +00:00
{
2017-08-04 18:37:44 +00:00
if ( m_disable_partial_invalidation & & s - > m_repeating )
{
2016-03-18 23:23:00 +00:00
m_src . RemoveAt ( s ) ;
2017-08-04 18:37:44 +00:00
}
else
{
uint32 * RESTRICT valid = s - > m_valid ;
2016-03-18 23:23:00 +00:00
// Invalidate data of input texture
if ( s - > m_repeating )
{
// Note: very hot path on snowbling engine game
2017-08-04 18:37:44 +00:00
for ( const GSVector2i & k : s - > m_p2t [ page ] )
2016-03-18 23:23:00 +00:00
{
2017-08-04 18:37:44 +00:00
valid [ k . x ] & = k . y ;
2016-03-18 23:23:00 +00:00
}
}
else
2009-06-27 03:32:33 +00:00
{
2016-03-18 23:23:00 +00:00
valid [ page ] = 0 ;
2009-06-27 03:32:33 +00:00
}
2011-12-23 15:53:53 +00:00
2016-03-18 23:23:00 +00:00
s - > m_complete = false ;
2011-12-23 15:53:53 +00:00
2016-03-18 23:23:00 +00:00
found | = b ;
}
2011-12-23 15:53:53 +00:00
}
else
{
2014-10-24 18:49:30 +00:00
// render target used as input texture
2011-12-23 15:53:53 +00:00
// TODO
if ( b )
{
m_src . RemoveAt ( s ) ;
}
2009-05-22 01:22:52 +00:00
}
}
}
}
2009-06-27 03:32:33 +00:00
if ( ! target ) return ;
2009-05-22 01:22:52 +00:00
2009-06-27 03:32:33 +00:00
for ( int type = 0 ; type < 2 ; type + + )
{
2017-08-04 18:37:44 +00:00
auto & list = m_dst [ type ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; )
2009-05-22 01:22:52 +00:00
{
2017-08-04 18:37:44 +00:00
auto j = i + + ;
2009-06-27 03:32:33 +00:00
Target * t = * j ;
2009-05-22 01:22:52 +00:00
2015-06-26 16:02:51 +00:00
// GH: (I think) this code is completely broken. Typical issue:
// EE write an alpha channel into 32 bits texture
// Results: the target is deleted (because HasCompatibleBits is false)
//
// Major issues are expected if the game try to reuse the target
// If we dirty the RT, it will likely upload partially invalid data.
// (The color on the previous example)
2009-07-04 15:14:04 +00:00
if ( GSUtil : : HasSharedBits ( bp , psm , t - > m_TEX0 . TBP0 , t - > m_TEX0 . PSM ) )
2009-05-22 01:22:52 +00:00
{
2009-07-04 15:14:04 +00:00
if ( ! found & & GSUtil : : HasCompatibleBits ( psm , t - > m_TEX0 . PSM ) )
2009-05-22 01:22:52 +00:00
{
2016-05-17 17:28:11 +00:00
GL_CACHE ( " TC: Dirty Target(%s) %d (0x%x) r(%d,%d,%d,%d) " , to_string ( type ) ,
2015-06-29 06:48:19 +00:00
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
2016-05-17 17:28:11 +00:00
t - > m_TEX0 . TBP0 , r . x , r . y , r . z , r . w ) ;
2009-07-04 15:14:04 +00:00
t - > m_dirty . push_back ( GSDirtyRect ( r , psm ) ) ;
t - > m_TEX0 . TBW = bw ;
2009-06-27 03:32:33 +00:00
}
else
{
2017-08-04 18:37:44 +00:00
list . erase ( j ) ;
2015-06-13 06:32:11 +00:00
GL_CACHE ( " TC: Remove Target(%s) %d (0x%x) " , to_string ( type ) ,
2015-05-16 15:32:24 +00:00
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
2015-05-16 17:28:22 +00:00
t - > m_TEX0 . TBP0 ) ;
2009-06-27 03:32:33 +00:00
delete t ;
2009-05-22 01:22:52 +00:00
continue ;
}
2015-11-02 06:36:37 +00:00
} else if ( bp = = t - > m_TEX0 . TBP0 ) {
2015-10-24 14:19:21 +00:00
// EE writes the ALPHA channel. Mark it as invalid for
// the texture cache. Otherwise it will generate a wrong
// hit on the texture cache.
// Game: Conflict - Desert Storm (flickering)
t - > m_dirty_alpha = false ;
2009-05-22 01:22:52 +00:00
}
2015-06-26 16:02:51 +00:00
// GH: Try to detect texture write that will overlap with a target buffer
2016-03-06 13:25:14 +00:00
if ( GSUtil : : HasSharedBits ( psm , t - > m_TEX0 . PSM ) ) {
if ( bp < t - > m_TEX0 . TBP0 )
{
uint32 rowsize = bw * 8192 ;
uint32 offset = ( uint32 ) ( ( t - > m_TEX0 . TBP0 - bp ) * 256 ) ;
if ( rowsize > 0 & & offset % rowsize = = 0 )
{
int y = GSLocalMemory : : m_psm [ psm ] . pgs . y * offset / rowsize ;
if ( r . bottom > y )
{
GL_CACHE ( " TC: Dirty After Target(%s) %d (0x%x) " , to_string ( type ) ,
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
t - > m_TEX0 . TBP0 ) ;
// TODO: do not add this rect above too
t - > m_dirty . push_back ( GSDirtyRect ( GSVector4i ( r . left , r . top - y , r . right , r . bottom - y ) , psm ) ) ;
t - > m_TEX0 . TBW = bw ;
continue ;
}
}
}
2009-05-22 01:22:52 +00:00
2016-03-06 13:25:14 +00:00
// FIXME: this code "fixes" black FMV issue with rule of rose.
# if 1
// Greg: I'm not sure the 'bw' equality is required but it won't hurt too much
2016-08-18 06:33:18 +00:00
//
// Ben 10 Alien Force : Vilgax Attacks uses a small temporary target for multiple textures (different bw)
// It is too complex to handle, and purpose of the code was to handle FMV (large bw). So let's skip small
// (128 pixels) target
if ( bw > 2 & & t - > m_TEX0 . TBW = = bw & & t - > Inside ( bp , bw , psm , rect ) & & GSUtil : : HasCompatibleBits ( psm , t - > m_TEX0 . PSM ) ) {
2016-03-06 13:25:14 +00:00
uint32 rowsize = bw * 8192u ;
uint32 offset = ( uint32 ) ( ( bp - t - > m_TEX0 . TBP0 ) * 256 ) ;
if ( rowsize > 0 & & offset % rowsize = = 0 ) {
int y = GSLocalMemory : : m_psm [ psm ] . pgs . y * offset / rowsize ;
2016-08-20 09:52:22 +00:00
GL_CACHE ( " TC: Dirty in the middle of Target(%s) %d (0x%x->0x%x) pos(%d,%d => %d,%d) bw:%u " , to_string ( type ) ,
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
t - > m_TEX0 . TBP0 , t - > m_end_block ,
r . left , r . top + y , r . right , r . bottom + y , bw ) ;
2016-03-06 13:25:14 +00:00
t - > m_dirty . push_back ( GSDirtyRect ( GSVector4i ( r . left , r . top + y , r . right , r . bottom + y ) , psm ) ) ;
2009-07-04 15:14:04 +00:00
t - > m_TEX0 . TBW = bw ;
2009-06-27 03:32:33 +00:00
continue ;
}
2009-05-22 01:22:52 +00:00
}
2015-07-10 20:33:30 +00:00
# endif
2016-03-06 13:25:14 +00:00
}
2009-05-22 01:22:52 +00:00
}
}
}
2014-10-24 18:49:30 +00:00
// Goal: retrive the data from the GPU to the GS memory.
// Called each time you want to read from the GS memory
2015-05-15 18:40:09 +00:00
void GSTextureCache : : InvalidateLocalMem ( GSOffset * off , const GSVector4i & r )
2009-05-22 01:22:52 +00:00
{
2015-05-15 18:40:09 +00:00
uint32 bp = off - > bp ;
uint32 psm = off - > psm ;
//uint32 bw = off->bw;
2010-02-26 17:03:58 +00:00
// No depth handling please.
2016-04-03 09:17:02 +00:00
if ( psm = = PSM_PSMZ32 | | psm = = PSM_PSMZ24 | | psm = = PSM_PSMZ16 | | psm = = PSM_PSMZ16S ) {
2016-04-11 14:02:57 +00:00
GL_INS ( " ERROR: InvalidateLocalMem depth format isn't supported (%d,%d to %d,%d) " , r . x , r . y , r . z , r . w ) ;
2016-04-03 10:30:14 +00:00
if ( m_can_convert_depth ) {
for ( auto t : m_dst [ DepthStencil ] ) {
if ( GSUtil : : HasSharedBits ( bp , psm , t - > m_TEX0 . TBP0 , t - > m_TEX0 . PSM ) ) {
2016-07-29 10:13:26 +00:00
if ( GSUtil : : HasCompatibleBits ( psm , t - > m_TEX0 . PSM ) )
Read ( t , r . rintersect ( t - > m_valid ) ) ;
2016-04-03 10:30:14 +00:00
}
}
}
2010-02-26 17:03:58 +00:00
return ;
2016-04-03 09:17:02 +00:00
}
2009-07-04 15:14:04 +00:00
2011-05-09 22:14:35 +00:00
// This is a shorter but potentially slower version of the below, commented out code.
// It works for all the games mentioned below and fixes a couple of other ones as well
2011-05-10 16:02:52 +00:00
// (Busen0: Wizardry and Chaos Legion).
2011-05-09 22:14:35 +00:00
// Also in a few games the below code ran the Grandia3 case when it shouldn't :p
2017-08-04 18:37:44 +00:00
for ( auto t : m_dst [ RenderTarget ] )
2009-05-22 01:22:52 +00:00
{
2010-02-26 17:03:58 +00:00
if ( t - > m_TEX0 . PSM ! = PSM_PSMZ32 & & t - > m_TEX0 . PSM ! = PSM_PSMZ24 & & t - > m_TEX0 . PSM ! = PSM_PSMZ16 & & t - > m_TEX0 . PSM ! = PSM_PSMZ16S )
2009-05-22 01:22:52 +00:00
{
2010-02-26 17:03:58 +00:00
if ( GSUtil : : HasSharedBits ( bp , psm , t - > m_TEX0 . TBP0 , t - > m_TEX0 . PSM ) )
2009-05-22 01:22:52 +00:00
{
2014-10-24 18:49:30 +00:00
// GH Note: Read will do a StretchRect and then will sizzle data to the GS memory
// t->m_valid will do the full target texture whereas r.intersect(t->m_valid) will be limited
// to the useful part for the transfer.
// 1/ Logically intersect must be enough, except if we miss some call to InvalidateLocalMem
// or it need the depth part too
// 2/ Read function is slow but I suspect the swizzle part to be costly. Maybe a compute shader
// that do the swizzle at the same time of the Stretching could save CPU computation.
2011-05-09 22:14:35 +00:00
// note: r.rintersect breaks Wizardry and Chaos Legion
2011-05-10 16:02:52 +00:00
// Read(t, t->m_valid) works in all tested games but is very slow in GUST titles ><
2016-03-20 17:05:36 +00:00
if ( GSTextureCache : : m_disable_partial_invalidation ) {
2011-05-10 16:02:52 +00:00
Read ( t , r . rintersect ( t - > m_valid ) ) ;
2016-03-20 17:05:36 +00:00
} else {
if ( r . x = = 0 & & r . y = = 0 ) // Full screen read?
Read ( t , t - > m_valid ) ;
else // Block level read?
Read ( t , r . rintersect ( t - > m_valid ) ) ;
}
2009-05-22 01:22:52 +00:00
}
2016-04-03 09:17:02 +00:00
} else {
GL_INS ( " ERROR: InvalidateLocalMem target is a depth format " ) ;
2009-05-22 01:22:52 +00:00
}
}
2014-10-07 17:11:43 +00:00
2011-05-09 22:14:35 +00:00
//GSTextureCache::Target* rt2 = NULL;
//int ymin = INT_MAX;
2017-05-26 16:07:00 +00:00
//for(auto i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
2011-05-09 22:14:35 +00:00
//{
2017-05-26 16:07:00 +00:00
// auto j = i++;
2011-05-09 22:14:35 +00:00
// Target* t = *j;
// if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
// {
// if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
// {
// if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
// {
// Read(t, r.rintersect(t->m_valid));
// return;
// }
// else if(psm == PSM_PSMCT32 && (t->m_TEX0.PSM == PSM_PSMCT16 || t->m_TEX0.PSM == PSM_PSMCT16S))
// {
// // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit
// Read(t, GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid));
// return;
// }
// else
// {
// if (psm == PSM_PSMT4HH && t->m_TEX0.PSM == PSM_PSMCT32)
// {
// // Silent Hill Origins shadows: Read 8 bit using only the HIGH bits (4 bit) texture as 32 bit.
// Read(t, r.rintersect(t->m_valid));
// return;
// }
// else
// {
// //printf("Trashing render target. We have a %d type texture and we are trying to write into a %d type texture\n", t->m_TEX0.PSM, psm);
// m_dst[RenderTarget].erase(j);
// delete t;
// }
// }
// }
// // Grandia3, FFX, FFX-2 pause menus. t->m_TEX0.TBP0 magic number checks because otherwise kills xs2 videos
// if( (GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && (bp > t->m_TEX0.TBP0) )
// && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584) ))
// {
// //printf("first : %d-%d child : %d-%d\n", psm, bp, t->m_TEX0.PSM, t->m_TEX0.TBP0);
// uint32 rowsize = bw * 8192;
// uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256);
// if(rowsize > 0 && offset % rowsize == 0)
// {
// int y = GSLocalMemory::m_psm[psm].pgs.y * offset / rowsize;
// if(y < ymin && y < 512)
// {
// rt2 = t;
// ymin = y;
// }
// }
// }
// }
//}
//if(rt2)
//{
// Read(rt2, GSVector4i(r.left, r.top + ymin, r.right, r.bottom + ymin));
//}
2014-10-07 17:11:43 +00:00
2009-05-22 01:22:52 +00:00
// TODO: ds
}
2009-11-06 17:13:36 +00:00
2016-03-19 10:39:56 +00:00
// Hack: remove Target that are strictly included in current rt. Typically uses for FMV
// For example, game is rendered at 0x800->0x1000, fmv will be uploaded to 0x0->0x2800
// FIXME In theory, we ought to report the data from the sub rt to the main rt. But let's
// postpone it for later.
void GSTextureCache : : InvalidateVideoMemSubTarget ( GSTextureCache : : Target * rt )
{
if ( ! rt )
return ;
2017-08-04 18:37:44 +00:00
auto & list = m_dst [ RenderTarget ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; ) {
Target * t = * i ;
2016-03-19 10:39:56 +00:00
2017-08-04 18:37:44 +00:00
if ( ( t - > m_TEX0 . TBP0 > rt - > m_TEX0 . TBP0 ) & & ( t - > m_end_block < rt - > m_end_block ) & & ( t - > m_TEX0 . TBW = = rt - > m_TEX0 . TBW )
2016-03-19 10:39:56 +00:00
& & ( t - > m_TEX0 . TBP0 < t - > m_end_block ) ) {
GL_INS ( " InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x " ,
rt - > m_TEX0 . TBP0 , rt - > m_end_block , t - > m_TEX0 . TBP0 , t - > m_end_block ) ;
2017-08-04 18:37:44 +00:00
i = list . erase ( i ) ;
2016-03-19 10:39:56 +00:00
delete t ;
2017-08-04 18:37:44 +00:00
} else {
+ + i ;
2016-03-19 10:39:56 +00:00
}
}
}
2009-05-22 01:22:52 +00:00
void GSTextureCache : : IncAge ( )
{
2009-06-27 03:32:33 +00:00
int maxage = m_src . m_used ? 3 : 30 ;
2014-10-24 18:49:30 +00:00
// You can't use m_map[page] because Source* are duplicated on several pages.
2017-08-04 18:37:44 +00:00
for ( auto i = m_src . m_surfaces . begin ( ) ; i ! = m_src . m_surfaces . end ( ) ; )
2009-06-27 03:32:33 +00:00
{
2017-08-04 18:37:44 +00:00
Source * s = * i ;
2009-06-27 03:32:33 +00:00
2016-04-21 17:08:41 +00:00
if ( s - > m_shared_texture ) {
// Shared textures are temporary only added in the hash set but not in the texture
// cache list therefore you can't use RemoveAt
2017-08-04 18:37:44 +00:00
i = m_src . m_surfaces . erase ( i ) ;
2016-04-21 17:08:41 +00:00
delete s ;
2017-08-04 18:37:44 +00:00
} else {
+ + i ;
if ( + + s - > m_age > maxage ) {
m_src . RemoveAt ( s ) ;
}
2009-06-27 03:32:33 +00:00
}
}
m_src . m_used = false ;
2009-11-10 11:38:32 +00:00
// Clearing of Rendertargets causes flickering in many scene transitions.
// Sigh, this seems to be used to invalidate surfaces. So set a huge maxage to avoid flicker,
// but still invalidate surfaces. (Disgaea 2 fmv when booting the game through the BIOS)
// Original maxage was 4 here, Xenosaga 2 needs at least 240, else it flickers on scene transitions.
maxage = 400 ; // ffx intro scene changes leave the old image untouched for a couple of frames and only then start using it
2009-06-27 03:32:33 +00:00
2009-11-10 11:38:32 +00:00
for ( int type = 0 ; type < 2 ; type + + )
{
2017-08-04 18:37:44 +00:00
auto & list = m_dst [ type ] ;
for ( auto i = list . begin ( ) ; i ! = list . end ( ) ; )
2009-11-10 11:38:32 +00:00
{
2017-08-04 18:37:44 +00:00
Target * t = * i ;
2009-06-27 03:32:33 +00:00
2015-07-09 21:03:55 +00:00
// This variable is used to detect the texture shuffle effect. There is a high
// probability that game will do it on the current RT.
// Variable is cleared here to avoid issue with game that uses a 16 bits
// render target
2015-07-26 08:12:49 +00:00
if ( t - > m_age > 0 ) {
// GoW2 uses the effect at the start of the frame
t - > m_32_bits_fmt = false ;
}
2015-07-09 21:03:55 +00:00
2009-11-10 11:38:32 +00:00
if ( + + t - > m_age > maxage )
{
2017-08-04 18:37:44 +00:00
i = list . erase ( i ) ;
2015-06-13 06:32:11 +00:00
GL_CACHE ( " TC: Remove Target(%s): %d (0x%x) due to age " , to_string ( type ) ,
2015-05-16 15:32:24 +00:00
t - > m_texture ? t - > m_texture - > GetID ( ) : 0 ,
2015-05-16 17:28:22 +00:00
t - > m_TEX0 . TBP0 ) ;
2009-06-27 03:32:33 +00:00
2009-11-10 11:38:32 +00:00
delete t ;
2017-08-04 18:37:44 +00:00
} else {
+ + i ;
2009-11-10 11:38:32 +00:00
}
}
}
2009-05-22 01:22:52 +00:00
}
2009-11-09 00:32:52 +00:00
//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
2016-10-06 23:43:04 +00:00
GSTextureCache : : Source * GSTextureCache : : CreateSource ( const GIFRegTEX0 & TEX0 , const GIFRegTEXA & TEXA , Target * dst , bool half_right , int x_offset , int y_offset )
2009-05-22 01:22:52 +00:00
{
2012-06-11 00:00:18 +00:00
const GSLocalMemory : : psm_t & psm = GSLocalMemory : : m_psm [ TEX0 . PSM ] ;
2011-04-25 18:18:21 +00:00
Source * src = new Source ( m_renderer , TEX0 , TEXA , m_temp ) ;
2009-05-22 01:22:52 +00:00
2009-07-31 23:59:06 +00:00
int tw = 1 < < TEX0 . TW ;
int th = 1 < < TEX0 . TH ;
2014-04-14 18:25:02 +00:00
//int tp = TEX0.TBW << 6;
2010-04-25 00:31:27 +00:00
2010-03-14 19:35:10 +00:00
bool hack = false ;
2009-06-27 03:32:33 +00:00
2014-10-07 17:11:43 +00:00
if ( m_spritehack & & ( TEX0 . PSM = = PSM_PSMT8 | | TEX0 . PSM = = PSM_PSMT8H ) )
2012-06-19 01:12:01 +00:00
{
src - > m_spritehack_t = true ;
2014-10-07 17:11:43 +00:00
if ( m_spritehack = = 2 & & TEX0 . CPSM ! = PSM_PSMCT16 )
src - > m_spritehack_t = false ;
}
2012-06-19 01:12:01 +00:00
else
src - > m_spritehack_t = false ;
2016-10-06 23:43:04 +00:00
if ( dst & & ( x_offset ! = 0 | | y_offset ! = 0 ) )
{
GSVector2 scale = dst - > m_texture - > GetScale ( ) ;
int x = ( int ) ( scale . x * x_offset ) ;
int y = ( int ) ( scale . y * y_offset ) ;
int w = ( int ) ( scale . x * tw ) ;
int h = ( int ) ( scale . y * th ) ;
GSTexture * sTex = dst - > m_texture ;
2019-01-14 21:28:23 +00:00
GSTexture * dTex = m_renderer - > m_dev - > CreateRenderTarget ( w , h ) ;
2016-10-06 23:43:04 +00:00
GSVector4i area ( x , y , x + w , y + h ) ;
m_renderer - > m_dev - > CopyRect ( sTex , dTex , area ) ;
// Keep a trace of origin of the texture
src - > m_texture = dTex ;
src - > m_target = true ;
src - > m_from_target = dst - > m_texture ;
src - > m_texture - > SetScale ( scale ) ;
2018-12-10 20:57:03 +00:00
if ( psm . pal > 0 ) {
// Attach palette for GPU texture conversion
AttachPaletteToSource ( src , psm . pal , true ) ;
}
2016-10-06 23:43:04 +00:00
}
else if ( dst )
2009-07-31 23:59:06 +00:00
{
// TODO: clean up this mess
2009-05-22 01:22:52 +00:00
2015-09-08 14:36:23 +00:00
int shader = dst - > m_type ! = RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY ;
2018-12-19 23:54:51 +00:00
bool is_8bits = TEX0 . PSM = = PSM_PSMT8 ;
2015-06-27 09:24:16 +00:00
2015-06-29 17:17:46 +00:00
if ( is_8bits ) {
2015-06-27 09:24:16 +00:00
GL_INS ( " Reading RT as a packed-indexed 8 bits format " ) ;
2015-09-08 14:36:23 +00:00
shader = ShaderConvert_RGBA_TO_8I ;
2015-06-27 09:24:16 +00:00
}
2015-06-20 14:19:02 +00:00
# ifdef ENABLE_OGL_DEBUG
2015-06-27 09:24:16 +00:00
if ( TEX0 . PSM = = PSM_PSMT4 ) {
GL_INS ( " ERROR: Reading RT as a packed-indexed 4 bits format is not supported " ) ;
2015-06-20 14:19:02 +00:00
}
# endif
2016-04-22 17:46:44 +00:00
if ( GSLocalMemory : : m_psm [ TEX0 . PSM ] . bpp > 8 ) {
2015-07-03 21:04:48 +00:00
src - > m_32_bits_fmt = dst - > m_32_bits_fmt ;
2015-07-09 21:03:55 +00:00
}
2016-04-29 15:31:09 +00:00
// Keep a trace of origin of the texture
2009-07-31 23:59:06 +00:00
src - > m_target = true ;
2016-04-29 15:31:09 +00:00
src - > m_from_target = dst - > m_texture ;
2009-06-28 02:02:14 +00:00
2012-06-17 14:33:34 +00:00
dst - > Update ( ) ;
2009-07-31 23:59:06 +00:00
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
2009-07-06 16:35:06 +00:00
2009-08-02 23:07:30 +00:00
int w = ( int ) ( dst - > m_texture - > GetScale ( ) . x * tw ) ;
2010-04-25 00:31:27 +00:00
int h = ( int ) ( dst - > m_texture - > GetScale ( ) . y * th ) ;
2015-06-29 17:17:46 +00:00
if ( is_8bits ) {
// Unscale 8 bits textures, quality won't be nice but format is really awful
w = tw ;
h = th ;
}
2009-07-06 16:35:06 +00:00
2009-07-31 23:59:06 +00:00
GSVector2i dstsize = dst - > m_texture - > GetSize ( ) ;
2009-07-06 16:35:06 +00:00
2009-07-31 23:59:06 +00:00
// pitch conversion
2009-07-06 16:35:06 +00:00
2009-07-31 23:59:06 +00:00
if ( dst - > m_TEX0 . TBW ! = TEX0 . TBW ) // && dst->m_TEX0.PSM == TEX0.PSM
{
2010-02-20 02:50:58 +00:00
// This is so broken :p
////Better not do the code below, "fixes" like every game that ever gets here..
////Edit: Ratchet and Clank needs this to show most of it's graphics at all.
////Someone else fix this please, I can't :p
////delete src; return NULL;
2009-10-22 01:52:41 +00:00
2010-02-20 02:50:58 +00:00
//// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//ASSERT(dst->m_TEX0.TBW > TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y, false);
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//GSVector4 size = GSVector4(dstsize).xyxy();
//GSVector4 scale = GSVector4(dst->m_texture->GetScale()).xyxy();
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//int blockWidth = 64;
//int blockHeight = TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//GSVector4i br(0, 0, blockWidth, blockHeight);
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//int sw = (int)dst->m_TEX0.TBW << 6;
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//int dw = (int)TEX0.TBW << 6;
//int dh = 1 << TEX0.TH;
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
//if(sw != 0)
//for(int dy = 0; dy < dh; dy += blockHeight)
//{
// for(int dx = 0; dx < dw; dx += blockWidth)
// {
2015-05-15 18:40:09 +00:00
// int off = dy * dw / blockHeight + dx;
2009-07-06 16:35:06 +00:00
2015-05-15 18:40:09 +00:00
// int sx = off % sw;
// int sy = off / sw;
2009-07-06 16:35:06 +00:00
2015-05-15 18:47:14 +00:00
// GSVector4 sRect = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
2015-05-15 18:49:25 +00:00
// GSVector4 dRect = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
2009-07-06 16:35:06 +00:00
2015-05-15 18:49:25 +00:00
// m_renderer->m_dev->StretchRect(dst->m_texture, sRect, src->m_texture, dRect);
2009-07-06 16:35:06 +00:00
2010-02-20 02:50:58 +00:00
// // TODO: this is quite a lot of StretchRect, do it with one Draw
// }
//}
2009-07-31 23:59:06 +00:00
}
2010-04-06 21:14:15 +00:00
else if ( tw < 1024 )
2009-10-24 18:39:36 +00:00
{
// FIXME: timesplitters blurs the render target by blending itself over a couple of times
2011-04-15 17:12:22 +00:00
hack = true ;
2011-04-15 15:42:28 +00:00
//if(tw == 256 && th == 128 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00))
//{
// delete src;
// return NULL;
//}
2009-10-24 18:39:36 +00:00
}
2009-07-31 23:59:06 +00:00
// width/height conversion
2009-07-06 16:35:06 +00:00
2009-08-02 23:07:30 +00:00
GSVector2 scale = dst - > m_texture - > GetScale ( ) ;
2009-07-06 16:35:06 +00:00
2015-05-15 18:49:25 +00:00
GSVector4 dRect ( 0 , 0 , w , h ) ;
2009-07-06 16:35:06 +00:00
2015-06-27 14:39:44 +00:00
// Lengthy explanation of the rescaling code.
// Here an example in 2x:
// RT is 1280x1024 but only contains 512x448 valid data (so 256x224 pixels without upscaling)
//
// PS2 want to read it back as a 1024x1024 pixels (they don't care about the extra pixels)
// So in theory we need to shrink a 2048x2048 RT into a 1024x1024 texture. Obviously the RT is
// too small.
//
// So we will only limit the resize to the available data in RT.
// Therefore we will resize the RT from 1280x1024 to 1280x1024/2048x2048 % of the new texture
// size (which is 1280x1024) (i.e. 800x512)
// From the rendering point of view. UV coordinate will be normalized on the real GS texture size
// This way it can be used on an upscaled texture without extra scaling factor (only requirement is
// to have same proportion)
//
// FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0)
// At 2x it will become 0.5/128 * 256 = 1 (pixel 1)
2015-06-30 21:21:31 +00:00
// I think it is the purpose of the UserHacks_HalfPixelOffset below. However implementation is less
// than ideal.
// 1/ It suppose games have an half pixel offset on texture coordinate which could be wrong
// 2/ It doesn't support rescaling of the RT (tw = 1024)
// Maybe it will be more easy to just round the UV value in the Vertex Shader
2015-06-27 14:39:44 +00:00
2015-06-29 17:17:46 +00:00
if ( ! is_8bits ) {
// 8 bits handling is special due to unscaling. It is better to not execute this code
if ( w > dstsize . x )
{
scale . x = ( float ) dstsize . x / tw ;
dRect . z = ( float ) dstsize . x * scale . x / dst - > m_texture - > GetScale ( ) . x ;
w = dstsize . x ;
}
2010-04-25 00:31:27 +00:00
2015-06-29 17:17:46 +00:00
if ( h > dstsize . y )
{
scale . y = ( float ) dstsize . y / th ;
dRect . w = ( float ) dstsize . y * scale . y / dst - > m_texture - > GetScale ( ) . y ;
h = dstsize . y ;
}
2009-07-31 23:59:06 +00:00
}
2009-12-20 02:50:34 +00:00
2015-05-15 18:47:14 +00:00
GSVector4 sRect ( 0 , 0 , w , h ) ;
2011-02-19 03:36:30 +00:00
2015-05-15 18:45:31 +00:00
GSTexture * sTex = src - > m_texture ? src - > m_texture : dst - > m_texture ;
2019-01-14 21:28:23 +00:00
GSTexture * dTex = m_renderer - > m_dev - > CreateRenderTarget ( w , h ) ;
2015-06-27 09:24:16 +00:00
2014-10-24 18:49:30 +00:00
// GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format
// However it is different here. We want to reuse a Render Target as a texture.
// Because the texture is already on the GPU, CPU can't convert it.
2015-05-25 07:46:51 +00:00
if ( psm . pal > 0 ) {
2018-12-10 20:56:29 +00:00
AttachPaletteToSource ( src , psm . pal , true ) ;
2015-05-25 07:46:51 +00:00
}
2015-06-25 06:25:31 +00:00
// Disable linear filtering for various GS post-processing effect
// 1/ Palette is used to interpret the alpha channel of the RT as an index.
// Star Ocean 3 uses it to emulate a stencil buffer.
// 2/ Z formats are a bad idea to interpolate (discontinuties).
// 3/ 16 bits buffer is used to move data from a channel to another.
//
// I keep linear filtering for standard color even if I'm not sure that it is
// working correctly.
// Indeed, texture is reduced so you need to read all covered pixels (9 in 3x)
// to correctly interpolate the value. Linear interpolation is likely acceptable
// only in 2x scaling
//
// Src texture will still be bilinear interpolated so I'm really not sure
// that we need to do it here too.
//
// Future note: instead to do
// RT 2048x2048 -> T 1024x1024 -> RT 2048x2048
// We can maybe sample directly a bigger texture
// RT 2048x2048 -> T 2048x2048 -> RT 2048x2048
// Pro: better quality. Copy instead of StretchRect (must be faster)
// Cons: consume more memory
//
// In distant future: investigate to reuse the RT directly without any
// copy. Likely a speed boost and memory usage reduction.
bool linear = ( TEX0 . PSM = = PSM_PSMCT32 | | TEX0 . PSM = = PSM_PSMCT24 ) ;
2009-07-06 16:35:06 +00:00
2009-07-31 23:59:06 +00:00
if ( ! src - > m_texture )
2009-07-06 16:35:06 +00:00
{
2015-05-15 18:43:57 +00:00
src - > m_texture = dTex ;
2009-07-31 23:59:06 +00:00
}
2009-07-06 16:35:06 +00:00
2015-06-27 09:24:16 +00:00
if ( ( sRect = = dRect ) . alltrue ( ) & & ! shader )
2009-07-31 23:59:06 +00:00
{
2015-06-18 19:17:43 +00:00
if ( half_right ) {
// You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT
// which is arbitrary set to 1280 (biggest RT used by GS). h/w are based on the input texture
// so the only reliable way to find the real size of the target is to use the TBW value.
2015-06-18 21:02:40 +00:00
float real_width = dst - > m_TEX0 . TBW * 64u * dst - > m_texture - > GetScale ( ) . x ;
2016-04-14 10:00:58 +00:00
m_renderer - > m_dev - > CopyRect ( sTex , dTex , GSVector4i ( ( int ) ( real_width / 2.0f ) , 0 , ( int ) real_width , h ) ) ;
2015-06-18 19:17:43 +00:00
} else {
2015-05-13 18:03:04 +00:00
m_renderer - > m_dev - > CopyRect ( sTex , dTex , GSVector4i ( 0 , 0 , w , h ) ) ; // <= likely wrong dstsize.x could be bigger than w
2015-06-18 19:17:43 +00:00
}
2009-07-31 23:59:06 +00:00
}
else
{
2015-06-05 21:37:06 +00:00
// Different size or not the same format
2015-05-15 18:47:14 +00:00
sRect . z / = sTex - > GetWidth ( ) ;
sRect . w / = sTex - > GetHeight ( ) ;
2009-07-06 16:35:06 +00:00
2015-05-13 18:03:04 +00:00
if ( half_right ) {
sRect . x = sRect . z / 2.0f ;
}
2015-06-05 21:50:38 +00:00
m_renderer - > m_dev - > StretchRect ( sTex , sRect , dTex , dRect , shader , linear ) ;
2009-07-31 23:59:06 +00:00
}
2009-07-06 16:35:06 +00:00
2015-05-15 18:43:57 +00:00
if ( dTex ! = src - > m_texture )
2009-07-31 23:59:06 +00:00
{
m_renderer - > m_dev - > Recycle ( src - > m_texture ) ;
2009-07-06 16:35:06 +00:00
2015-05-15 18:43:57 +00:00
src - > m_texture = dTex ;
2009-07-06 16:35:06 +00:00
}
2011-09-01 13:25:08 +00:00
if ( src - > m_texture )
src - > m_texture - > SetScale ( scale ) ;
else
ASSERT ( 0 ) ;
2009-07-31 23:59:06 +00:00
2010-05-15 04:51:15 +00:00
// Offset hack. Can be enabled via GSdx options.
// The offset will be used in Draw().
2011-02-19 03:36:30 +00:00
2010-05-15 04:51:15 +00:00
float modx = 0.0f ;
float mody = 0.0f ;
2011-02-19 03:36:30 +00:00
if ( UserHacks_HalfPixelOffset & & hack )
2010-05-15 04:51:15 +00:00
{
2011-02-19 03:36:30 +00:00
switch ( m_renderer - > GetUpscaleMultiplier ( ) )
2010-05-15 04:51:15 +00:00
{
2016-07-27 15:23:17 +00:00
case 0 : //Custom Resolution
{
const float offset = 0.2f ;
modx = dst - > m_texture - > GetScale ( ) . x + offset ;
mody = dst - > m_texture - > GetScale ( ) . y + offset ;
dst - > m_texture - > LikelyOffset = true ;
break ;
}
2010-05-15 04:51:15 +00:00
case 2 : modx = 2.2f ; mody = 2.2f ; dst - > m_texture - > LikelyOffset = true ; break ;
case 3 : modx = 3.1f ; mody = 3.1f ; dst - > m_texture - > LikelyOffset = true ; break ;
case 4 : modx = 4.2f ; mody = 4.2f ; dst - > m_texture - > LikelyOffset = true ; break ;
case 5 : modx = 5.3f ; mody = 5.3f ; dst - > m_texture - > LikelyOffset = true ; break ;
case 6 : modx = 6.2f ; mody = 6.2f ; dst - > m_texture - > LikelyOffset = true ; break ;
2015-09-08 15:25:58 +00:00
case 8 : modx = 8.2f ; mody = 8.2f ; dst - > m_texture - > LikelyOffset = true ; break ;
2010-05-15 04:51:15 +00:00
default : modx = 0.0f ; mody = 0.0f ; dst - > m_texture - > LikelyOffset = false ; break ;
}
}
2011-02-19 03:36:30 +00:00
2010-05-15 04:51:15 +00:00
dst - > m_texture - > OffsetHack_modx = modx ;
dst - > m_texture - > OffsetHack_mody = mody ;
2009-07-06 16:35:06 +00:00
}
2012-06-11 10:57:32 +00:00
else
{
if ( m_paltex & & psm . pal > 0 )
{
src - > m_texture = m_renderer - > m_dev - > CreateTexture ( tw , th , Get8bitFormat ( ) ) ;
2018-12-10 20:56:29 +00:00
AttachPaletteToSource ( src , psm . pal , true ) ;
2012-06-11 10:57:32 +00:00
}
2018-11-04 21:06:24 +00:00
else {
2012-06-11 10:57:32 +00:00
src - > m_texture = m_renderer - > m_dev - > CreateTexture ( tw , th ) ;
2018-12-10 20:56:29 +00:00
if ( psm . pal > 0 ) {
AttachPaletteToSource ( src , psm . pal , false ) ;
}
2018-11-04 21:06:24 +00:00
}
2012-06-11 10:57:32 +00:00
}
2009-07-06 16:35:06 +00:00
2016-10-13 18:44:59 +00:00
ASSERT ( src - > m_texture ) ;
2009-07-06 16:35:06 +00:00
2009-07-31 23:59:06 +00:00
m_src . Add ( src , TEX0 , m_renderer - > m_context - > offset . tex ) ;
return src ;
}
2009-11-09 00:32:52 +00:00
2009-07-31 23:59:06 +00:00
// Allocates a new Target of the requested kind (RenderTarget or DepthStencil),
// creates its w x h device surface and registers it at the front of m_dst[type].
// Returns the newly created target.
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type)
{
	const bool is_render_target = (type == RenderTarget);
	const bool is_depth_stencil = (type == DepthStencil);

	ASSERT(is_render_target || is_depth_stencil);

	Target* target = new Target(m_renderer, TEX0, m_temp, m_can_convert_depth);

	// FIXME: initial data should be unswizzled from local mem in Update() if dirty

	target->m_type = type;

	if (is_render_target)
	{
		target->m_texture = m_renderer->m_dev->CreateRenderTarget(w, h);

		target->m_used = true; // FIXME
	}
	else if (is_depth_stencil)
	{
		target->m_texture = m_renderer->m_dev->CreateDepthStencil(w, h);
	}

	m_dst[type].push_front(target);

	return target;
}
2015-07-10 19:11:14 +00:00
void GSTextureCache : : PrintMemoryUsage ( )
{
# ifdef ENABLE_OGL_DEBUG
uint32 tex = 0 ;
uint32 tex_rt = 0 ;
uint32 rt = 0 ;
uint32 dss = 0 ;
2017-08-04 18:37:44 +00:00
for ( auto s : m_src . m_surfaces ) {
if ( s & & ! s - > m_shared_texture ) {
if ( s - > m_target )
2015-07-10 19:11:14 +00:00
tex_rt + = s - > m_texture - > GetMemUsage ( ) ;
else
tex + = s - > m_texture - > GetMemUsage ( ) ;
}
}
2017-08-04 18:37:44 +00:00
for ( auto t : m_dst [ RenderTarget ] ) {
if ( t )
2015-07-10 19:11:14 +00:00
rt + = t - > m_texture - > GetMemUsage ( ) ;
}
2017-08-04 18:37:44 +00:00
for ( auto t : m_dst [ DepthStencil ] ) {
if ( t )
2015-07-10 19:11:14 +00:00
dss + = t - > m_texture - > GetMemUsage ( ) ;
}
GL_PERF ( " MEM: RO Tex %dMB. RW Tex %dMB. Target %dMB. Depth %dMB " , tex > > 20u , tex_rt > > 20u , rt > > 20u , dss > > 20u ) ;
# endif
}
2009-07-31 23:59:06 +00:00
// GSTextureCache::Surface
2009-07-06 16:35:06 +00:00
2011-02-20 23:53:00 +00:00
// Base constructor shared by Source and Target: stores the renderer and the
// scratch buffer, starts with no texture and a zero age.
GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp)
	: m_renderer(r)
	, m_texture(nullptr)
	, m_age(0)
	, m_temp(temp)
	, m_32_bits_fmt(false)
	, m_shared_texture(false)
{
	// Initial base pointer value until a derived constructor assigns the real TEX0.
	m_TEX0.TBP0 = 0x3fff;
}
// Hands the texture back to the device for recycling, unless this surface only
// holds a copy of another surface's texture pointer.
GSTextureCache::Surface::~Surface()
{
	// A shared texture is a plain pointer copy: there is no allocation of our own
	// to recycle.
	if (m_shared_texture)
		return;

	m_renderer->m_dev->Recycle(m_texture);
}
2016-09-30 19:36:52 +00:00
void GSTextureCache : : Surface : : UpdateAge ( )
2009-07-31 23:59:06 +00:00
{
m_age = 0 ;
}
// GSTextureCache::Source
2016-04-24 20:30:56 +00:00
// Builds a texture source for the given TEX0/TEXA registers.
//   r               - owning renderer
//   temp            - scratch buffer forwarded to Surface
//   dummy_container - when true, the source only wraps a texture pointer owned
//                     by another source: no write-rect storage is allocated and
//                     the validity/page bookkeeping is left untouched.
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container)
	: Surface(r, temp)
	, m_palette_obj(nullptr)
	, m_palette(nullptr)
	, m_target(false)
	, m_complete(false)
	, m_spritehack_t(false)
	, m_p2t(nullptr)
	, m_from_target(nullptr)
{
	m_TEX0 = TEX0;
	m_TEXA = TEXA;

	m_write.count = 0;

	if (dummy_container)
	{
		// Dummy container only contain a m_texture that is a pointer to another source.
		m_write.rect = nullptr;
		m_repeating = false;
		return;
	}

	memset(m_layer_TEX0, 0, sizeof(m_layer_TEX0));
	memset(m_valid, 0, sizeof(m_valid));

	// Room for three pending write rectangles (see Write()/Flush()).
	m_write.rect = static_cast<GSVector4i*>(_aligned_malloc(3 * sizeof(GSVector4i), 32));

	m_repeating = m_TEX0.IsRepeating();

	if (m_repeating)
	{
		m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
	}

	GSOffset* tex_offset = m_renderer->m_context->offset.tex;

	m_pages_as_bit = tex_offset->GetPagesAsBits(m_TEX0);
}
// Releases the pending-write rectangle storage allocated by the constructor
// (the pointer is NULL for dummy containers).
GSTextureCache::Source::~Source()
{
	_aligned_free(m_write.rect);
}
2016-09-30 19:36:52 +00:00
void GSTextureCache : : Source : : Update ( const GSVector4i & rect , int layer )
2009-06-12 19:09:17 +00:00
{
2016-09-30 19:36:52 +00:00
Surface : : UpdateAge ( ) ;
2009-06-12 19:09:17 +00:00
2016-09-30 19:37:24 +00:00
if ( layer = = 0 & & ( m_complete | | m_target ) )
2009-06-27 03:32:33 +00:00
{
return ;
}
2009-06-12 19:09:17 +00:00
2016-05-05 13:32:21 +00:00
const GSVector2i & bs = GSLocalMemory : : m_psm [ m_TEX0 . PSM ] . bs ;
2009-06-27 03:32:33 +00:00
2009-07-16 21:36:07 +00:00
int tw = std : : max < int > ( 1 < < m_TEX0 . TW , bs . x ) ;
int th = std : : max < int > ( 1 < < m_TEX0 . TH , bs . y ) ;
2011-03-12 22:10:58 +00:00
GSVector4i r = rect . ralign < Align_Outside > ( bs ) ;
2009-06-27 03:32:33 +00:00
2016-09-30 19:37:24 +00:00
if ( layer = = 0 & & r . eq ( GSVector4i ( 0 , 0 , tw , th ) ) )
2009-07-01 21:14:12 +00:00
{
m_complete = true ; // lame, but better than nothing
}
2015-05-15 18:40:09 +00:00
const GSOffset * off = m_renderer - > m_context - > offset . tex ;
2009-06-27 03:32:33 +00:00
2009-06-28 02:02:14 +00:00
uint32 blocks = 0 ;
2009-06-27 03:32:33 +00:00
2011-04-25 18:18:21 +00:00
if ( m_repeating )
2009-06-12 19:09:17 +00:00
{
2011-04-25 01:44:00 +00:00
for ( int y = r . top ; y < r . bottom ; y + = bs . y )
2009-06-27 03:32:33 +00:00
{
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 base = off - > block . row [ y > > 3u ] ;
2009-06-27 03:32:33 +00:00
2011-04-25 18:18:21 +00:00
for ( int x = r . left , i = ( y < < 7 ) + x ; x < r . right ; x + = bs . x , i + = bs . x )
2009-06-27 03:32:33 +00:00
{
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 block = base + off - > block . col [ x > > 3u ] ;
2009-06-27 03:32:33 +00:00
2017-01-22 21:27:38 +00:00
if ( block < MAX_BLOCKS | | m_wrap_gs_mem )
2009-06-27 03:32:33 +00:00
{
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 addr = ( i > > 3u ) % MAX_BLOCKS ;
2011-04-25 18:18:21 +00:00
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 row = addr > > 5u ;
uint32 col = 1 < < ( addr & 31u ) ;
2011-04-25 01:44:00 +00:00
if ( ( m_valid [ row ] & col ) = = 0 )
2009-07-01 21:14:12 +00:00
{
m_valid [ row ] | = col ;
2009-06-12 19:09:17 +00:00
2016-09-30 19:36:52 +00:00
Write ( GSVector4i ( x , y , x + bs . x , y + bs . y ) , layer ) ;
2009-06-12 19:09:17 +00:00
2011-04-25 01:44:00 +00:00
blocks + + ;
}
2009-06-27 03:32:33 +00:00
}
}
}
}
2011-04-25 01:44:00 +00:00
else
2009-06-28 02:02:14 +00:00
{
2011-04-25 01:44:00 +00:00
for ( int y = r . top ; y < r . bottom ; y + = bs . y )
2009-06-12 19:09:17 +00:00
{
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 base = off - > block . row [ y > > 3u ] ;
2011-04-25 01:44:00 +00:00
for ( int x = r . left ; x < r . right ; x + = bs . x )
2009-06-27 03:32:33 +00:00
{
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 block = base + off - > block . col [ x > > 3u ] ;
2009-06-27 03:32:33 +00:00
2017-01-22 21:27:38 +00:00
if ( block < MAX_BLOCKS | | m_wrap_gs_mem )
2009-06-27 03:32:33 +00:00
{
2017-01-22 21:27:38 +00:00
block % = MAX_BLOCKS ;
gsdx tc: use unsigned constant
Help the compiler to generate better code
C code:
uint32 addr = (i >> 3u) % MAX_BLOCKS;
uint32 row = addr >> 5u;
uint32 col = 1 << (addr & 31u);
ASM Before
f48: mov eax,esi
f4a: mov ecx,esi
f4c: mov edx,DWORD PTR [ebp+0x8]
f4f: sar eax,0x1f
f52: sar ecx,0x3
f55: shr eax,0x12
f58: add ecx,eax
f5a: and ecx,0x3fff
f60: sub ecx,eax
f62: mov eax,0x1
f67: shl eax,cl
f69: shr ecx,0x5
f6c: lea edx,[edx+ecx*4]
ASM After
f48: mov ecx,edi
f4a: mov eax,0x1
f4f: sar ecx,0x3
f52: shl eax,cl
f54: shr ecx,0x3
f57: and ecx,0x7fc
f5d: add ecx,DWORD PTR [ebp+0x8]
2017-01-26 17:53:16 +00:00
uint32 row = block > > 5u ;
uint32 col = 1 < < ( block & 31u ) ;
2009-06-27 03:32:33 +00:00
2011-04-25 01:44:00 +00:00
if ( ( m_valid [ row ] & col ) = = 0 )
2009-06-27 03:32:33 +00:00
{
2009-07-01 21:14:12 +00:00
m_valid [ row ] | = col ;
2011-04-25 01:44:00 +00:00
2016-09-30 19:36:52 +00:00
Write ( GSVector4i ( x , y , x + bs . x , y + bs . y ) , layer ) ;
2011-04-25 01:44:00 +00:00
blocks + + ;
2009-06-27 03:32:33 +00:00
}
}
}
2009-06-12 19:09:17 +00:00
}
2011-04-25 01:44:00 +00:00
}
2009-06-27 03:32:33 +00:00
2011-04-25 01:44:00 +00:00
if ( blocks > 0 )
{
2012-06-11 03:27:16 +00:00
m_renderer - > m_perfmon . Put ( GSPerfMon : : Unswizzle , bs . x * bs . y * blocks < < ( m_palette ? 2 : 0 ) ) ;
2009-06-27 03:32:33 +00:00
2016-09-30 19:36:52 +00:00
Flush ( m_write . count , layer ) ;
2009-06-28 02:02:14 +00:00
}
}
2009-06-27 03:32:33 +00:00
2016-09-30 19:37:24 +00:00
void GSTextureCache : : Source : : UpdateLayer ( const GIFRegTEX0 & TEX0 , const GSVector4i & rect , int layer )
{
if ( layer > 6 )
return ;
if ( m_target ) // Yeah keep dreaming
return ;
if ( TEX0 = = m_layer_TEX0 [ layer ] )
return ;
GIFRegTEX0 old_TEX0 = m_TEX0 ;
m_layer_TEX0 [ layer ] = TEX0 ;
m_TEX0 = TEX0 ;
Update ( rect , layer ) ;
m_TEX0 = old_TEX0 ;
}
2016-09-30 19:36:52 +00:00
void GSTextureCache : : Source : : Write ( const GSVector4i & r , int layer )
2009-06-28 02:02:14 +00:00
{
m_write . rect [ m_write . count + + ] = r ;
while ( m_write . count > = 2 )
{
GSVector4i & a = m_write . rect [ m_write . count - 2 ] ;
GSVector4i & b = m_write . rect [ m_write . count - 1 ] ;
if ( ( a = = b . zyxw ( ) ) . mask ( ) = = 0xfff0 )
{
a . right = b . right ; // extend right
m_write . count - - ;
2009-06-12 19:09:17 +00:00
}
2009-06-28 02:02:14 +00:00
else if ( ( a = = b . xwzy ( ) ) . mask ( ) = = 0xff0f )
{
a . bottom = b . bottom ; // extend down
2009-06-12 19:09:17 +00:00
2009-06-28 02:02:14 +00:00
m_write . count - - ;
}
else
{
break ;
}
}
2009-06-12 19:09:17 +00:00
2009-06-28 02:02:14 +00:00
if ( m_write . count > 2 )
{
2016-09-30 19:36:52 +00:00
Flush ( 1 , layer ) ;
2009-06-28 02:02:14 +00:00
}
2009-06-27 03:32:33 +00:00
}
2009-06-12 19:09:17 +00:00
2016-09-30 19:36:52 +00:00
void GSTextureCache : : Source : : Flush ( uint32 count , int layer )
2009-06-27 03:32:33 +00:00
{
2012-06-17 18:39:18 +00:00
// This function as written will not work for paletted formats copied from framebuffers
// because they are 8 or 4 bit formats on the GS and the GS local memory module reads
// these into an 8 bit format while the D3D surfaces are 32 bit.
// However the function is never called for these cases. This is just for information
// should someone wish to use this function for these cases later.
2009-06-28 02:02:14 +00:00
const GSLocalMemory : : psm_t & psm = GSLocalMemory : : m_psm [ m_TEX0 . PSM ] ;
2009-06-27 03:32:33 +00:00
2009-06-28 02:02:14 +00:00
int tw = 1 < < m_TEX0 . TW ;
int th = 1 < < m_TEX0 . TH ;
2009-06-27 03:32:33 +00:00
2009-06-28 02:02:14 +00:00
GSVector4i tr ( 0 , 0 , tw , th ) ;
2009-06-27 03:32:33 +00:00
2017-05-26 16:30:44 +00:00
int pitch = std : : max ( tw , psm . bs . x ) * sizeof ( uint32 ) ;
2009-06-28 02:02:14 +00:00
2009-07-04 15:14:04 +00:00
GSLocalMemory & mem = m_renderer - > m_mem ;
2009-06-28 02:02:14 +00:00
2015-05-15 18:40:09 +00:00
const GSOffset * off = m_renderer - > m_context - > offset . tex ;
2009-07-22 03:55:28 +00:00
2009-06-28 02:02:14 +00:00
GSLocalMemory : : readTexture rtx = psm . rtx ;
2012-06-11 03:27:16 +00:00
if ( m_palette )
2009-07-06 16:35:06 +00:00
{
pitch > > = 2 ;
rtx = psm . rtxP ;
}
2011-02-19 03:36:30 +00:00
uint8 * buff = m_temp ;
2009-10-12 19:58:03 +00:00
2009-06-28 02:02:14 +00:00
for ( uint32 i = 0 ; i < count ; i + + )
2009-06-27 03:32:33 +00:00
{
2009-06-28 02:02:14 +00:00
GSVector4i r = m_write . rect [ i ] ;
2009-06-27 19:05:36 +00:00
2009-06-28 02:02:14 +00:00
if ( ( r > tr ) . mask ( ) & 0xff00 )
2009-06-27 19:05:36 +00:00
{
2015-05-15 18:40:09 +00:00
( mem . * rtx ) ( off , r , buff , pitch , m_TEXA ) ;
2009-06-27 03:32:33 +00:00
2016-09-30 19:37:24 +00:00
m_texture - > Update ( r . rintersect ( tr ) , buff , pitch , layer ) ;
2009-06-27 19:05:36 +00:00
}
else
{
2009-06-28 02:02:14 +00:00
GSTexture : : GSMap m ;
2016-09-30 19:37:24 +00:00
if ( m_texture - > Map ( m , & r , layer ) )
2009-06-28 02:02:14 +00:00
{
2016-09-25 14:19:27 +00:00
( mem . * rtx ) ( off , r , m . bits , m . pitch , m_TEXA ) ;
2009-06-28 02:02:14 +00:00
m_texture - > Unmap ( ) ;
}
else
{
2016-09-25 14:19:27 +00:00
( mem . * rtx ) ( off , r , buff , pitch , m_TEXA ) ;
2009-06-27 19:05:36 +00:00
2016-09-30 19:37:24 +00:00
m_texture - > Update ( r , buff , pitch , layer ) ;
2009-06-28 02:02:14 +00:00
}
2009-06-27 19:05:36 +00:00
}
2009-06-12 19:09:17 +00:00
}
2009-06-28 02:02:14 +00:00
if ( count < m_write . count )
{
2013-09-05 20:01:47 +00:00
// Warning src and destination overlap. Memmove must be used instead of memcpy
memmove ( & m_write . rect [ 0 ] , & m_write . rect [ count ] , ( m_write . count - count ) * sizeof ( m_write . rect [ 0 ] ) ) ;
2009-06-28 02:02:14 +00:00
}
m_write . count - = count ;
2009-06-12 19:09:17 +00:00
}
2018-12-10 20:49:40 +00:00
bool GSTextureCache : : Source : : ClutMatch ( PaletteKey palette_key ) {
return PaletteKeyEqual ( ) ( palette_key , m_palette_obj - > GetPaletteKey ( ) ) ;
}
2009-06-27 03:32:33 +00:00
// GSTextureCache::Target
2009-05-22 01:22:52 +00:00
2015-06-10 08:07:40 +00:00
// Builds a target (render target or depth buffer) description for TEX0.
// The surface type is left at -1 until the creator assigns it, and the valid
// area starts out empty.
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported)
	: Surface(r, temp)
	, m_type(-1)
	, m_used(false)
	, m_depth_supported(depth_supported)
	, m_end_block(0)
{
	m_TEX0 = TEX0;

	const GSLocalMemory::psm_t& fmt = GSLocalMemory::m_psm[TEX0.PSM];

	// Every format whose trbpp is not 16 flags the target as 32 bits.
	m_32_bits_fmt |= (fmt.trbpp != 16);

	// Alpha starts out dirty unless trbpp is 24.
	m_dirty_alpha = fmt.trbpp != 24;

	m_valid = GSVector4i::zero();
}
2009-06-27 03:32:33 +00:00
void GSTextureCache : : Target : : Update ( )
2009-06-12 19:09:17 +00:00
{
2016-09-30 19:36:52 +00:00
Surface : : UpdateAge ( ) ;
2009-06-12 19:09:17 +00:00
2009-06-27 03:32:33 +00:00
// FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :)
2015-06-10 08:07:40 +00:00
// GH: it must be doable
// 1/ rescale the new t to the good size
// 2/ copy each rectangle (rescale the rectangle) (use CopyRect or multiple vertex)
// Alternate
// 1/ uses multiple vertex rectangle
2009-06-27 03:32:33 +00:00
2017-11-08 06:47:23 +00:00
GSVector2i t_size = default_rt_size ;
// Ensure buffer width is at least of the minimum required value.
// Probably not necessary but doesn't hurt to be on the safe side.
// I've seen some games use buffer sizes over 1024, which might bypass our default limit
int buffer_width = m_TEX0 . TBW < < 6 ;
t_size . x = std : : max ( buffer_width , t_size . x ) ;
2016-05-05 10:59:49 +00:00
GSVector4i r = m_dirty . GetDirtyRectAndClear ( m_TEX0 , t_size ) ;
2009-06-20 20:28:36 +00:00
2015-06-10 08:07:40 +00:00
if ( r . rempty ( ) ) return ;
2009-06-20 20:28:36 +00:00
2015-07-12 15:56:46 +00:00
// No handling please
if ( ( m_type = = DepthStencil ) & & ! m_depth_supported ) {
// do the most likely thing a direct write would do, clear it
GL_INS ( " ERROR: Update DepthStencil dummy " ) ;
return ;
2016-05-19 15:50:41 +00:00
} else if ( m_type = = DepthStencil & & m_renderer - > m_game . title = = CRC : : FFX2 ) {
2016-05-17 17:28:11 +00:00
GL_INS ( " ERROR: bad invalidation detected, depth buffer will be cleared " ) ;
// FFX2 menu. Invalidation of the depth is wrongly done and only the first
// page is invalidated. Technically a CRC hack will be better but I don't expect
// any games to only upload a single page of data for the depth.
//
// FFX2 menu got another bug. I'm not sure the top-left is properly written or not. It
// could be a gsdx transfer bug too due to unaligned-page transfer.
//
// So the quick and dirty solution is just to clean the depth buffer.
2016-07-27 21:22:46 +00:00
m_renderer - > m_dev - > ClearDepth ( m_texture ) ;
2016-05-17 17:28:11 +00:00
return ;
2015-07-12 15:56:46 +00:00
}
2015-06-10 08:07:40 +00:00
int w = r . width ( ) ;
int h = r . height ( ) ;
2009-06-20 20:28:36 +00:00
2015-06-10 08:07:40 +00:00
GIFRegTEXA TEXA ;
2009-07-22 03:55:28 +00:00
2015-06-10 08:07:40 +00:00
TEXA . AEM = 1 ;
TEXA . TA0 = 0 ;
TEXA . TA1 = 0x80 ;
2009-06-12 19:09:17 +00:00
2015-06-10 08:07:40 +00:00
GSTexture * t = m_renderer - > m_dev - > CreateTexture ( w , h ) ;
2009-05-22 01:22:52 +00:00
2015-06-10 08:07:40 +00:00
const GSOffset * off = m_renderer - > m_mem . GetOffset ( m_TEX0 . TBP0 , m_TEX0 . TBW , m_TEX0 . PSM ) ;
2010-04-25 00:31:27 +00:00
2015-06-10 08:07:40 +00:00
GSTexture : : GSMap m ;
if ( t - > Map ( m ) )
{
m_renderer - > m_mem . ReadTexture ( off , r , m . bits , m . pitch , TEXA ) ;
2009-05-22 01:22:52 +00:00
2015-06-10 08:07:40 +00:00
t - > Unmap ( ) ;
}
else
{
int pitch = ( ( w + 3 ) & ~ 3 ) * 4 ;
2009-05-22 01:22:52 +00:00
2015-06-10 08:07:40 +00:00
m_renderer - > m_mem . ReadTexture ( off , r , m_temp , pitch , TEXA ) ;
2009-06-27 03:32:33 +00:00
2015-06-10 08:07:40 +00:00
t - > Update ( r . rsize ( ) , m_temp , pitch ) ;
}
// m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4);
// Copy the new GS memory content into the destination texture.
if ( m_type = = RenderTarget )
{
2016-08-20 09:52:22 +00:00
GL_INS ( " ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d) " , m_TEX0 . TBP0 , m_TEX0 . TBW , r . x , r . y , r . z , r . w ) ;
2015-06-10 08:07:40 +00:00
m_renderer - > m_dev - > StretchRect ( t , m_texture , GSVector4 ( r ) * GSVector4 ( m_texture - > GetScale ( ) ) . xyxy ( ) ) ;
2009-06-27 03:32:33 +00:00
}
else if ( m_type = = DepthStencil )
2009-05-22 01:22:52 +00:00
{
2016-03-06 13:25:14 +00:00
GL_INS ( " ERROR: Update DepthStencil 0x%x " , m_TEX0 . TBP0 ) ;
2009-06-27 03:32:33 +00:00
2015-06-13 08:05:33 +00:00
// FIXME linear or not?
2015-09-08 14:36:23 +00:00
m_renderer - > m_dev - > StretchRect ( t , m_texture , GSVector4 ( r ) * GSVector4 ( m_texture - > GetScale ( ) ) . xyxy ( ) , ShaderConvert_RGBA8_TO_FLOAT32 ) ;
2009-05-22 01:22:52 +00:00
}
2015-06-10 08:07:40 +00:00
m_renderer - > m_dev - > Recycle ( t ) ;
2009-06-27 03:32:33 +00:00
}
2009-05-22 01:22:52 +00:00
2016-03-06 13:25:14 +00:00
void GSTextureCache : : Target : : UpdateValidity ( const GSVector4i & rect )
2016-03-06 13:24:20 +00:00
{
2016-03-06 13:25:14 +00:00
m_valid = m_valid . runion ( rect ) ;
2016-03-06 13:24:20 +00:00
uint32 nb_block = m_TEX0 . TBW * m_valid . height ( ) ;
2016-03-06 13:25:14 +00:00
if ( m_TEX0 . PSM = = PSM_PSMCT16 )
nb_block > > = 1 ;
2016-03-06 13:24:20 +00:00
m_end_block = m_TEX0 . TBP0 + nb_block ;
2016-08-17 19:16:39 +00:00
// GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w);
2016-03-06 13:24:20 +00:00
}
2016-05-05 10:17:57 +00:00
bool GSTextureCache : : Target : : Inside ( uint32 bp , uint32 bw , uint32 psm , const GSVector4i & rect )
2016-03-06 13:25:14 +00:00
{
2016-06-26 16:32:04 +00:00
uint32 block = GSLocalMemory : : m_psm [ psm ] . bn ( rect . width ( ) , rect . height ( ) , bp , bw ) ;
2016-03-06 13:25:14 +00:00
2016-05-05 10:17:57 +00:00
return bp > m_TEX0 . TBP0 & & block < m_end_block ;
2016-03-06 13:25:14 +00:00
}
2009-06-27 03:32:33 +00:00
// GSTextureCache::SourceMap
2009-05-22 01:22:52 +00:00
2016-04-28 15:03:41 +00:00
void GSTextureCache : : SourceMap : : Add ( Source * s , const GIFRegTEX0 & TEX0 , GSOffset * off )
2009-06-27 03:32:33 +00:00
{
2009-07-16 21:36:07 +00:00
m_surfaces . insert ( s ) ;
2009-05-22 01:22:52 +00:00
2009-07-02 00:47:50 +00:00
if ( s - > m_target )
{
// TODO
2014-10-24 18:49:30 +00:00
// GH: I don't know why but it seems we only consider the first page for a render target
2017-01-12 20:04:00 +00:00
size_t page = TEX0 . TBP0 > > 5 ;
2014-10-24 18:49:30 +00:00
2017-08-04 18:37:44 +00:00
s - > m_erase_it [ page ] = m_map [ page ] . InsertFront ( s ) ;
2009-07-02 00:47:50 +00:00
return ;
}
2014-10-24 18:49:30 +00:00
// The source pointer will be stored/duplicated in all m_map[array of pages]
2016-04-23 17:15:33 +00:00
for ( size_t i = 0 ; i < countof ( m_pages ) ; i + + )
{
2017-01-22 22:05:02 +00:00
if ( uint32 p = s - > m_pages_as_bit [ i ] )
2016-04-23 17:15:33 +00:00
{
2017-08-04 18:37:44 +00:00
auto * m = & m_map [ i < < 5 ] ;
2017-01-12 20:04:00 +00:00
auto * e = & s - > m_erase_it [ i < < 5 ] ;
2016-04-23 17:15:33 +00:00
unsigned long j ;
while ( _BitScanForward ( & j , p ) )
{
2017-01-11 19:13:53 +00:00
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
2017-01-15 18:55:40 +00:00
p ^ = 1U < < j ;
2016-04-23 17:15:33 +00:00
2017-08-04 18:37:44 +00:00
e [ j ] = m [ j ] . InsertFront ( s ) ;
2016-04-23 17:15:33 +00:00
}
}
}
}
2009-06-27 03:32:33 +00:00
void GSTextureCache : : SourceMap : : RemoveAll ( )
2009-05-22 01:22:52 +00:00
{
2018-11-04 21:06:24 +00:00
for ( auto s : m_surfaces ) delete s ;
2009-05-22 01:22:52 +00:00
2009-06-27 03:32:33 +00:00
m_surfaces . clear ( ) ;
2009-05-22 01:22:52 +00:00
2013-06-29 12:02:03 +00:00
for ( size_t i = 0 ; i < countof ( m_map ) ; i + + )
2009-05-22 01:22:52 +00:00
{
2009-06-27 03:32:33 +00:00
m_map [ i ] . clear ( ) ;
2009-05-22 01:22:52 +00:00
}
2009-06-27 03:32:33 +00:00
}
2009-05-22 01:22:52 +00:00
2009-06-27 03:32:33 +00:00
void GSTextureCache : : SourceMap : : RemoveAt ( Source * s )
{
m_surfaces . erase ( s ) ;
2015-05-16 17:28:22 +00:00
GL_CACHE ( " TC: Remove Src Texture: %d (0x%x) " ,
2015-05-13 06:49:32 +00:00
s - > m_texture ? s - > m_texture - > GetID ( ) : 0 ,
2015-05-16 17:28:22 +00:00
s - > m_TEX0 . TBP0 ) ;
2015-05-13 06:49:32 +00:00
2017-01-12 20:04:00 +00:00
if ( s - > m_target )
2009-05-22 01:22:52 +00:00
{
2017-08-04 18:37:44 +00:00
const size_t page = s - > m_TEX0 . TBP0 > > 5 ;
m_map [ page ] . EraseIndex ( s - > m_erase_it [ page ] ) ;
2017-01-12 20:04:00 +00:00
}
else
{
2017-01-12 20:17:01 +00:00
for ( size_t i = 0 ; i < countof ( m_pages ) ; i + + )
2009-07-02 00:47:50 +00:00
{
2017-01-22 22:05:02 +00:00
if ( uint32 p = s - > m_pages_as_bit [ i ] )
2017-01-12 20:17:01 +00:00
{
2017-08-04 18:37:44 +00:00
auto * m = & m_map [ i < < 5 ] ;
const auto * e = & s - > m_erase_it [ i < < 5 ] ;
2017-01-12 20:17:01 +00:00
unsigned long j ;
while ( _BitScanForward ( & j , p ) )
{
// FIXME: this statement could be optimized to a single ASM instruction (instead of 4)
// Either BTR (AKA bit test and reset). Depends on the previous instruction.
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but require BMI1 (basically a recent CPU)
2017-01-15 18:55:40 +00:00
p ^ = 1U < < j ;
2017-01-12 20:17:01 +00:00
2017-08-04 18:37:44 +00:00
m [ j ] . EraseIndex ( e [ j ] ) ;
2017-01-12 20:17:01 +00:00
}
2017-01-12 20:04:00 +00:00
}
2009-07-02 00:47:50 +00:00
}
2009-05-22 01:22:52 +00:00
}
2009-06-27 03:32:33 +00:00
delete s ;
2010-04-25 00:31:27 +00:00
}
2018-11-04 21:06:24 +00:00
2018-11-13 18:22:13 +00:00
void GSTextureCache : : AttachPaletteToSource ( Source * s , uint16 pal , bool need_gs_texture )
2018-11-04 21:06:24 +00:00
{
2018-12-10 20:54:05 +00:00
s - > m_palette_obj = m_palette_map . LookupPalette ( pal , need_gs_texture ) ;
s - > m_palette = need_gs_texture ? s - > m_palette_obj - > GetPaletteGSTexture ( ) : nullptr ;
2018-11-04 21:06:24 +00:00
}
// GSTextureCache::Palette
2018-12-10 20:49:40 +00:00
// Snapshots the renderer's current CLUT (`pal` entries of 32 bits) into a
// private 64-byte aligned copy and, when requested, also creates the palette
// texture.
GSTextureCache::Palette::Palette(const GSRenderer* renderer, uint16 pal, bool need_gs_texture)
	: m_pal(pal)
	, m_tex_palette(nullptr)
	, m_renderer(renderer)
{
	uint16 palette_size = pal * sizeof(uint32);

	m_clut = static_cast<uint32*>(_aligned_malloc(palette_size, 64));

	memcpy(m_clut, (const uint32*)m_renderer->m_mem.m_clut, palette_size);

	if (!need_gs_texture)
		return;

	InitializeTexture();
}
// Hands the palette texture back to the device for recycling and frees the
// private CLUT copy.
GSTextureCache::Palette::~Palette()
{
	m_renderer->m_dev->Recycle(m_tex_palette);

	_aligned_free(m_clut);
}
// Returns the palette texture, or nullptr when it has not been created.
GSTexture* GSTextureCache::Palette::GetPaletteGSTexture()
{
	return m_tex_palette;
}
2018-12-10 20:49:40 +00:00
// Returns the lookup key of this palette: its CLUT copy and its entry count.
GSTextureCache::PaletteKey GSTextureCache::Palette::GetPaletteKey()
{
	return PaletteKey{ m_clut, m_pal };
}
2018-12-10 21:33:55 +00:00
void GSTextureCache : : Palette : : InitializeTexture ( ) {
if ( ! m_tex_palette ) {
// A palette texture is always created with dimensions 256x1 (also in the case that m_pal is 16, thus a 16x1 texture
// would be enough to store the CLUT data) because the coordinates that the shader uses for
// sampling such texture are always normalized by 255.
// This is because indexes are stored as normalized values of an RGBA texture (e.g. index 15 will be read as (15/255),
// and therefore will read texel 15/255 * texture size).
m_tex_palette = m_renderer - > m_dev - > CreateTexture ( 256 , 1 ) ;
m_tex_palette - > Update ( GSVector4i ( 0 , 0 , m_pal , 1 ) , m_clut , m_pal * sizeof ( m_clut [ 0 ] ) ) ;
}
}
2018-11-04 21:06:24 +00:00
// GSTextureCache::PaletteKeyHash
// Hashes the content of the clut.
// The hashing function is implemented by taking two things into account:
// 1) The clut can be an array of 16 or 256 uint32 (depending on the pal parameter) and in order to speed up the computation of the hash
// the array is hashed in blocks of 16 uint32, so for clut of size 16 uint32 the hashing is computed in one pass and for clut of 256 uint32
// it is computed in 16 passes,
// 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function
// is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended.
std : : size_t GSTextureCache : : PaletteKeyHash : : operator ( ) ( const PaletteKey & key ) const {
uint16 pal = key . pal ;
const uint32 * clut = key . clut ;
2018-12-10 20:11:52 +00:00
ASSERT ( ( pal & 15 ) = = 0 ) ;
2018-11-04 21:06:24 +00:00
size_t clut_hash = 3831179159 ;
for ( uint16 i = 0 ; i < pal ; i + = 16 ) {
2018-12-10 20:18:47 +00:00
clut_hash = ( clut_hash + 1488000301 ) ^ ( clut [ i ] + 33644011 ) ;
clut_hash = ( clut_hash + 3831179159 ) ^ ( clut [ i + 1 ] + 47627467 ) ;
clut_hash = ( clut_hash + 3659574209 ) ^ ( clut [ i + 2 ] + 577038523 ) ;
clut_hash = ( clut_hash + 33644011 ) ^ ( clut [ i + 3 ] + 3491555267 ) ;
clut_hash = ( clut_hash + 777771959 ) ^ ( clut [ i + 4 ] + 3301075993 ) ;
clut_hash = ( clut_hash + 4019618579 ) ^ ( clut [ i + 5 ] + 4186992613 ) ;
clut_hash = ( clut_hash + 3465668953 ) ^ ( clut [ i + 6 ] + 3043435883 ) ;
clut_hash = ( clut_hash + 3494478943 ) ^ ( clut [ i + 7 ] + 3441897883 ) ;
clut_hash = ( clut_hash + 3432010979 ) ^ ( clut [ i + 8 ] + 2167922789 ) ;
clut_hash = ( clut_hash + 1570862863 ) ^ ( clut [ i + 9 ] + 3401920591 ) ;
2018-11-04 21:06:24 +00:00
clut_hash = ( clut_hash + 1002648679 ) ^ ( clut [ i + 10 ] + 1293530519 ) ;
2018-12-10 20:18:47 +00:00
clut_hash = ( clut_hash + 551381741 ) ^ ( clut [ i + 11 ] + 2539834039 ) ;
2018-11-04 21:06:24 +00:00
2018-12-10 20:18:47 +00:00
clut_hash = ( clut_hash + 3768974459 ) ^ ( clut [ i + 12 ] + 169943507 ) ;
clut_hash = ( clut_hash + 862380703 ) ^ ( clut [ i + 13 ] + 2906932549 ) ;
2018-11-04 21:06:24 +00:00
clut_hash = ( clut_hash + 3433082137 ) ^ ( clut [ i + 14 ] + 4234384109 ) ;
clut_hash = ( clut_hash + 2679083843 ) ^ ( clut [ i + 15 ] + 2719605247 ) ;
}
return clut_hash ;
} ;
// GSTextureCache::PaletteKeyEqual
bool GSTextureCache : : PaletteKeyEqual : : operator ( ) ( const PaletteKey & lhs , const PaletteKey & rhs ) const {
2018-12-10 20:12:42 +00:00
if ( lhs . pal ! = rhs . pal ) {
return false ;
}
return GSVector4i : : compare64 ( lhs . clut , rhs . clut , lhs . pal * sizeof ( lhs . clut [ 0 ] ) ) ;
2018-11-04 21:06:24 +00:00
} ;
// GSTextureCache::PaletteMap
2018-12-10 20:18:47 +00:00
// Stores the renderer used by the palette map and reserves room for MAX_SIZE elements
// in each of the hash maps held in m_maps.
GSTextureCache::PaletteMap::PaletteMap(const GSRenderer* renderer)
	: m_renderer(renderer)
{
	for (auto& palette_map : m_maps) {
		palette_map.reserve(MAX_SIZE);
	}
}
2018-11-13 18:22:13 +00:00
std : : shared_ptr < GSTextureCache : : Palette > GSTextureCache : : PaletteMap : : LookupPalette ( uint16 pal , bool need_gs_texture ) {
2018-12-10 20:11:52 +00:00
ASSERT ( pal = = 16 | | pal = = 256 ) ;
2018-11-04 21:06:24 +00:00
// Choose which hash map search into:
// pal == 16 : index 0
// pal == 256 : index 1
auto & map = m_maps [ pal = = 16 ? 0 : 1 ] ;
const uint32 * clut = ( const uint32 * ) m_renderer - > m_mem . m_clut ;
// Create PaletteKey for searching into map (clut is actually not copied, so do not store this key into the map)
PaletteKey palette_key = { clut , pal } ;
auto it1 = map . find ( palette_key ) ;
if ( it1 ! = map . end ( ) ) {
// Clut content match, HIT
2018-12-10 21:33:55 +00:00
if ( need_gs_texture & & ! it1 - > second - > GetPaletteGSTexture ( ) ) {
// Generate GSTexture and upload clut content if needed and not done yet
it1 - > second - > InitializeTexture ( ) ;
}
2018-11-04 21:06:24 +00:00
return it1 - > second ;
}
2018-12-10 20:18:47 +00:00
// No palette with matching clut content, MISS
2018-11-04 21:06:24 +00:00
if ( map . size ( ) > MAX_SIZE ) {
// If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one
GL_INS ( " WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes. " , pal * sizeof ( uint32 ) , map . size ( ) , MAX_SIZE ) ;
uint32 current_size = map . size ( ) ;
for ( auto it = map . begin ( ) ; it ! = map . end ( ) ; ) {
// If the palette is unused, there is only one shared pointers holding a reference to the unused Palette object,
// and this shared pointer is the one stored in the map itself
if ( it - > second . use_count ( ) < = 1 ) {
// Palette is unused
it = map . erase ( it ) ; // Erase element from map
// The palette object should now be gone as the shared pointer to the object in the map is deleted
}
else {
+ + it ;
}
}
uint32 cleared_palette_count = current_size - ( uint32 ) map . size ( ) ;
if ( cleared_palette_count = = 0 ) {
GL_INS ( " ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact. " , pal * sizeof ( uint32 ) , map . size ( ) , MAX_SIZE ) ;
}
else {
map . reserve ( MAX_SIZE ) ; // Ensure map capacity is not modified by the clearing
GL_INS ( " INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes. " , pal * sizeof ( uint32 ) , map . size ( ) , cleared_palette_count ) ;
}
}
2018-11-13 18:22:13 +00:00
std : : shared_ptr < Palette > palette = std : : make_shared < Palette > ( m_renderer , pal , need_gs_texture ) ;
2018-11-04 21:06:24 +00:00
2018-12-10 20:49:40 +00:00
map . emplace ( palette - > GetPaletteKey ( ) , palette ) ;
2018-11-04 21:06:24 +00:00
GL_CACHE ( " TC, %u-bit PaletteMap (Size %u): Added new palette. " , pal * sizeof ( uint32 ) , map . size ( ) ) ;
return palette ;
}
void GSTextureCache : : PaletteMap : : Clear ( ) {
for ( auto & map : m_maps ) {
map . clear ( ) ; // Clear all the nodes of the map, deleting Palette objects managed by shared pointers as they should be unused elsewhere
map . reserve ( MAX_SIZE ) ; // Ensure map capacity is not modified by the clearing
}
}