GSdx GSTextureCache: Using FastList instead of std::list. Various other improvements.

Use range-based for loops where possible (correctly).
Use auto where possible (to minimize code changes if it is ever decided to switch back to a std container).
Use a more efficient erase pattern (where possible).
Minor code tweaks.
This commit is contained in:
Alessandro Vetere 2017-08-04 20:37:44 +02:00 committed by Gregory Hainaut
parent b2508dcb59
commit a0aa585afd
2 changed files with 86 additions and 109 deletions

View File

@ -75,7 +75,7 @@ void GSTextureCache::RemovePartial()
for (int type = 0; type < 2; type++)
{
for (auto &t : m_dst[type]) delete t;
for (auto t : m_dst[type]) delete t;
m_dst[type].clear();
}
@ -87,7 +87,7 @@ void GSTextureCache::RemoveAll()
for(int type = 0; type < 2; type++)
{
for (auto &t : m_dst[type]) delete t;
for (auto t : m_dst[type]) delete t;
m_dst[type].clear();
}
@ -200,10 +200,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
Source* src = NULL;
list<Source*>& m = m_src.m_map[TEX0.TBP0 >> 5];
auto& m = m_src.m_map[TEX0.TBP0 >> 5];
for(list<Source*>::iterator i = m.begin(); i != m.end(); ++i)
for(auto i = m.begin(); i != m.end(); ++i)
{
Source* s = *i;
@ -225,7 +224,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
continue;
}
m.splice(m.begin(), m, i);
m.MoveFront(i.Index());
src = s;
@ -257,10 +256,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// (Simply not doing this code at all makes a lot of previously missing stuff show up, but breaks pretty much everything
// else.)
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ++i)
{
Target* t = *i;
for(auto t : m_dst[RenderTarget]) {
if(t->m_used && t->m_dirty.empty()) {
// Typical bug (MGS3 blue cloud):
// 1/ RT used as 32 bits => alpha channel written
@ -357,9 +353,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// Unfortunately, I don't have any Arc the Lad testcase
//
// 1/ Check only current frame, I guess it is only used as a postprocessing effect
for(list<Target*>::iterator i = m_dst[DepthStencil].begin(); i != m_dst[DepthStencil].end(); ++i) {
Target* t = *i;
for(auto t : m_dst[DepthStencil]) {
if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled");
@ -445,13 +439,13 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
Target* dst = NULL;
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); ++i)
{
auto& list = m_dst[type];
for(auto i = list.begin(); i != list.end(); ++i) {
Target* t = *i;
if(bp == t->m_TEX0.TBP0)
{
m_dst[type].splice(m_dst[type].begin(), m_dst[type], i);
list.MoveFront(i.Index());
dst = t;
@ -476,9 +470,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
// Depth stencil/RT can be an older RT/DS, but only check recent RT/DS to avoid picking
// some bad data.
Target* dst_match = nullptr;
for(list<Target*>::iterator i = m_dst[rev_type].begin(); i != m_dst[rev_type].end(); ++i) {
Target* t = *i;
for(auto t : m_dst[rev_type]) {
if (bp == t->m_TEX0.TBP0) {
if (t->m_age == 0) {
dst_match = t;
@ -610,10 +602,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
#if 0
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++)
for(auto t : m_dst[RenderTarget])
{
Target* t = *i;
if(bp == t->m_TEX0.TBP0)
{
dst = t;
@ -674,7 +664,8 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
if (!CanConvertDepth())
return;
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); ++i)
auto& list = m_dst[type];
for(auto i = list.begin(); i != list.end(); ++i)
{
Target* t = *i;
@ -684,7 +675,7 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
m_dst[type].erase(i);
list.erase(i);
delete t;
break;
@ -707,13 +698,11 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
{
// Remove Sources that have the same BP as the render target (color & dss):
// rendering will dirty the copy
const list<Source*>& m = m_src.m_map[bp >> 5];
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
auto& list = m_src.m_map[bp >> 5];
for(auto i = list.begin(); i != list.end(); )
{
list<Source*>::const_iterator j = i++;
Source* s = *j;
Source* s = *i;
++i;
if(GSUtil::HasSharedBits(bp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM))
{
@ -726,14 +715,11 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
// Detect half of the render target (fix snow engine game)
// Target pages (8KB) always have a width of 64 pixels
// Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10
const list<Source*>& m = m_src.m_map[bbp >> 5];
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
auto& list = m_src.m_map[bbp >> 5];
for(auto i = list.begin(); i != list.end(); )
{
list<Source*>::const_iterator j = i++;
Source* s = *j;
Source* s = *i;
++i;
if(GSUtil::HasSharedBits(bbp, psm, s->m_TEX0.TBP0, s->m_TEX0.PSM))
{
@ -749,11 +735,8 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
uint32 end_block = GSLocalMemory::m_psm[psm].bn(rect.width(), rect.height(), bp, bw);
auto type = RenderTarget;
for(auto i = m_dst[type].begin(); i != m_dst[type].end(); )
for(auto t : m_dst[type])
{
auto j = i++;
Target* t = *j;
if (t->m_TEX0.TBP0 > bp && t->m_end_block < end_block) {
// Haunting Ground expects to clean buffer B with a rendering into buffer A.
// Situation is quite messy as it would require to extract the data from the buffer A
@ -784,34 +767,33 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
{
uint32 page = *p;
const list<Source*>& m = m_src.m_map[page];
for(list<Source*>::const_iterator i = m.begin(); i != m.end(); )
auto& list = m_src.m_map[page];
for(auto i = list.begin(); i != list.end(); )
{
list<Source*>::const_iterator j = i++;
Source* s = *j;
Source* s = *i;
++i;
if(GSUtil::HasSharedBits(psm, s->m_TEX0.PSM))
{
uint32* RESTRICT valid = s->m_valid;
bool b = bp == s->m_TEX0.TBP0;
if(!s->m_target)
{
if (m_disable_partial_invalidation && s->m_repeating) {
if(m_disable_partial_invalidation && s->m_repeating)
{
m_src.RemoveAt(s);
} else {
}
else
{
uint32* RESTRICT valid = s->m_valid;
// Invalidate data of input texture
if(s->m_repeating)
{
// Note: very hot path on Snowblind engine games
vector<GSVector2i>& l = s->m_p2t[page];
for(vector<GSVector2i>::iterator k = l.begin(); k != l.end(); ++k)
for(const GSVector2i& k : s->m_p2t[page])
{
valid[k->x] &= k->y;
valid[k.x] &= k.y;
}
}
else
@ -842,10 +824,10 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
for(int type = 0; type < 2; type++)
{
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); )
auto& list = m_dst[type];
for(auto i = list.begin(); i != list.end(); )
{
list<Target*>::iterator j = i++;
auto j = i++;
Target* t = *j;
// GH: (I think) this code is completely broken. Typical issue:
@ -867,7 +849,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b
}
else
{
m_dst[type].erase(j);
list.erase(j);
GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
@ -962,12 +944,8 @@ void GSTextureCache::InvalidateLocalMem(GSOffset* off, const GSVector4i& r)
// It works for all the games mentioned below and fixes a couple of other ones as well
// (Busen0: Wizardry and Chaos Legion).
// Also in a few games the below code ran the Grandia3 case when it shouldn't :p
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); )
for(auto t : m_dst[RenderTarget])
{
list<Target*>::iterator j = i++;
Target* t = *j;
if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
{
if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
@ -1075,17 +1053,20 @@ void GSTextureCache::InvalidateVideoMemSubTarget(GSTextureCache::Target* rt)
if (!rt)
return;
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); ) {
list<Target*>::iterator j = i++;
Target* t = *j;
auto& list = m_dst[RenderTarget];
for(auto i = list.begin(); i != list.end(); ) {
Target* t = *i;
if((t->m_TEX0.TBP0 > rt->m_TEX0.TBP0) && (t->m_end_block < rt->m_end_block) && (t->m_TEX0.TBW == rt->m_TEX0.TBW)
&& (t->m_TEX0.TBP0 < t->m_end_block)) {
GL_INS("InvalidateVideoMemSubTarget: rt 0x%x -> 0x%x, sub rt 0x%x -> 0x%x",
rt->m_TEX0.TBP0, rt->m_end_block, t->m_TEX0.TBP0, t->m_end_block);
m_dst[RenderTarget].erase(j);
i = list.erase(i);
delete t;
} else {
++i;
}
}
}
@ -1095,21 +1076,22 @@ void GSTextureCache::IncAge()
int maxage = m_src.m_used ? 3 : 30;
// You can't use m_map[page] because Source* are duplicated on several pages.
for(hash_set<Source*>::iterator i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); )
for(auto i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); )
{
hash_set<Source*>::iterator j = i++;
Source* s = *j;
Source* s = *i;
if(s->m_shared_texture) {
// Shared textures are only temporarily added to the hash set, not to the texture
// cache list, therefore you can't use RemoveAt
m_src.m_surfaces.erase(s);
i = m_src.m_surfaces.erase(i);
delete s;
} else if(++s->m_age > maxage) {
} else {
++i;
if (++s->m_age > maxage) {
m_src.RemoveAt(s);
}
}
}
m_src.m_used = false;
@ -1121,11 +1103,10 @@ void GSTextureCache::IncAge()
for(int type = 0; type < 2; type++)
{
for(list<Target*>::iterator i = m_dst[type].begin(); i != m_dst[type].end(); )
auto& list = m_dst[type];
for(auto i = list.begin(); i != list.end(); )
{
list<Target*>::iterator j = i++;
Target* t = *j;
Target* t = *i;
// This variable is used to detect the texture shuffle effect. There is a high
// probability that game will do it on the current RT.
@ -1138,12 +1119,14 @@ void GSTextureCache::IncAge()
if(++t->m_age > maxage)
{
m_dst[type].erase(j);
i = list.erase(i);
GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type),
t->m_texture ? t->m_texture->GetID() : 0,
t->m_TEX0.TBP0);
delete t;
} else {
++i;
}
}
}
@ -1519,23 +1502,19 @@ void GSTextureCache::PrintMemoryUsage()
uint32 tex_rt = 0;
uint32 rt = 0;
uint32 dss = 0;
for(hash_set<Source*>::iterator i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); i++) {
Source* s = *i;
for(auto s : m_src.m_surfaces) {
if(s && !s->m_shared_texture) {
if(s->m_target)
tex_rt += s->m_texture->GetMemUsage();
else
tex += s->m_texture->GetMemUsage();
}
}
for(list<Target*>::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) {
Target* t = *i;
for(auto t : m_dst[RenderTarget]) {
if(t)
rt += t->m_texture->GetMemUsage();
}
for(list<Target*>::iterator i = m_dst[DepthStencil].begin(); i != m_dst[DepthStencil].end(); i++) {
Target* t = *i;
for(auto t : m_dst[DepthStencil]) {
if(t)
dss += t->m_texture->GetMemUsage();
}
@ -1992,8 +1971,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
// GH: I don't know why but it seems we only consider the first page for a render target
size_t page = TEX0.TBP0 >> 5;
m_map[page].push_front(s);
s->m_erase_it[page] = m_map[page].begin();
s->m_erase_it[page] = m_map[page].InsertFront(s);
return;
}
@ -2003,7 +1981,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
{
if(uint32 p = s->m_pages_as_bit[i])
{
list<Source*>* m = &m_map[i << 5];
auto* m = &m_map[i << 5];
auto* e = &s->m_erase_it[i << 5];
unsigned long j;
@ -2015,8 +1993,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but requires BMI1 (basically a recent CPU)
p ^= 1U << j;
m[j].push_front(s);
e[j] = m[j].begin();
e[j] = m[j].InsertFront(s);
}
}
}
@ -2024,7 +2001,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, GSOffset*
void GSTextureCache::SourceMap::RemoveAll()
{
for (auto &t : m_surfaces) delete t;
for(auto s : m_surfaces) delete s;
m_surfaces.clear();
@ -2044,9 +2021,8 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
if (s->m_target)
{
size_t page = s->m_TEX0.TBP0 >> 5;
m_map[page].erase(s->m_erase_it[page]);
const size_t page = s->m_TEX0.TBP0 >> 5;
m_map[page].EraseIndex(s->m_erase_it[page]);
}
else
{
@ -2054,8 +2030,8 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
{
if(uint32 p = s->m_pages_as_bit[i])
{
list<Source*>* m = &m_map[i << 5];
auto* e = &s->m_erase_it[i << 5];
auto* m = &m_map[i << 5];
const auto* e = &s->m_erase_it[i << 5];
unsigned long j;
@ -2066,7 +2042,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
// Or BLSR (AKA Reset Lowest Set Bit). No dependency but requires BMI1 (basically a recent CPU)
p ^= 1U << j;
m[j].erase(e[j]);
m[j].EraseIndex(e[j]);
}
}
}

View File

@ -22,6 +22,7 @@
#pragma once
#include "GSRenderer.h"
#include "GSFastList.h"
#include "GSDirtyRect.h"
class GSTextureCache
@ -66,14 +67,14 @@ public:
bool m_complete;
bool m_repeating;
bool m_spritehack_t;
vector<GSVector2i>* m_p2t;
std::vector<GSVector2i>* m_p2t;
// Keep a trace of the target origin. There is no guarantee that the pointer will
// still be valid in the future. However it ought to be good when the source is created
// so it can be used to access un-converted data for the current draw call.
GSTexture* m_from_target;
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
// Keep an GSTextureCache::m_map iterator to allow fast erase
std::array<std::list<Source*>::iterator, MAX_PAGES> m_erase_it;
// Keep the GSTextureCache::SourceMap::m_map element index (as returned by InsertFront) to allow fast erase
std::array<uint16, MAX_PAGES> m_erase_it;
uint32* m_pages_as_bit;
public:
@ -108,7 +109,7 @@ public:
{
public:
hash_set<Source*> m_surfaces;
std::list<Source*> m_map[MAX_PAGES];
std::array<FastList<Source*>, MAX_PAGES> m_map;
uint32 m_pages[16]; // bitmap of all pages
bool m_used;
@ -123,7 +124,7 @@ public:
protected:
GSRenderer* m_renderer;
SourceMap m_src;
std::list<Target*> m_dst[2];
FastList<Target*> m_dst[2];
bool m_paltex;
int m_spritehack;
bool m_preload_frame;