GSdx: VTune shows that GSOffset::GetPages is too slow without the cache, and that its slowest part is the `new uint32[]` allocation, so let's use pre-allocated buffers instead. In d3d9 mode, locking the vertex buffer is the most painful thing: there is a terrible delay until the call returns, whereas the same Map call in d3d10/11 does not behave like that.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5049 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-06 01:20:01 +00:00
parent d8646e9dab
commit 651196f665
5 changed files with 22 additions and 16 deletions

View File

@ -1992,7 +1992,7 @@ GSOffset::~GSOffset()
{
}
uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox)
{
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
@ -2006,9 +2006,14 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
int size = r.width() * r.height();
int limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
int limit = MAX_PAGES + 1;
uint32* pages = new uint32[limit];
if(pages == NULL)
{
limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
pages = new uint32[limit];
}
__aligned(uint32, 16) tmp[16];

View File

@ -53,7 +53,7 @@ public:
enum {EOP = 0xffffffff};
uint32* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
};
struct GSPixelOffset4

View File

@ -490,13 +490,15 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
uint32* pages = o->GetPages(r);
uint32* RESTRICT p = m_tmp_pages;
o->GetPages(r, p);
m_tc->InvalidatePages(pages, o->psm);
m_tc->InvalidatePages(p, o->psm);
// check if the changing pages either used as a texture or a target
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
for(; *p != GSOffset::EOP; p++)
{
uint32 page = *p;
@ -509,17 +511,17 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
break;
}
}
delete [] pages;
}
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
uint32* pages = o->GetPages(r);
uint32* RESTRICT p = m_tmp_pages;
o->GetPages(r, p);
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
for(; *p != GSOffset::EOP; p++)
{
//while(m_fzb_pages[*p]) _mm_pause();
@ -530,8 +532,6 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
break;
}
}
delete [] pages;
}
void GSRendererSW::UsePages(const uint32* pages, int type)

View File

@ -52,6 +52,7 @@ protected:
GSPixelOffset4* m_fzb;
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint16 m_tex_pages[512];
uint32 m_tmp_pages[512 + 1];
void Reset();
void VSync(int field);

View File

@ -319,7 +319,9 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
GSVector4i r;
const uint32* pages = o->GetPages(rect, &r);
uint32* pages = (uint32*)m_temp;
o->GetPages(rect, pages, &r);
bool found = false;
@ -374,8 +376,6 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
}
}
delete [] pages;
if(!target) return;
for(int type = 0; type < 2; type++)