mirror of https://github.com/PCSX2/pcsx2.git
GSdx: VTune tells me GSOffset::GetPages is too slow without the cache, and its slowest part is new uint32[], so let's use pre-allocated buffers instead. In d3d9 mode, locking the vertex buffer is the most painful thing: there is a terrible delay until it returns, whereas the same Map call in d3d10/11 does not behave like that.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5049 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d8646e9dab
commit
651196f665
|
@ -1992,7 +1992,7 @@ GSOffset::~GSOffset()
|
|||
{
|
||||
}
|
||||
|
||||
uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
|
||||
uint32* GSOffset::GetPages(const GSVector4i& rect, uint32* pages, GSVector4i* bbox)
|
||||
{
|
||||
GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs;
|
||||
|
||||
|
@ -2006,9 +2006,14 @@ uint32* GSOffset::GetPages(const GSVector4i& rect, GSVector4i* bbox)
|
|||
|
||||
int size = r.width() * r.height();
|
||||
|
||||
int limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
|
||||
int limit = MAX_PAGES + 1;
|
||||
|
||||
uint32* pages = new uint32[limit];
|
||||
if(pages == NULL)
|
||||
{
|
||||
limit = std::min<int>((size >> ((bp & 31) != 0 ? 6 : 11)) + 2, MAX_PAGES) + 1;
|
||||
|
||||
pages = new uint32[limit];
|
||||
}
|
||||
|
||||
__aligned(uint32, 16) tmp[16];
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ public:
|
|||
|
||||
enum {EOP = 0xffffffff};
|
||||
|
||||
uint32* GetPages(const GSVector4i& rect, GSVector4i* bbox = NULL);
|
||||
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
|
||||
};
|
||||
|
||||
struct GSPixelOffset4
|
||||
|
|
|
@ -490,13 +490,15 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
uint32* pages = o->GetPages(r);
|
||||
uint32* RESTRICT p = m_tmp_pages;
|
||||
|
||||
o->GetPages(r, p);
|
||||
|
||||
m_tc->InvalidatePages(pages, o->psm);
|
||||
m_tc->InvalidatePages(p, o->psm);
|
||||
|
||||
// check if the changing pages either used as a texture or a target
|
||||
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
for(; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
uint32 page = *p;
|
||||
|
||||
|
@ -509,17 +511,17 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
delete [] pages;
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||
|
||||
uint32* pages = o->GetPages(r);
|
||||
uint32* RESTRICT p = m_tmp_pages;
|
||||
|
||||
o->GetPages(r, p);
|
||||
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
for(; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
//while(m_fzb_pages[*p]) _mm_pause();
|
||||
|
||||
|
@ -530,8 +532,6 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
delete [] pages;
|
||||
}
|
||||
|
||||
void GSRendererSW::UsePages(const uint32* pages, int type)
|
||||
|
|
|
@ -52,6 +52,7 @@ protected:
|
|||
GSPixelOffset4* m_fzb;
|
||||
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
||||
uint16 m_tex_pages[512];
|
||||
uint32 m_tmp_pages[512 + 1];
|
||||
|
||||
void Reset();
|
||||
void VSync(int field);
|
||||
|
|
|
@ -319,7 +319,9 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
const uint32* pages = o->GetPages(rect, &r);
|
||||
uint32* pages = (uint32*)m_temp;
|
||||
|
||||
o->GetPages(rect, pages, &r);
|
||||
|
||||
bool found = false;
|
||||
|
||||
|
@ -374,8 +376,6 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect, boo
|
|||
}
|
||||
}
|
||||
|
||||
delete [] pages;
|
||||
|
||||
if(!target) return;
|
||||
|
||||
for(int type = 0; type < 2; type++)
|
||||
|
|
Loading…
Reference in New Issue