mirror of https://github.com/PCSX2/pcsx2.git
GSdx: dx9 texture uploads should be at least as fast as before
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1442 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
9547448020
commit
7c2e51157e
|
@ -279,7 +279,10 @@ void GSTextureCache::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const
|
|||
{
|
||||
if(!s->m_target)
|
||||
{
|
||||
s->m_valid[page] = 0;
|
||||
s->m_blocks -= s->m_valid[page].count;
|
||||
|
||||
s->m_valid[page].block = 0;
|
||||
s->m_valid[page].count = 0;
|
||||
|
||||
found = true;
|
||||
}
|
||||
|
@ -488,6 +491,9 @@ GSTextureCache::Source::Source(GSRenderer* r)
|
|||
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 16);
|
||||
|
||||
memset(m_clut, 0, sizeof(m_clut));
|
||||
|
||||
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 16);
|
||||
m_write.count = 0;
|
||||
}
|
||||
|
||||
GSTextureCache::Source::~Source()
|
||||
|
@ -495,6 +501,8 @@ GSTextureCache::Source::~Source()
|
|||
m_renderer->m_dev->Recycle(m_palette);
|
||||
|
||||
_aligned_free(m_clut);
|
||||
|
||||
_aligned_free(m_write.rect);
|
||||
}
|
||||
|
||||
void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
|
||||
|
@ -509,175 +517,170 @@ void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TE
|
|||
m_TEX0 = TEX0;
|
||||
m_TEXA = TEXA;
|
||||
|
||||
if(m_blocks == m_total_blocks)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
|
||||
|
||||
GSVector2i s = psm.bs;
|
||||
GSVector2i bs = psm.bs;
|
||||
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(bs);
|
||||
|
||||
uint32 bp = m_TEX0.TBP0;
|
||||
uint32 bw = m_TEX0.TBW;
|
||||
|
||||
bool repeating = (1 << m_TEX0.TW) > (bw << 6); // TODO: bw == 0
|
||||
|
||||
uint32 blocks = 0;
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = psm.bn(0, y, bp, bw);
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + psm.blockOffset[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row].block & col) == 0)
|
||||
{
|
||||
if(!repeating) m_valid[row].block |= col;
|
||||
|
||||
m_valid[row].count++;
|
||||
|
||||
Write(GSVector4i(x, y, x + bs.x, y + bs.y));
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(blocks > 0)
|
||||
{
|
||||
if(repeating)
|
||||
{
|
||||
for(int y = r.top; y < r.bottom; y += bs.y)
|
||||
{
|
||||
uint32 base = psm.bn(0, y, bp, bw);
|
||||
|
||||
for(int x = r.left; x < r.right; x += bs.x)
|
||||
{
|
||||
uint32 block = base + psm.blockOffset[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
m_valid[row].block |= col;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_blocks += blocks;
|
||||
|
||||
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, bs.x * bs.y * sizeof(uint32) * blocks);
|
||||
|
||||
Flush(m_write.count);
|
||||
}
|
||||
}
|
||||
|
||||
void GSTextureCache::Source::Write(const GSVector4i& r)
|
||||
{
|
||||
m_write.rect[m_write.count++] = r;
|
||||
|
||||
while(m_write.count >= 2)
|
||||
{
|
||||
GSVector4i& a = m_write.rect[m_write.count - 2];
|
||||
GSVector4i& b = m_write.rect[m_write.count - 1];
|
||||
|
||||
if((a == b.zyxw()).mask() == 0xfff0)
|
||||
{
|
||||
a.right = b.right; // extend right
|
||||
|
||||
m_write.count--;
|
||||
}
|
||||
else if((a == b.xwzy()).mask() == 0xff0f)
|
||||
{
|
||||
a.bottom = b.bottom; // extend down
|
||||
|
||||
m_write.count--;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(m_write.count > 2)
|
||||
{
|
||||
Flush(1);
|
||||
}
|
||||
}
|
||||
|
||||
void GSTextureCache::Source::Flush(uint32 count)
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
|
||||
|
||||
int tw = 1 << m_TEX0.TW;
|
||||
int th = 1 << m_TEX0.TH;
|
||||
|
||||
GSVector4i tr(0, 0, tw, th);
|
||||
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
|
||||
|
||||
uint32 bp = m_TEX0.TBP0;
|
||||
uint32 bw = m_TEX0.TBW;
|
||||
|
||||
uint32 blocks = 0;
|
||||
|
||||
// TODO
|
||||
static uint8* buff = (uint8*)_aligned_malloc(1024 * 16 * sizeof(uint32), 16); // max decompressed size for a row of blocks (1024 x 16, 4bpp)
|
||||
static uint8* buff = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 16);
|
||||
|
||||
int pitch = max(tw, s.x) * sizeof(uint32);
|
||||
|
||||
GSLocalMemory::readTexture rtx = psm.rtx;
|
||||
int pitch = max(tw, psm.bs.x) * sizeof(uint32);
|
||||
|
||||
const GSLocalMemory& mem = m_renderer->m_mem;
|
||||
|
||||
// TODO: bw == 0 (sfex)
|
||||
GSLocalMemory::readTexture rtx = psm.rtx;
|
||||
|
||||
if(tw <= (bw << 6))
|
||||
for(uint32 i = 0; i < count; i++)
|
||||
{
|
||||
// r.right = min(r.right, bw << 6);
|
||||
GSVector4i r = m_write.rect[i];
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += s.y)
|
||||
if((r > tr).mask() & 0xff00)
|
||||
{
|
||||
uint32 base = psm.bn(0, y, bp, bw);
|
||||
(mem.*rtx)(r, buff, pitch, m_TEX0, m_TEXA);
|
||||
|
||||
int left = r.left;
|
||||
int right = r.left;
|
||||
|
||||
for(int x = r.left; x < r.right; x += s.x)
|
||||
{
|
||||
uint32 block = base + psm.blockOffset[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
m_valid[row] |= col;
|
||||
|
||||
if(right < x)
|
||||
{
|
||||
Write(GSVector4i(left, y, right, y + s.y), tr, buff, pitch);
|
||||
|
||||
left = right = x;
|
||||
}
|
||||
|
||||
right += s.x;
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(left < right)
|
||||
{
|
||||
Write(GSVector4i(left, y, right, y + s.y), tr, buff, pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// unfortunatelly a block may be part of the same texture multiple times at different places (tw 1024 > tbw 640, between 640 -> 1024 it is repeated from the next row),
|
||||
// so just can't set the block's bit to valid in one pass, even if 99.9% of the games don't address the repeated part at the right side
|
||||
|
||||
// TODO: still bogus if those repeated parts aren't fetched together
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += s.y)
|
||||
{
|
||||
uint32 base = psm.bn(0, y, bp, bw);
|
||||
|
||||
int left = r.left;
|
||||
int right = r.left;
|
||||
|
||||
for(int x = r.left; x < r.right; x += s.x)
|
||||
{
|
||||
uint32 block = base + psm.blockOffset[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
if((m_valid[row] & col) == 0)
|
||||
{
|
||||
if(right < x)
|
||||
{
|
||||
Write(GSVector4i(left, y, right, y + s.y), tr, buff, pitch);
|
||||
|
||||
left = right = x;
|
||||
}
|
||||
|
||||
right += s.x;
|
||||
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(left < right)
|
||||
{
|
||||
Write(GSVector4i(left, y, right, y + s.y), tr, buff, pitch);
|
||||
}
|
||||
}
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += s.y)
|
||||
{
|
||||
uint32 base = psm.bn(0, y, bp, bw);
|
||||
|
||||
for(int x = r.left; x < r.right; x += s.x)
|
||||
{
|
||||
uint32 block = base + psm.blockOffset[x >> 3];
|
||||
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
uint32 row = block >> 5;
|
||||
uint32 col = 1 << (block & 31);
|
||||
|
||||
m_valid[row] |= col;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//_aligned_free(buff);
|
||||
|
||||
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, s.x * s.y * sizeof(uint32) * blocks);
|
||||
}
|
||||
|
||||
void GSTextureCache::Source::Write(const GSVector4i& r, const GSVector4i& tr, uint8* buff, int pitch)
|
||||
{
|
||||
if(r.rempty()) return;
|
||||
|
||||
GSLocalMemory::readTexture rtx = GSLocalMemory::m_psm[m_TEX0.PSM].rtx;
|
||||
|
||||
if((r > tr).mask() & 0xff00)
|
||||
{
|
||||
(m_renderer->m_mem.*rtx)(r, buff, pitch, m_TEX0, m_TEXA);
|
||||
|
||||
m_texture->Update(r.rintersect(tr), buff, pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSTexture::GSMap m;
|
||||
|
||||
if(m_texture->Map(m, &r))
|
||||
{
|
||||
(m_renderer->m_mem.*rtx)(r, m.bits, m.pitch, m_TEX0, m_TEXA);
|
||||
|
||||
m_texture->Unmap();
|
||||
m_texture->Update(r.rintersect(tr), buff, pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
(m_renderer->m_mem.*rtx)(r, buff, pitch, m_TEX0, m_TEXA);
|
||||
GSTexture::GSMap m;
|
||||
|
||||
m_texture->Update(r, buff, pitch);
|
||||
if(m_texture->Map(m, &r))
|
||||
{
|
||||
(mem.*rtx)(r, m.bits, m.pitch, m_TEX0, m_TEXA);
|
||||
|
||||
m_texture->Unmap();
|
||||
}
|
||||
else
|
||||
{
|
||||
(mem.*rtx)(r, buff, pitch, m_TEX0, m_TEXA);
|
||||
|
||||
m_texture->Update(r, buff, pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(count < m_write.count)
|
||||
{
|
||||
memcpy(m_write.rect[0], &m_write.rect[count], (m_write.count - count) * sizeof(m_write.rect[0]));
|
||||
}
|
||||
|
||||
m_write.count -= count;
|
||||
|
||||
//_aligned_free(buff);
|
||||
}
|
||||
|
||||
// GSTextureCache::Target
|
||||
|
@ -782,7 +785,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0)
|
|||
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
|
||||
GSVector2i bs = (bp & 31) == 0 ? psm.pgs : psm.bs;
|
||||
GSVector2i bs = psm.bs;
|
||||
|
||||
int blocks = 0;
|
||||
|
||||
|
@ -792,16 +795,19 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0)
|
|||
|
||||
for(int x = 0; x < tw; x += bs.x)
|
||||
{
|
||||
uint32 page = (base + psm.blockOffset[x >> 3]) >> 5;
|
||||
uint32 block = base + psm.blockOffset[x >> 3];
|
||||
|
||||
if(page < MAX_PAGES)
|
||||
if(block < MAX_BLOCKS)
|
||||
{
|
||||
m_map[page][s] = true;
|
||||
m_map[block >> 5][s] = true;
|
||||
|
||||
s->m_pages.push_back(page);
|
||||
blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s->m_blocks = 0;
|
||||
s->m_total_blocks = blocks;
|
||||
}
|
||||
|
||||
void GSTextureCache::SourceMap::RemoveAll()
|
||||
|
@ -823,9 +829,9 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
|
|||
{
|
||||
m_surfaces.erase(s);
|
||||
|
||||
for(list<int>::iterator i = s->m_pages.begin(); i != s->m_pages.end(); i++)
|
||||
for(int i = 0; i < countof(m_map); i++)
|
||||
{
|
||||
m_map[*i].erase(s);
|
||||
m_map[i].erase(s);
|
||||
}
|
||||
|
||||
delete s;
|
||||
|
|
|
@ -50,11 +50,16 @@ public:
|
|||
|
||||
class Source : public Surface
|
||||
{
|
||||
struct {GSVector4i* rect; uint32 count;} m_write;
|
||||
|
||||
void Write(const GSVector4i& r);
|
||||
void Flush(uint32 count);
|
||||
|
||||
public:
|
||||
GSTexture* m_palette;
|
||||
bool m_initpalette;
|
||||
list<int> m_pages;
|
||||
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
|
||||
uint32 m_blocks, m_total_blocks;
|
||||
struct {uint32 block, count;} m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
|
||||
uint32* m_clut;
|
||||
int m_bpp;
|
||||
bool m_target;
|
||||
|
@ -66,8 +71,6 @@ public:
|
|||
virtual bool Create() = 0;
|
||||
virtual bool Create(Target* dst) = 0;
|
||||
virtual void Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect);
|
||||
|
||||
void Write(const GSVector4i& r, const GSVector4i& tr, uint8* buff, int pitch);
|
||||
};
|
||||
|
||||
class Target : public Surface
|
||||
|
|
Loading…
Reference in New Issue