GSdx: a little refinement to the fix for the issue that came up with Bully.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5016 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-12-25 07:26:42 +00:00
parent ed8eb53c22
commit 2628d5bb7d
7 changed files with 92 additions and 52 deletions

View File

@ -42,10 +42,14 @@ public:
void* param;
GSRasterizerData()
: vertices(NULL)
: scissor(GSVector4i::zero())
, bbox(GSVector4i::zero())
, primclass(GS_INVALID_CLASS)
, vertices(NULL)
, count(0)
, solidrect(false)
, syncpoint(false)
, frame(0)
, param(NULL)
{
}

View File

@ -185,6 +185,25 @@ void GSRendererSW::Draw()
data->solidrect = gd->sel.IsSolidRect();
data->frame = m_perfmon.GetFrame();
GSVector4i r = data->bbox.rintersect(data->scissor);
list<uint32>* fb_pages = m_context->offset.fb->GetPages(r);
list<uint32>* zb_pages = m_context->offset.zb->GetPages(r);
//
if(gd->sel.fwrite)
{
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
}
if(gd->sel.zwrite)
{
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
}
//
if(m_fzb != m_context->offset.fzb)
{
m_fzb = m_context->offset.fzb;
@ -192,35 +211,39 @@ void GSRendererSW::Draw()
data->syncpoint = true;
}
if(m_context->FRAME.Block() == m_context->ZBUF.Block())
{
// Writing the same address in a different format is incompatible with our screen splitting technique,
// it does not necessarily have to be done by the same batch; it is enough if there are two in the queue one after the other,
// first it writes frame buffer at address X, then as z buffer also at address X, due to format differences the
// block layout in memory is not the same.
//
// Most of these situations are detected by the previous m_fzb != m_context->offset.fzb check,
// but when FRAME.Block() == ZBUF.Block() and f/z writes are switched on/off mutually then offset.fzb stays the same.
//
// Bully: FBP/ZBP = 0x2300
// - cross-check frame and z-buffer pages; they must not overlap with each other or with previous batches in the queue
// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
data->syncpoint = true;
if(!data->syncpoint)
{
if(gd->sel.fwrite)
{
for(list<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0xffff0000) data->syncpoint = true; // already used as a z-buffer
}
}
}
GSVector4i r = data->bbox.rintersect(data->scissor);
if(!data->syncpoint)
{
if(gd->sel.zwrite)
{
for(list<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
{
if(m_fzb_pages[*i] & 0x0000ffff) data->syncpoint = true; // already used as a frame buffer
}
}
}
if(gd->sel.fwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
UseTargetPages(m_context->offset.fb, r);
UseTargetPages(fb_pages, 0);
}
if(gd->sel.zwrite)
{
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
UseTargetPages(m_context->offset.zb, r);
UseTargetPages(zb_pages, 1);
}
if(s_dump)
@ -322,13 +345,13 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
{
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
m_tc->InvalidateVideoMem(o, r);
list<uint32>* pages = o->GetPages(r);
m_tc->InvalidatePages(pages, o->psm);
// check if the changing pages either used as a texture or a target
list<uint32>* pages = o->GetPages(r);
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
uint32 page = *i;
@ -347,11 +370,9 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
list<uint32>* pages = o->GetPages(r);
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
uint32 page = *i;
if(m_fzb_pages[page])
if(m_fzb_pages[*i])
{
Sync(6);
@ -360,25 +381,23 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
}
}
void GSRendererSW::UseTargetPages(GSOffset* o, const GSVector4i& rect)
void GSRendererSW::UseTargetPages(const list<uint32>* pages, int offset)
{
list<uint32>* pages = o->GetPages(rect);
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX);
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
{
_InterlockedIncrement(&m_fzb_pages[*i]);
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset);
}
}
void GSRendererSW::ReleaseTargetPages(GSOffset* o, const GSVector4i& rect)
void GSRendererSW::ReleaseTargetPages(const list<uint32>* pages, int offset)
{
list<uint32>* pages = o->GetPages(rect);
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
ASSERT(m_fzb_pages[*i] > 0);
ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
_InterlockedDecrement(&m_fzb_pages[*i]);
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
}
}

View File

@ -41,6 +41,8 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
{
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
gd->sel.key = 0;
gd->clut = NULL;
gd->dimx = NULL;
@ -55,12 +57,12 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
if(gd->sel.fwrite)
{
m_parent->ReleaseTargetPages(m_fb, r);
m_parent->ReleaseTargetPages(m_fb->GetPages(r), 0);
}
if(gd->sel.zwrite)
{
m_parent->ReleaseTargetPages(m_zb, r);
m_parent->ReleaseTargetPages(m_zb->GetPages(r), 1);
}
if(gd->clut) _aligned_free(gd->clut);
@ -77,7 +79,7 @@ protected:
uint8* m_output;
bool m_reset;
GSPixelOffset4* m_fzb;
long m_fzb_pages[512];
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
uint32 m_tex_pages[16];
void Reset();
@ -90,8 +92,8 @@ protected:
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
void UseTargetPages(GSOffset* o, const GSVector4i& rect);
void ReleaseTargetPages(GSOffset* o, const GSVector4i& rect);
void UseTargetPages(const list<uint32>* pages, int offset);
void ReleaseTargetPages(const list<uint32>* pages, int offset);
void UseSourcePages(const GSTextureCacheSW::Texture* t);
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);

View File

@ -83,11 +83,9 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
return t;
}
void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm)
{
list<uint32>* pages = o->GetPages(rect);
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
for(list<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++)
{
uint32 page = *p;
@ -97,7 +95,7 @@ void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
{
Texture* t = *i;
if(GSUtil::HasSharedBits(o->psm, t->m_TEX0.PSM))
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
if(t->m_repeating)
{
@ -185,7 +183,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
{
uint32 page = *i;

View File

@ -64,7 +64,7 @@ public:
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r);
void InvalidatePages(const list<uint32>* pages, uint32 psm);
void RemoveAll();
void RemoveAt(Texture* t);

View File

@ -204,7 +204,7 @@ protected:
}
{
// NOTE: this is scoped because we must make sure the last "item" is no longer around when Wait detects an empty queue
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
T item = m_queue.front();

View File

@ -332,6 +332,13 @@ typedef signed long long int64;
return retval;
}
// GCC replacement for the MSVC _InterlockedExchangeAdd16 intrinsic:
// atomically adds Value to the 16-bit word at *Addend and returns the
// PREVIOUS value (sign-extended into the long return).
__forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
	// The temporary must be 16-bit: "xaddw" takes a 16-bit register operand.
	// With a 'long' temporary gcc would name a 32-bit register for %[retval],
	// which does not match the 'w'-suffixed instruction (assembly error, or
	// garbage in the upper bits of the returned previous value).
	short retval = Value;
	__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
	return retval; // implicit sign-extension short -> long
}
__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
{
return _InterlockedExchangeAdd(lpAddend, -1) - 1;
@ -342,6 +349,16 @@ typedef signed long long int64;
return _InterlockedExchangeAdd(lpAddend, 1) + 1;
}
// Atomically decrements the 16-bit word at *lpAddend and returns the NEW value
// (GCC replacement for the MSVC intrinsic of the same name).
__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
{
	// _InterlockedExchangeAdd16 yields the value BEFORE the add, so subtract
	// one more to report the post-decrement value, as the intrinsic requires.
	const long previous = _InterlockedExchangeAdd16(lpAddend, -1);
	return static_cast<short>(previous - 1);
}
// Atomically increments the 16-bit word at *lpAddend and returns the NEW value
// (GCC replacement for the MSVC intrinsic of the same name).
__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
{
	// The exchange-add returns the PRE-increment value; add one to get the
	// post-increment result expected from this intrinsic.
	const long previous = _InterlockedExchangeAdd16(lpAddend, 1);
	return static_cast<short>(previous + 1);
}
#ifdef __GNUC__
__forceinline unsigned long long __rdtsc()