mirror of https://github.com/PCSX2/pcsx2.git
GSdx: a little refinement to the fix for the issue that came up with Bully.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5016 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ed8eb53c22
commit
2628d5bb7d
|
@ -42,10 +42,14 @@ public:
|
|||
void* param;
|
||||
|
||||
GSRasterizerData()
|
||||
: vertices(NULL)
|
||||
: scissor(GSVector4i::zero())
|
||||
, bbox(GSVector4i::zero())
|
||||
, primclass(GS_INVALID_CLASS)
|
||||
, vertices(NULL)
|
||||
, count(0)
|
||||
, solidrect(false)
|
||||
, syncpoint(false)
|
||||
, frame(0)
|
||||
, param(NULL)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -185,6 +185,25 @@ void GSRendererSW::Draw()
|
|||
data->solidrect = gd->sel.IsSolidRect();
|
||||
data->frame = m_perfmon.GetFrame();
|
||||
|
||||
GSVector4i r = data->bbox.rintersect(data->scissor);
|
||||
|
||||
list<uint32>* fb_pages = m_context->offset.fb->GetPages(r);
|
||||
list<uint32>* zb_pages = m_context->offset.zb->GetPages(r);
|
||||
|
||||
//
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
if(m_fzb != m_context->offset.fzb)
|
||||
{
|
||||
m_fzb = m_context->offset.fzb;
|
||||
|
@ -192,35 +211,39 @@ void GSRendererSW::Draw()
|
|||
data->syncpoint = true;
|
||||
}
|
||||
|
||||
if(m_context->FRAME.Block() == m_context->ZBUF.Block())
|
||||
{
|
||||
// Writing the same address in a different format is incompatible with our screen splitting technique,
|
||||
// it need not be done by the same batch; it is enough if two batches follow each other in the queue:
|
||||
// first it writes frame buffer at address X, then as z buffer also at address X, due to format differences the
|
||||
// block layout in memory is not the same.
|
||||
//
|
||||
// Most of these situations are detected by the previous m_fzb != m_context->offset.fzb check,
|
||||
// but when FRAME.Block() == ZBUF.Block() and f/z writes are switched on/off mutually then offset.fzb stays the same.
|
||||
//
|
||||
// Bully: FBP/ZBP = 0x2300
|
||||
// - cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue
|
||||
// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
|
||||
|
||||
data->syncpoint = true;
|
||||
if(!data->syncpoint)
|
||||
{
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
for(list<uint32>::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
|
||||
{
|
||||
if(m_fzb_pages[*i] & 0xffff0000) data->syncpoint = true; // already used as a z-buffer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i r = data->bbox.rintersect(data->scissor);
|
||||
if(!data->syncpoint)
|
||||
{
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
for(list<uint32>::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
|
||||
{
|
||||
if(m_fzb_pages[*i] & 0x0000ffff) data->syncpoint = true; // already used as a frame buffer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_tc->InvalidateVideoMem(m_context->offset.fb, r);
|
||||
|
||||
UseTargetPages(m_context->offset.fb, r);
|
||||
UseTargetPages(fb_pages, 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_tc->InvalidateVideoMem(m_context->offset.zb, r);
|
||||
|
||||
UseTargetPages(m_context->offset.zb, r);
|
||||
UseTargetPages(zb_pages, 1);
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
|
@ -322,13 +345,13 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
m_tc->InvalidateVideoMem(o, r);
|
||||
list<uint32>* pages = o->GetPages(r);
|
||||
|
||||
m_tc->InvalidatePages(pages, o->psm);
|
||||
|
||||
// check if the changing pages either used as a texture or a target
|
||||
|
||||
list<uint32>* pages = o->GetPages(r);
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
|
||||
|
@ -347,11 +370,9 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
|
||||
list<uint32>* pages = o->GetPages(r);
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
|
||||
if(m_fzb_pages[page])
|
||||
if(m_fzb_pages[*i])
|
||||
{
|
||||
Sync(6);
|
||||
|
||||
|
@ -360,25 +381,23 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::UseTargetPages(GSOffset* o, const GSVector4i& rect)
|
||||
void GSRendererSW::UseTargetPages(const list<uint32>* pages, int offset)
|
||||
{
|
||||
list<uint32>* pages = o->GetPages(rect);
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
_InterlockedIncrement(&m_fzb_pages[*i]);
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX);
|
||||
|
||||
_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset);
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::ReleaseTargetPages(GSOffset* o, const GSVector4i& rect)
|
||||
void GSRendererSW::ReleaseTargetPages(const list<uint32>* pages, int offset)
|
||||
{
|
||||
list<uint32>* pages = o->GetPages(rect);
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
ASSERT(m_fzb_pages[*i] > 0);
|
||||
ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
|
||||
|
||||
_InterlockedDecrement(&m_fzb_pages[*i]);
|
||||
_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,8 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
|
|||
{
|
||||
GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
|
||||
|
||||
gd->sel.key = 0;
|
||||
|
||||
gd->clut = NULL;
|
||||
gd->dimx = NULL;
|
||||
|
||||
|
@ -55,12 +57,12 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
|
|||
|
||||
if(gd->sel.fwrite)
|
||||
{
|
||||
m_parent->ReleaseTargetPages(m_fb, r);
|
||||
m_parent->ReleaseTargetPages(m_fb->GetPages(r), 0);
|
||||
}
|
||||
|
||||
if(gd->sel.zwrite)
|
||||
{
|
||||
m_parent->ReleaseTargetPages(m_zb, r);
|
||||
m_parent->ReleaseTargetPages(m_zb->GetPages(r), 1);
|
||||
}
|
||||
|
||||
if(gd->clut) _aligned_free(gd->clut);
|
||||
|
@ -77,7 +79,7 @@ protected:
|
|||
uint8* m_output;
|
||||
bool m_reset;
|
||||
GSPixelOffset4* m_fzb;
|
||||
long m_fzb_pages[512];
|
||||
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
||||
uint32 m_tex_pages[16];
|
||||
|
||||
void Reset();
|
||||
|
@ -90,8 +92,8 @@ protected:
|
|||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||
|
||||
void UseTargetPages(GSOffset* o, const GSVector4i& rect);
|
||||
void ReleaseTargetPages(GSOffset* o, const GSVector4i& rect);
|
||||
void UseTargetPages(const list<uint32>* pages, int offset);
|
||||
void ReleaseTargetPages(const list<uint32>* pages, int offset);
|
||||
void UseSourcePages(const GSTextureCacheSW::Texture* t);
|
||||
|
||||
bool GetScanlineGlobalData(GSScanlineGlobalData& gd);
|
||||
|
|
|
@ -83,11 +83,9 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
|||
return t;
|
||||
}
|
||||
|
||||
void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
|
||||
void GSTextureCacheSW::InvalidatePages(const list<uint32>* pages, uint32 psm)
|
||||
{
|
||||
list<uint32>* pages = o->GetPages(rect);
|
||||
|
||||
for(list<uint32>::iterator p = pages->begin(); p != pages->end(); p++)
|
||||
for(list<uint32>::const_iterator p = pages->begin(); p != pages->end(); p++)
|
||||
{
|
||||
uint32 page = *p;
|
||||
|
||||
|
@ -97,7 +95,7 @@ void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
|
|||
{
|
||||
Texture* t = *i;
|
||||
|
||||
if(GSUtil::HasSharedBits(o->psm, t->m_TEX0.PSM))
|
||||
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
|
||||
{
|
||||
if(t->m_repeating)
|
||||
{
|
||||
|
@ -185,7 +183,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
|
||||
list<uint32>* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
|
||||
for(list<uint32>::iterator i = pages->begin(); i != pages->end(); i++)
|
||||
for(list<uint32>::const_iterator i = pages->begin(); i != pages->end(); i++)
|
||||
{
|
||||
uint32 page = *i;
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ public:
|
|||
|
||||
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
|
||||
|
||||
void InvalidateVideoMem(GSOffset* o, const GSVector4i& r);
|
||||
void InvalidatePages(const list<uint32>* pages, uint32 psm);
|
||||
|
||||
void RemoveAll();
|
||||
void RemoveAt(Texture* t);
|
||||
|
|
|
@ -204,7 +204,7 @@ protected:
|
|||
}
|
||||
|
||||
{
|
||||
// NOTE: this is scoped because we must make sure the last "item" is no longer around when Wait detects an empty queue
|
||||
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
|
||||
|
||||
T item = m_queue.front();
|
||||
|
||||
|
|
|
@ -332,6 +332,13 @@ typedef signed long long int64;
|
|||
return retval;
|
||||
}
|
||||
|
||||
// Atomically adds Value to the 16-bit integer at Addend (x86 "lock xadd") and
// returns the value that was stored at Addend before the addition, sign-extended to long.
// Used by _InterlockedIncrement16 / _InterlockedDecrement16.
__forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
{
	// The register operand must be 16 bits wide to match the "w" suffix of xaddw;
	// with a long operand the compiler would print a 32/64-bit register name.
	short retval = Value;

	// - __volatile__: the read-modify-write must be kept even when the caller ignores the returned value.
	// - "+m": xadd both reads and writes the memory operand.
	__asm__ __volatile__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval), [Addend] "+m" (*Addend) : : "memory");

	return retval;
}
|
||||
|
||||
__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
|
||||
{
|
||||
return _InterlockedExchangeAdd(lpAddend, -1) - 1;
|
||||
|
@ -342,6 +349,16 @@ typedef signed long long int64;
|
|||
return _InterlockedExchangeAdd(lpAddend, 1) + 1;
|
||||
}
|
||||
|
||||
// Atomically decrements the 16-bit integer at lpAddend and returns the decremented value.
__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
{
	// The exchange-add yields the value held before the update, so subtract one more to report the new value.
	const long previous = _InterlockedExchangeAdd16(lpAddend, -1);

	return previous - 1;
}
|
||||
|
||||
// Atomically increments the 16-bit integer at lpAddend and returns the incremented value.
__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
{
	// The exchange-add yields the value held before the update, so add one more to report the new value.
	const long previous = _InterlockedExchangeAdd16(lpAddend, 1);

	return previous + 1;
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
__forceinline unsigned long long __rdtsc()
|
||||
|
|
Loading…
Reference in New Issue