diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h
index 035179ccbe..e90b499ec6 100644
--- a/plugins/GSdx/GSRasterizer.h
+++ b/plugins/GSdx/GSRasterizer.h
@@ -42,10 +42,14 @@ public:
 	void* param;
 
 	GSRasterizerData()
-		: vertices(NULL)
+		: scissor(GSVector4i::zero())
+		, bbox(GSVector4i::zero())
+		, primclass(GS_INVALID_CLASS)
+		, vertices(NULL)
 		, count(0)
 		, solidrect(false)
 		, syncpoint(false)
+		, frame(0)
 		, param(NULL)
 	{
 	}
diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp
index f30d927534..ce579526fb 100644
--- a/plugins/GSdx/GSRendererSW.cpp
+++ b/plugins/GSdx/GSRendererSW.cpp
@@ -185,6 +185,25 @@ void GSRendererSW::Draw()
 	data->solidrect = gd->sel.IsSolidRect();
 	data->frame = m_perfmon.GetFrame();
 
+	GSVector4i r = data->bbox.rintersect(data->scissor);
+
+	list* fb_pages = m_context->offset.fb->GetPages(r);
+	list* zb_pages = m_context->offset.zb->GetPages(r);
+
+	//
+
+	if(gd->sel.fwrite)
+	{
+		m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm);
+	}
+
+	if(gd->sel.zwrite)
+	{
+		m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm);
+	}
+
+	//
+
 	if(m_fzb != m_context->offset.fzb)
 	{
 		m_fzb = m_context->offset.fzb;
@@ -192,35 +211,39 @@ void GSRendererSW::Draw()
 		data->syncpoint = true;
 	}
 
-	if(m_context->FRAME.Block() == m_context->ZBUF.Block())
-	{
-		// Writing the same address in a different format is incompatible with our screen splitting technique,
-		// it must not necessarily be done by the same batch, enough if there are two in the queue one after eachother,
-		// first it writes frame buffer at address X, then as z buffer also at address X, due to format differences the
-		// block layout in memory is not the same.
-		//
-		// Most of these situations are detected by the previous m_fzb != m_context->offset.fzb check,
-		// but when FRAME.Block() == ZBUF.Block() and f/z writes are switched on/off mutually then offset.fzb stays the same.
-		//
-		// Bully: FBP/ZBP = 0x2300
+	// - cross-check frame and z-buffer pages, they cannot overlap with each other and with previous batches in queue
+	// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
 
-		data->syncpoint = true;
+	if(!data->syncpoint)
+	{
+		if(gd->sel.fwrite)
+		{
+			for(list::iterator i = fb_pages->begin(); i != fb_pages->end(); i++)
+			{
+				if(m_fzb_pages[*i] & 0xffff0000) data->syncpoint = true; // already used as a z-buffer
+			}
+		}
 	}
 
-	GSVector4i r = data->bbox.rintersect(data->scissor);
+	if(!data->syncpoint)
+	{
+		if(gd->sel.zwrite)
+		{
+			for(list::iterator i = zb_pages->begin(); i != zb_pages->end(); i++)
+			{
+				if(m_fzb_pages[*i] & 0x0000ffff) data->syncpoint = true; // already used as a frame buffer
+			}
+		}
+	}
 
 	if(gd->sel.fwrite)
 	{
-		m_tc->InvalidateVideoMem(m_context->offset.fb, r);
-
-		UseTargetPages(m_context->offset.fb, r);
+		UseTargetPages(fb_pages, 0);
 	}
 
 	if(gd->sel.zwrite)
 	{
-		m_tc->InvalidateVideoMem(m_context->offset.zb, r);
-
-		UseTargetPages(m_context->offset.zb, r);
+		UseTargetPages(zb_pages, 1);
 	}
 
 	if(s_dump)
@@ -322,13 +345,13 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
 {
 	GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
 
-	m_tc->InvalidateVideoMem(o, r);
+	list* pages = o->GetPages(r);
+
+	m_tc->InvalidatePages(pages, o->psm);
 
 	// check if the changing pages either used as a texture or a target
 
-	list* pages = o->GetPages(r);
-
-	for(list::iterator i = pages->begin(); i != pages->end(); i++)
+	for(list::const_iterator i = pages->begin(); i != pages->end(); i++)
 	{
 		uint32 page = *i;
 
@@ -347,11 +370,9 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
 
 	list* pages = o->GetPages(r);
 
-	for(list::iterator i = pages->begin(); i != pages->end(); i++)
+	for(list::const_iterator i = pages->begin(); i != pages->end(); i++)
 	{
-		uint32 page = *i;
-
-		if(m_fzb_pages[page])
+		if(m_fzb_pages[*i])
 		{
 			Sync(6);
 
@@ -360,25 +381,23 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
 	}
 }
 
-void GSRendererSW::UseTargetPages(GSOffset* o, const GSVector4i& rect)
+void GSRendererSW::UseTargetPages(const list* pages, int offset)
 {
-	list* pages = o->GetPages(rect);
-
-	for(list::iterator i = pages->begin(); i != pages->end(); i++)
+	for(list::const_iterator i = pages->begin(); i != pages->end(); i++)
 	{
-		_InterlockedIncrement(&m_fzb_pages[*i]);
+		ASSERT(((short*)&m_fzb_pages[*i])[offset] < SHRT_MAX);
+
+		_InterlockedIncrement16((short*)&m_fzb_pages[*i] + offset);
 	}
 }
 
-void GSRendererSW::ReleaseTargetPages(GSOffset* o, const GSVector4i& rect)
+void GSRendererSW::ReleaseTargetPages(const list* pages, int offset)
 {
-	list* pages = o->GetPages(rect);
-
-	for(list::iterator i = pages->begin(); i != pages->end(); i++)
+	for(list::const_iterator i = pages->begin(); i != pages->end(); i++)
 	{
-		ASSERT(m_fzb_pages[*i] > 0);
+		ASSERT(((short*)&m_fzb_pages[*i])[offset] > 0);
 
-		_InterlockedDecrement(&m_fzb_pages[*i]);
+		_InterlockedDecrement16((short*)&m_fzb_pages[*i] + offset);
 	}
 }
 
diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h
index 545349ad19..af249121bd 100644
--- a/plugins/GSdx/GSRendererSW.h
+++ b/plugins/GSdx/GSRendererSW.h
@@ -41,6 +41,8 @@ class GSRendererSW : public GSRendererT
 		{
 			GSScanlineGlobalData* gd = (GSScanlineGlobalData*)_aligned_malloc(sizeof(GSScanlineGlobalData), 32);
 
+			gd->sel.key = 0;
+
 			gd->clut = NULL;
 			gd->dimx = NULL;
 
@@ -55,12 +57,12 @@ class GSRendererSW : public GSRendererT
 
 			if(gd->sel.fwrite)
 			{
-				m_parent->ReleaseTargetPages(m_fb, r);
+				m_parent->ReleaseTargetPages(m_fb->GetPages(r), 0);
 			}
 
 			if(gd->sel.zwrite)
 			{
-				m_parent->ReleaseTargetPages(m_zb, r);
+				m_parent->ReleaseTargetPages(m_zb->GetPages(r), 1);
 			}
 
 			if(gd->clut) _aligned_free(gd->clut);
@@ -77,7 +79,7 @@ protected:
 	uint8* m_output;
 	bool m_reset;
 	GSPixelOffset4* m_fzb;
-	long m_fzb_pages[512];
+	uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
 	uint32 m_tex_pages[16];
 
 	void Reset();
@@ -90,8 +92,8 @@ protected:
 	void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
 	void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
 
-	void UseTargetPages(GSOffset* o, const GSVector4i& rect);
-	void ReleaseTargetPages(GSOffset* o, const GSVector4i& rect);
+	void UseTargetPages(const list* pages, int offset);
+	void ReleaseTargetPages(const list* pages, int offset);
 	void UseSourcePages(const GSTextureCacheSW::Texture* t);
 
 	bool GetScanlineGlobalData(GSScanlineGlobalData& gd);
diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp
index 3ac54470f3..2f45a9bcde 100644
--- a/plugins/GSdx/GSTextureCacheSW.cpp
+++ b/plugins/GSdx/GSTextureCacheSW.cpp
@@ -83,11 +83,9 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
 	return t;
 }
 
-void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
+void GSTextureCacheSW::InvalidatePages(const list* pages, uint32 psm)
 {
-	list* pages = o->GetPages(rect);
-
-	for(list::iterator p = pages->begin(); p != pages->end(); p++)
+	for(list::const_iterator p = pages->begin(); p != pages->end(); p++)
 	{
 		uint32 page = *p;
 
@@ -97,7 +95,7 @@ void GSTextureCacheSW::InvalidateVideoMem(GSOffset* o, const GSVector4i& rect)
 		{
 			Texture* t = *i;
 
-			if(GSUtil::HasSharedBits(o->psm, t->m_TEX0.PSM))
+			if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
 			{
 				if(t->m_repeating)
 				{
@@ -185,7 +183,7 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
 
 	list* pages = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
 
-	for(list::iterator i = pages->begin(); i != pages->end(); i++)
+	for(list::const_iterator i = pages->begin(); i != pages->end(); i++)
 	{
 		uint32 page = *i;
 
diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h
index 346bc1e196..f65bcf42ba 100644
--- a/plugins/GSdx/GSTextureCacheSW.h
+++ b/plugins/GSdx/GSTextureCacheSW.h
@@ -64,7 +64,7 @@ public:
 
 	Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
 
-	void InvalidateVideoMem(GSOffset* o, const GSVector4i& r);
+	void InvalidatePages(const list* pages, uint32 psm);
 
 	void RemoveAll();
 	void RemoveAt(Texture* t);
diff --git a/plugins/GSdx/GSThread.h b/plugins/GSdx/GSThread.h
index 026d1358af..bd6d403c01 100644
--- a/plugins/GSdx/GSThread.h
+++ b/plugins/GSdx/GSThread.h
@@ -204,7 +204,7 @@ protected:
 			}
 
 			{
-				// NOTE: this is scoped because we must make sure the last "item" is no longer around when Wait detects an empty queue
+				// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
 
 				T item = m_queue.front();
 
diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h
index 9e256e0378..c4ff5070ea 100644
--- a/plugins/GSdx/stdafx.h
+++ b/plugins/GSdx/stdafx.h
@@ -331,6 +331,13 @@ typedef signed long long int64;
 		__asm__("lock; xaddl %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
 		return retval;
 	}
+
+	__forceinline long _InterlockedExchangeAdd16(volatile short* const Addend, const short Value)
+	{
+		short retval = Value; // must be 16-bit: "+r" then names a word register, which is what xaddw requires
+		__asm__("lock; xaddw %[retval], %[Addend]" : [retval] "+r" (retval) : [Addend] "m" (*Addend) : "memory");
+		return retval; // previous value of *Addend, sign-extended to long
+	}
 
 	__forceinline long _InterlockedDecrement(volatile long* const lpAddend)
 	{
@@ -341,6 +348,16 @@ typedef signed long long int64;
 	{
 		return _InterlockedExchangeAdd(lpAddend, 1) + 1;
 	}
+
+	__forceinline short _InterlockedDecrement16(volatile short* const lpAddend)
+	{
+		return _InterlockedExchangeAdd16(lpAddend, -1) - 1;
+	}
+
+	__forceinline short _InterlockedIncrement16(volatile short* const lpAddend)
+	{
+		return _InterlockedExchangeAdd16(lpAddend, 1) + 1;
+	}
 
 	#ifdef __GNUC__
 