diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index b46c09b55f..27198c3988 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -64,14 +64,16 @@ bool GSRasterizer::IsOneOfMyScanlines(int scanline) const bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const { - top >>= THREAD_HEIGHT; - bottom >>= THREAD_HEIGHT; + top = top >> THREAD_HEIGHT; + bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT; - do + while(top < bottom) { - if(m_myscanline[top]) return true; + if(m_myscanline[top++]) + { + return true; + } } - while(top++ < bottom); return false; } @@ -292,8 +294,6 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices) GSVector4 tbmin = tbf.min(m_fscissor.ywyw()); GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); - if(m_threads > 1 && !IsOneOfMyScanlines(tb.x, tb.w)) return; - dv[0] = v[1] - v[0]; dv[1] = v[2] - v[0]; dv[2] = v[2] - v[1]; @@ -845,6 +845,7 @@ void GSRasterizerMT::ThreadProc() GSRasterizerList::GSRasterizerList() : m_sync_count(0) , m_count(0) + , m_dispatched(0) { } @@ -856,22 +857,34 @@ GSRasterizerList::~GSRasterizerList() } } +void GSRasterizerList::Draw(shared_ptr data) +{ + Sync(); + + front()->Draw(data); +} + void GSRasterizerList::Queue(shared_ptr data) { - // TODO: do not send data to every thread, try to bin them (based on bbox & scissor) - - if(data->solidrect) + if(size() > 1) { - Sync(); + ASSERT(!data->solidrect); // should call Draw instead, but it will work anyway - front()->Draw(data); - - return; + data->solidrect = false; } + GSVector4i bbox = data->bbox.rintersect(data->scissor); + for(int i = 0; i < size(); i++) { - (*this)[i]->Queue(data); + GSRasterizer* r = (*this)[i]; + + if(r->IsOneOfMyScanlines(bbox.top, bbox.bottom)) + { + r->Queue(data); + + m_dispatched++; + } } m_count++; @@ -888,6 +901,9 @@ void GSRasterizerList::Sync() m_sync_count++; + //printf("%d %d%%\n", m_count, 100 * m_dispatched / (m_count * size())); + m_count = 0; + 
m_dispatched = 0; } } diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index 4cc145790d..d060894d0f 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -127,8 +127,6 @@ protected: void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side); - __forceinline bool IsOneOfMyScanlines(int scanline) const; - __forceinline bool IsOneOfMyScanlines(int top, int bottom) const; __forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan); __forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false); @@ -136,6 +134,9 @@ public: GSRasterizer(IDrawScanline* ds, int id, int threads); virtual ~GSRasterizer(); + __forceinline bool IsOneOfMyScanlines(int scanline) const; + __forceinline bool IsOneOfMyScanlines(int top, int bottom) const; + void Draw(shared_ptr data); // IRasterizer @@ -169,6 +170,7 @@ class GSRasterizerList : public IRasterizer, protected vector { protected: int m_count; + int m_dispatched; GSRasterizerList(); @@ -189,6 +191,12 @@ public: return rl; } + size_t IsMultiThreaded() const {return size();} + + void Draw(shared_ptr data); + + // IRasterizer + void Queue(shared_ptr data); void Sync(); diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index f540344bb7..4d6f0442fa 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -36,6 +36,9 @@ GSRendererSW::GSRendererSW(int threads) m_rl = GSRasterizerList::Create(threads); m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); + + memset(m_tex_pages, 0, sizeof(m_tex_pages)); + memset(m_fzb_pages, 0, sizeof(m_fzb_pages)); } GSRendererSW::~GSRendererSW() @@ -68,7 +71,7 @@ void GSRendererSW::VSync(int field) Sync(); // IncAge might delete a cached texture in use - // printf("m_sync_count = %d\n", m_rl->m_sync_count); m_rl->m_sync_count = 0; + //printf("m_sync_count = %d\n", 
m_rl->m_sync_count); m_rl->m_sync_count = 0; m_tc->IncAge(); @@ -133,8 +136,6 @@ void GSRendererSW::Draw() { if(m_dump) m_dump.Object(m_vertices, m_count, m_vt.m_primclass); - // TODO: palette may be rendered (point-list in a few visual novels) and not ready by the time it needs to be loaded => vm to clut transfer (TEX0.CLD) should wait for the rasterizers to finish, if needed - if(m_fzb != m_context->offset.fzb) { // rasterizers must write the same outputs at the same time, this makes sure each thread has its own private surface area @@ -157,7 +158,7 @@ void GSRendererSW::Draw() data->scissor = GSVector4i(m_context->scissor.in); data->scissor.z = std::min(data->scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour - data->bbox = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)); + data->bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil())); data->primclass = m_vt.m_primclass; data->vertices = (GSVertexSW*)_aligned_malloc(sizeof(GSVertexSW) * m_count, 16); // TODO: detach m_vertices and reallocate later? 
memcpy(data->vertices, m_vertices, sizeof(GSVertexSW) * m_count); // TODO: m_vt.Update fetches all the vertices already, could also store them here @@ -165,15 +166,6 @@ void GSRendererSW::Draw() data->solidrect = gd->sel.IsSolidRect(); data->frame = m_perfmon.GetFrame(); - if(s_dump) - { - if(data->solidrect) Sync(); - - ((GSRasterizerData2*)data.get())->DumpInput(); - } - - m_rl->Queue(data); - GSVector4i r = data->bbox.rintersect(data->scissor); if(gd->sel.fwrite) @@ -186,7 +178,83 @@ void GSRendererSW::Draw() m_tc->InvalidateVideoMem(m_context->offset.zb, r); } - // Sync(); + if(!m_rl->IsMultiThreaded() || data->solidrect || s_dump) + { + if(s_dump) + { + uint64 frame = m_perfmon.GetFrame(); + + string s; + + if(s_save && s_n >= s_saven && PRIM->TME) + { + s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); + + m_mem.SaveBMP(s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); + } + + s_n++; + + if(s_save && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); + + m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + } + + if(s_savez && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_rz0_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); + + m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + } + + s_n++; + } + + m_rl->Draw(data); + + Sync(); + + if(s_dump) + { + uint64 frame = m_perfmon.GetFrame(); + + string s; + + if(s_save && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); + + m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + } + + 
if(s_savez && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_rz1_%05x_%d.bmp", s_n, frame, m_context->ZBUF.Block(), m_context->ZBUF.PSM); + + m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512); + } + + s_n++; + } + } + else + { + m_rl->Queue(data); + + if(gd->sel.fwrite) + { + InvalidatePages(m_context->offset.fb, r); + } + + if(gd->sel.zwrite) + { + InvalidatePages(m_context->offset.zb, r); + } + + // Sync(); + } // TODO: m_perfmon.Put(GSPerfMon::Prim, stats.prims); // TODO: m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); @@ -205,21 +273,108 @@ void GSRendererSW::Draw() void GSRendererSW::Sync() { + //printf("sync\n"); + m_rl->Sync(); - m_tc->ResetInvalidPages(); + memset(m_tex_pages, 0, sizeof(m_tex_pages)); + memset(m_fzb_pages, 0, sizeof(m_fzb_pages)); } void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) { - m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r); + //printf("ivm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); - Sync(); // TODO: not needed if nothing uses the affected pages (this is the most frequently called Sync! 
get rid of it) + GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); + + m_tc->InvalidateVideoMem(o, r); + + if(CheckPages(o, r)) // check if the changing pages are either used as a texture or a target + { + Sync(); + } } void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) { - Sync(); // TODO: not needed if nothing uses the affected pages + //printf("ilm %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); + + GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); + + if(CheckPages(o, r)) // TODO: only checking m_fzb_pages would be enough (read-backs are rare anyway) + { + Sync(); + } +} + +void GSRendererSW::InvalidatePages(const GSTextureCacheSW::Texture* t) +{ + //printf("tex %05x %d %d\n", t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM); + + for(size_t i = 0; i < countof(t->m_pages); i++) + { + if(m_fzb_pages[i] & t->m_pages[i]) // currently being drawn to? => sync + { + Sync(); + + return; + } + + m_tex_pages[i] |= t->m_pages[i]; // remember which texture pages are used + } +} + +void GSRendererSW::InvalidatePages(const GSOffset* o, const GSVector4i& rect) +{ + //printf("fzb %05x %d %d\n", o->bp, o->bw, o->psm); + + GSVector2i bs = (o->bp & 31) == 0 ? GSLocalMemory::m_psm[o->psm].pgs : GSLocalMemory::m_psm[o->psm].bs; + + GSVector4i r = rect.ralign(bs); + + for(int y = r.top; y < r.bottom; y += bs.y) + { + uint32 base = o->block.row[y >> 3]; + + for(int x = r.left; x < r.right; x += bs.x) + { + uint32 page = (base + o->block.col[x >> 3]) >> 5; + + if(page < MAX_PAGES) + { + m_fzb_pages[page >> 5] |= 1 << (page & 31); + } + } + } +} + +bool GSRendererSW::CheckPages(const GSOffset* o, const GSVector4i& rect) +{ + GSVector2i bs = (o->bp & 31) == 0 ? 
GSLocalMemory::m_psm[o->psm].pgs : GSLocalMemory::m_psm[o->psm].bs; + + GSVector4i r = rect.ralign(bs); + + for(int y = r.top; y < r.bottom; y += bs.y) + { + uint32 base = o->block.row[y >> 3]; + + for(int x = r.left; x < r.right; x += bs.x) + { + uint32 page = (base + o->block.col[x >> 3]) >> 5; + + if(page < MAX_PAGES) + { + uint32 mask = 1 << (page & 31); + + if((m_tex_pages[page >> 5] | m_fzb_pages[page >> 5]) & mask) + { + return true; + } + } + } + } + + return false; } #include "GSTextureSW.h" @@ -335,7 +490,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) if(t == NULL) {ASSERT(0); return false;} - if(!m_tc->CanUpdate(t)) Sync(); + InvalidatePages(t); GSVector4i r; @@ -488,7 +643,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) if(t == NULL) {ASSERT(0); return false;} - if(!m_tc->CanUpdate(t)) Sync(); + InvalidatePages(t); GSVector4i r; diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index f956708238..5922a10387 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -29,13 +29,6 @@ class GSRendererSW : public GSRendererT { class GSRasterizerData2 : public GSRasterizerData { - GSRenderer* renderer; - GIFRegFRAME FRAME; - GIFRegZBUF ZBUF; - GIFRegTEX0 TEX0; - uint32 TME; - GSVector2i framesize; - public: GSRasterizerData2(GSRenderer* r) { @@ -45,13 +38,6 @@ class GSRendererSW : public GSRendererT gd->dimx = NULL; param = gd; - - renderer = r; - FRAME = r->m_context->FRAME; - ZBUF = r->m_context->ZBUF; - TEX0 = r->m_context->TEX0; - TME = r->PRIM->TME; - framesize = GSVector2i(r->GetFrameRect().width(), 512); } virtual ~GSRasterizerData2() @@ -62,73 +48,6 @@ class GSRendererSW : public GSRendererT if(gd->dimx) _aligned_free(gd->dimx); _aligned_free(gd); - - DumpOutput(); - } - - // FIXME: not really possible to save whole input/output anymore, strips of the picture may lag in multi-threaded mode - - void DumpInput() - { - if(!renderer->s_dump) return; // || 
!(m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)) - - GSAutoLock l(&renderer->s_lock); - - uint64 frame = renderer->m_perfmon.GetFrame(); - - string s; - - if(renderer->s_save && renderer->s_n >= renderer->s_saven && TME) - { - s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d.bmp", renderer->s_n, frame, (int)TEX0.TBP0, (int)TEX0.PSM); - - renderer->m_mem.SaveBMP(s, TEX0.TBP0, TEX0.TBW, TEX0.PSM, 1 << TEX0.TW, 1 << TEX0.TH); - } - - renderer->s_n++; - - if(renderer->s_save && renderer->s_n >= renderer->s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rt0_%05x_%d.bmp", renderer->s_n, frame, FRAME.Block(), FRAME.PSM); - - renderer->m_mem.SaveBMP(s, FRAME.Block(), FRAME.FBW, FRAME.PSM, framesize.x, framesize.y); - } - - if(renderer->s_savez && renderer->s_n >= renderer->s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rz0_%05x_%d.bmp", renderer->s_n, frame, ZBUF.Block(), ZBUF.PSM); - - renderer->m_mem.SaveBMP(s, ZBUF.Block(), FRAME.FBW, ZBUF.PSM, framesize.x, framesize.y); - } - - renderer->s_n++; - } - - void DumpOutput() - { - if(!renderer->s_dump) return; // || !(m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0) - - GSAutoLock l(&renderer->s_lock); - - uint64 frame = renderer->m_perfmon.GetFrame(); - - string s; - - if(renderer->s_save && renderer->s_n >= renderer->s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", renderer->s_n, frame, FRAME.Block(), FRAME.PSM); - - renderer->m_mem.SaveBMP(s, FRAME.Block(), FRAME.FBW, FRAME.PSM, framesize.x, framesize.y); - } - - if(renderer->s_savez && renderer->s_n >= renderer->s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_rz1_%05x_%d.bmp", renderer->s_n, frame, ZBUF.Block(), ZBUF.PSM); - - renderer->m_mem.SaveBMP(s, ZBUF.Block(), FRAME.FBW, ZBUF.PSM, framesize.x, framesize.y); - } - - renderer->s_n++; } }; @@ -139,6 +58,8 @@ protected: uint8* m_output; bool m_reset; GSPixelOffset4* m_fzb; + uint32 
m_fzb_pages[16]; + uint32 m_tex_pages[16]; void Reset(); void VSync(int field); @@ -150,6 +71,10 @@ protected: void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); + void InvalidatePages(const GSOffset* o, const GSVector4i& rect); + void InvalidatePages(const GSTextureCacheSW::Texture* t); + bool CheckPages(const GSOffset* o, const GSVector4i& rect); + bool GetScanlineGlobalData(GSScanlineGlobalData& gd); public: diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 56bed5c0ab..31380639f8 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -613,6 +613,19 @@ void GSState::ApplyTEX0(int i, GIFRegTEX0& TEX0) if(wt) { + GIFRegBITBLTBUF BITBLTBUF; + + BITBLTBUF.SBP = TEX0.CBP; + BITBLTBUF.SBW = 1; + BITBLTBUF.SPSM = TEX0.CSM; + + GSVector4i r = GSVector4i::zero(); + + r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x; + r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y; + + InvalidateLocalMem(BITBLTBUF, r); + m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT); } } diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index b83452a709..82423017d8 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -25,7 +25,6 @@ GSTextureCacheSW::GSTextureCacheSW(GSState* state) : m_state(state) { - memset(m_invalid, 0, sizeof(m_invalid)); } GSTextureCacheSW::~GSTextureCacheSW() @@ -102,11 +101,9 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect) { - uint32 bp = o->bp; - uint32 bw = o->bw; uint32 psm = o->psm; - GSVector2i bs = (bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; + GSVector2i bs = (o->bp & 31) == 0 ? 
GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs; GSVector4i r = rect.ralign(bs); @@ -120,8 +117,6 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r if(page < MAX_PAGES) { - m_invalid[page >> 5] |= 1 << (page & 31); // remember which pages might be invalid for future texture updates - const list& map = m_map[page]; for(list::const_iterator i = map.begin(); i != map.end(); i++) @@ -198,24 +193,6 @@ void GSTextureCacheSW::IncAge() } } -bool GSTextureCacheSW::CanUpdate(Texture* t) -{ - for(size_t i = 0; i < countof(m_invalid); i++) - { - if(m_invalid[i] & t->m_pages[i]) - { - return false; - } - } - - return true; -} - -void GSTextureCacheSW::ResetInvalidPages() -{ - memset(m_invalid, 0, sizeof(m_invalid)); -} - // GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h index 19aea0558e..d26d521bba 100644 --- a/plugins/GSdx/GSTextureCacheSW.h +++ b/plugins/GSdx/GSTextureCacheSW.h @@ -57,7 +57,6 @@ protected: GSState* m_state; hash_set m_textures; list m_map[MAX_PAGES]; - uint32 m_invalid[16]; public: GSTextureCacheSW(GSState* state); @@ -70,7 +69,4 @@ public: void RemoveAll(); void RemoveAt(Texture* t); void IncAge(); - - bool CanUpdate(Texture* t); - void ResetInvalidPages(); };