From 9b8c753eadfd916baa1df2ceb284b47e1d75d0ac Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 21 Jan 2012 04:44:04 +0000 Subject: [PATCH] GSdx: broken frame skipping should be fixed, and a few random sw renderer optimizations. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5077 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 2 +- plugins/GSdx/GSRasterizer.cpp | 5 - plugins/GSdx/GSRasterizer.h | 2 - plugins/GSdx/GSRendererSW.cpp | 361 +++++++++++++++++------------- plugins/GSdx/GSRendererSW.h | 17 +- plugins/GSdx/GSState.cpp | 75 ++----- plugins/GSdx/GSState.h | 2 +- plugins/GSdx/GSTextureCacheSW.cpp | 16 +- plugins/GSdx/GSVector.h | 5 + 9 files changed, 258 insertions(+), 227 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 443d0c2232..a39a5dcd83 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -1111,9 +1111,9 @@ __aligned(struct, 32) GIFPath nreg = tag.NREG ? tag.NREG : 16; regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg); nloop = tag.NLOOP; - type = TYPE_UNKNOWN; if(regs.u32[0] == 0x00040102 && nreg == 3) type = TYPE_STQRGBAXYZF2; else if(regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1) type = TYPE_ADONLY; + else type = TYPE_UNKNOWN; } __forceinline uint8 GetReg() diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index 2c05114e52..5b1af85f85 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -932,11 +932,6 @@ GSRasterizerList::~GSRasterizerList() void GSRasterizerList::Queue(shared_ptr data) { - if(data->syncpoint) - { - Sync(); - } - GSVector4i r = data->bbox.rintersect(data->scissor); ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048); diff --git a/plugins/GSdx/GSRasterizer.h b/plugins/GSdx/GSRasterizer.h index 84bd01f31e..2dc611e7a8 100644 --- a/plugins/GSdx/GSRasterizer.h +++ b/plugins/GSdx/GSRasterizer.h @@ -39,7 +39,6 @@ public: int vertex_count; uint32* index; int index_count; - bool syncpoint; uint64 frame; 
GSRasterizerData() @@ -51,7 +50,6 @@ public: , vertex_count(0) , index(NULL) , index_count(0) - , syncpoint(false) , frame(0) { } diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index dab27400bd..e90bdaaef4 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -43,6 +43,17 @@ GSRendererSW::GSRendererSW(int threads) memset(m_fzb_pages, 0, sizeof(m_fzb_pages)); memset(m_tex_pages, 0, sizeof(m_tex_pages)); + + #define InitCVB(P) \ + m_cvb[P][0][0] = &GSRendererSW::ConvertVertexBuffer; \ + m_cvb[P][0][1] = &GSRendererSW::ConvertVertexBuffer; \ + m_cvb[P][1][0] = &GSRendererSW::ConvertVertexBuffer; \ + m_cvb[P][1][1] = &GSRendererSW::ConvertVertexBuffer; \ + + InitCVB(GS_POINT_CLASS); + InitCVB(GS_LINE_CLASS); + InitCVB(GS_TRIANGLE_CLASS); + InitCVB(GS_SPRITE_CLASS); } GSRendererSW::~GSRendererSW() @@ -72,7 +83,7 @@ void GSRendererSW::VSync(int field) { Sync(0); // IncAge might delete a cached texture in use - if(LOG) + if(0) if(LOG) { fprintf(s_fp, "%lld\n", m_perfmon.GetFrame()); @@ -230,6 +241,74 @@ GSTexture* GSRendererSW::GetOutput(int i) return m_texture[i]; } +template +void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) +{ + GSVector4i o = (GSVector4i)m_context->XYOFFSET; + GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0); + + for(size_t i = 0; i < m_vertex.next; i++, src++, dst++) + { + GSVector4 stcq = GSVector4::load(&src->m[0]); // s t rgba q + + #if _M_SSE >= 0x401 + + GSVector4i xyzuvf(src->m[1]); + + GSVector4i xy = xyzuvf.upl16() - o; + GSVector4i zf = xyzuvf.ywww().min_u32(GSVector4i::xffffff00()); + + #else + + uint32 z = src->XYZ.Z; + + GSVector4i xy = GSVector4i::load((int)src->XYZ.u32[0]).upl16() - o; + GSVector4i zf = GSVector4i((int)std::min(z, 0xffffff00), src->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later + + #endif + + dst->p = 
GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; + dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7); + + GSVector4 t; + + if(tme) + { + if(fst) + { + #if _M_SSE >= 0x401 + + t = GSVector4(xyzuvf.uph16() << (16 - 4)); + + #else + + t = GSVector4(GSVector4i::load(src->UV).upl16() << (16 - 4)); + + #endif + } + else + { + t = stcq.xyww() * tsize; + } + } + + if(primclass == GS_SPRITE_CLASS) + { + #if _M_SSE >= 0x401 + + t = t.insert<1, 3>(GSVector4::cast(xyzuvf)); + + #else + + t = t.insert<0, 3>(GSVector4::cast(GSVector4i::load(z))); + + #endif + } + + dst->t = t; + } +} + void GSRendererSW::Draw() { SharedData* sd = new SharedData(this); @@ -243,62 +322,10 @@ void GSRendererSW::Draw() sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next); sd->index_count = m_index.tail; - { - // TODO: template, JIT - - GSVertex* RESTRICT s = m_vertex.buff; - GSVertexSW* RESTRICT d = sd->vertex; - - GSVector4i o = (GSVector4i)m_context->XYOFFSET; - GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH, 1, 0); - - for(size_t i = 0; i < m_vertex.next; i++, s++, d++) - { - // TODO: load xyzuvf in one piece - - uint32 z = s->XYZ.Z; - - GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o; - GSVector4i zf = GSVector4i((int)std::min(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later - - GSVector4 p, t, c; - - p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; - - GSVector4 stcq = GSVector4::load(&s->m[0]); // s t rgba q - - if(PRIM->TME) - { - if(PRIM->FST) - { - t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4)); - } - else - { - t = stcq.xyww() * tsize; - } - } - - c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7); - - d->p = p; - d->c = c; - d->t = t; - - if(sd->primclass == GS_SPRITE_CLASS) - { - d->t.u32[3] = z; // 
TODO: store this to the 4th unused GSVector4? - } - } - } + (this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST])(sd->vertex, m_vertex.buff, m_vertex.next); memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail); - // TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs - // TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true; - // TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers - // TODO: m_tc->InvalidatePages must be called after texture->Update, move that inside GSRasterizerData::Update too - if(!GetScanlineGlobalData(sd)) return; // @@ -316,33 +343,7 @@ void GSRendererSW::Draw() sd->bbox = bbox; sd->frame = m_perfmon.GetFrame(); - // - - uint32* fb_pages = NULL; - uint32* zb_pages = NULL; - - GSVector4i r = bbox.rintersect(scissor); - - if(gd.sel.fwrite) - { - fb_pages = context->offset.fb->GetPages(r); - - m_tc->InvalidatePages(fb_pages, context->offset.fb->psm); - } - - if(gd.sel.zwrite) - { - zb_pages = context->offset.zb->GetPages(r); - - m_tc->InvalidatePages(zb_pages, context->offset.zb->psm); - } - - if(CheckTargetPages(fb_pages, zb_pages, r)) - { - sd->syncpoint = true; - } - - sd->UseTargetPages(fb_pages, zb_pages); + CheckDependencies(sd); if(LOG) {fprintf(s_fp, "queue %05x %d %05x %d %05x %d %dx%d | %d %d %d\n", m_context->FRAME.Block(), m_context->FRAME.PSM, @@ -505,19 +506,6 @@ void GSRendererSW::UsePages(const uint32* pages, int type) } else { - if(!m_rl->IsSynced()) - { - for(const uint32* p = pages; *p != GSOffset::EOP; p++) - { - if(m_fzb_pages[*p]) // currently being drawn to? 
=> sync - { - Sync(7); - - break; - } - } - } - for(const uint32* p = pages; *p != GSOffset::EOP; p++) { ASSERT(m_tex_pages[*p] < SHRT_MAX); @@ -549,6 +537,71 @@ void GSRendererSW::ReleasePages(const uint32* pages, int type) } } +void GSRendererSW::CheckDependencies(SharedData* sd) +{ + GSVector4i r = sd->bbox.rintersect(sd->scissor); + + uint32* fb_pages = NULL; + uint32* zb_pages = NULL; + + if(sd->global.sel.fwrite) + { + fb_pages = m_context->offset.fb->GetPages(r); + } + + if(sd->global.sel.zwrite) + { + zb_pages = m_context->offset.zb->GetPages(r); + } + + // check if there is an overlap between this and previous targets + + bool target_syncpoint = false; + + if(CheckTargetPages(fb_pages, zb_pages, r)) + { + target_syncpoint = true; + } + + // check if the texture is not part of a target currently in use + + bool source_syncpoint = false; + + if(CheckSourcePages(sd)) + { + source_syncpoint = true; + target_syncpoint = false; + } + + // addref target pages + + sd->UseTargetPages(fb_pages, zb_pages); + + // addref texture pages and update previously invalidated parts + + if(source_syncpoint) + { + Sync(7); + } + + sd->UseSourcePages(); + + if(sd->global.sel.fwrite) + { + m_tc->InvalidatePages(fb_pages, m_context->offset.fb->psm); + } + + if(sd->global.sel.zwrite) + { + m_tc->InvalidatePages(zb_pages, m_context->offset.zb->psm); + } + + if(target_syncpoint) + { + Sync(8); + } +} + bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r) { bool synced = m_rl->IsSynced(); @@ -699,6 +752,31 @@ bool GSRendererSW::CheckTargetPages(const uint32* fb_pages, const uint32* zb_pag return false; } +bool GSRendererSW::CheckSourcePages(SharedData* sd) +{ + if(!m_rl->IsSynced()) + { + for(size_t i = 0; sd->m_tex[i].t != NULL; i++) + { + sd->m_tex[i].t->m_offset->GetPages(sd->m_tex[i].r, m_tmp_pages); + + uint32* pages = m_tmp_pages; // sd->m_tex[i].t->m_pages.n; + + for(const uint32* p = pages; *p != GSOffset::EOP; p++) + { + // 
TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2) + + if(m_fzb_pages[*p]) // currently being drawn to? => sync + { + return true; + } + } + } + } + + return false; +} + #include "GSTextureSW.h" bool GSRendererSW::GetScanlineGlobalData(SharedData* data) @@ -819,29 +897,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) if(t == NULL) {ASSERT(0); return false;} - data->UseSourcePages(t, 0); - GSVector4i r; GetTextureMinMax(r, context->TEX0, context->CLAMP, gd.sel.ltf); - if(!t->Update(r)) {ASSERT(0); return false;} + data->SetSource(t, r, 0); - if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0) - { - uint64 frame = m_perfmon.GetFrame(); - - string s; - - if(s_save && s_n >= s_saven) - { - s = format("c:\\temp1\\_%05d_f%lld_tex32_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); - - t->Save(s); - } - } - - gd.tex[0] = t->m_buff; gd.sel.tw = t->m_tw - 3; if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0) @@ -972,38 +1033,11 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data) if(t == NULL) {ASSERT(0); return false;} - data->UseSourcePages(t, i); - GSVector4i r; GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf); - if(!t->Update(r)) {ASSERT(0); return false;} - - gd.tex[i] = t->m_buff; - - if(0) - //if(context->TEX0.TH > context->TEX0.TW) - //if(s_n >= s_saven && s_n < s_saven + 3) - //if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4) - { - t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i)); - /* - GIFRegTEX0 TEX0 = MIP_TEX0; - TEX0.TBP0 = context->TEX0.TBP0; - do - { - TEX0.TBP0++; - const GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA, r, gd.sel.tw + 3); - if(t == NULL) {ASSERT(0); return false;} - 
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, TEX0.TBP0, i)); - } - while(TEX0.TBP0 < 0x3fff); - */ - - int i = 0; - } - + data->SetSource(t, r, i); } s_counter++; @@ -1232,7 +1266,7 @@ GSRendererSW::SharedData::SharedData(GSRendererSW* parent) , m_zb_pages(NULL) , m_using_pages(false) { - m_tex_pages[0] = NULL; + m_tex[0].t = NULL; global.sel.key = 0; @@ -1258,9 +1292,9 @@ GSRendererSW::SharedData::~SharedData() delete [] m_fb_pages; delete [] m_zb_pages; - for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++) + for(size_t i = 0; m_tex[i].t != NULL; i++) { - m_parent->ReleasePages(m_tex_pages[i], 2); + m_parent->ReleasePages(m_tex[i].t->m_pages.n, 2); } if(global.clut) _aligned_free(global.clut); @@ -1287,14 +1321,41 @@ void GSRendererSW::SharedData::UseTargetPages(const uint32* fb_pages, const uint m_using_pages = true; } -void GSRendererSW::SharedData::UseSourcePages(GSTextureCacheSW::Texture* t, int level) +void GSRendererSW::SharedData::SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level) { - ASSERT(m_tex_pages[level] == NULL); + ASSERT(m_tex[level].t == NULL); - const uint32* pages = t->m_pages.n; + m_tex[level].t = t; + m_tex[level].r = r; - m_tex_pages[level] = pages; - m_tex_pages[level + 1] = NULL; - - m_parent->UsePages(pages, 2); + m_tex[level + 1].t = NULL; +} + +void GSRendererSW::SharedData::UseSourcePages() +{ + for(size_t i = 0; m_tex[i].t != NULL; i++) + { + m_parent->UsePages(m_tex[i].t->m_pages.n, 2); + + m_tex[i].t->Update(m_tex[i].r); // TODO: check return value, false (out-of-memory) then disable texturing + + global.tex[i] = m_tex[i].t->m_buff; + + // TODO + /* + if(s_dump) + { + uint64 frame = m_perfmon.GetFrame(); + + string s; + + if(s_save && s_n >= s_saven) + { + s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", s_n, frame, i, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); + + sd->m_tex[i].t->Save(s); + } + } + */ + } } diff --git a/plugins/GSdx/GSRendererSW.h 
b/plugins/GSdx/GSRendererSW.h index 96c3f25d5a..6d21ae29ff 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -29,20 +29,30 @@ class GSRendererSW : public GSRenderer { class SharedData : public GSDrawScanline::SharedData { + public: GSRendererSW* m_parent; const uint32* m_fb_pages; const uint32* m_zb_pages; - const uint32* m_tex_pages[7 + 1]; // NULL terminated bool m_using_pages; + __aligned(struct, 16) {GSVector4i r; GSTextureCacheSW::Texture* t;} m_tex[7 + 1]; // NULL terminated public: SharedData(GSRendererSW* parent); virtual ~SharedData(); void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages); - void UseSourcePages(GSTextureCacheSW::Texture* t, int level); + + void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level); + void UseSourcePages(); }; + typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); + + ConvertVertexBufferPtr m_cvb[4][2][2]; + + template + void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); + protected: IRasterizer* m_rl; GSTextureCacheSW* m_tc; @@ -67,7 +77,10 @@ protected: void UsePages(const uint32* pages, int type); void ReleasePages(const uint32* pages, int type); + + void CheckDependencies(SharedData* sd); bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r); + bool CheckSourcePages(SharedData* sd); bool GetScanlineGlobalData(SharedData* data); diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 7db0cbc3d4..36797a113b 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -40,10 +40,11 @@ GSState::GSState() m_nativeres = !!theApp.GetConfig("nativeres", 0); memset(&m_v, 0, sizeof(m_v)); - m_q = 1.0f; memset(&m_vertex, 0, sizeof(m_vertex)); memset(&m_index, 0, sizeof(m_index)); + m_v.RGBAQ.Q = 1.0f; + GrowVertexBuffer(); m_sssize = 0; @@ -94,7 +95,7 @@ GSState::GSState() m_sssize += 
sizeof(m_tr.y); m_sssize += m_mem.m_vmsize; m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * countof(m_path); - m_sssize += sizeof(m_q); + m_sssize += sizeof(float); // obsolete PRIM = &m_env.PRIM; // CSR->rREV = 0x20; @@ -156,44 +157,18 @@ void GSState::SetFrameSkip(int skip) { m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP; m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP; m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP; m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP; m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP; m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP; m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP; m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP; + + m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = &GSState::GIFPackedRegHandlerNOP; } else { - m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2; - m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF2; - m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ2; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = 
(GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>; - m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>; - m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG; - - m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; - m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; - m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF2; - m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ2; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT; - m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE; - UpdateVertexKick(); } } @@ -442,22 +417,13 @@ void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r) m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v); - #elif _M_SSE >= 0x200 + #else GSVector4i v = GSVector4i::load(r) & GSVector4i::x000000ff(); m_v.RGBAQ.u32[0] = v.rgba32(); - #else - - m_v.RGBAQ.R = r->RGBA.R; - m_v.RGBAQ.G = r->RGBA.G; - m_v.RGBAQ.B = r->RGBA.B; - m_v.RGBAQ.A = r->RGBA.A; - #endif - - m_v.RGBAQ.Q = m_q; } void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r) @@ -466,19 +432,14 @@ void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r) m_v.ST.u64 = r->u64[0]; - #elif _M_SSE >= 0x200 + #else GSVector4i v = GSVector4i::loadl(r); GSVector4i::storel(&m_v.ST.u64, v); - #else - - m_v.ST.S = r->STQ.S; - m_v.ST.T = r->STQ.T; - #endif - m_q = r->STQ.Q; + m_v.RGBAQ.Q = r->STQ.Q; #ifdef Offset_ST GIFRegTEX0 TEX0 = m_context->TEX0; @@ -562,21 +523,23 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, ui GSVector4i q = 
GSVector4i::loadl(&r[0].u64[1]); GSVector4i rgba = (GSVector4i::load(&r[1]) & GSVector4i::x000000ff()).ps32().pu16(); - m_v.m[0] = st.upl64(rgba.upl32(q)); + m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]); GSVector4i zf = GSVector4i::loadl(&r[2].u64[1]); xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::loadl(&m_v.UV)); zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff()); - m_v.m[1] = xy.upl32(zf); + m_v.m[1] = xy.upl32(zf); // TODO: only store the last one VertexKick(r[2].XYZF2.Skip()); r += 3; } +} - m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time +void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size) +{ } // GIFRegHandler* @@ -719,8 +682,10 @@ template void GSState::ApplyTEX0(GIFRegTEX0& TEX0) BITBLTBUF.SBW = 1; BITBLTBUF.SPSM = TEX0.CSM; - GSVector4i r = GSVector4i::zero(); + GSVector4i r; + r.left = 0; + r.top = 0; r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x; r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y; @@ -1644,7 +1609,7 @@ void GSState::SoftReset(uint32 mask) m_env.TRXDIR.XDIR = 3; //-1 ; set it to invalid value - m_q = 1; + m_v.RGBAQ.Q = 1.0f; } void GSState::ReadFIFO(uint8* mem, int size) @@ -1687,7 +1652,7 @@ template void GSState::Transfer(const uint8* mem, uint32 size) if(path.nloop > 0) // eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and values other than the EOP field are disregarded." 
{ - m_q = 1.0f; + m_v.RGBAQ.Q = 1.0f; // ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts @@ -1980,7 +1945,7 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly) WriteState(data, &m_path[i].reg); } - WriteState(data, &m_q); + data += sizeof(float); // obsolete return 0; } @@ -2076,7 +2041,7 @@ int GSState::Defrost(const GSFreezeData* fd) m_path[i].SetTag(&m_path[i].tag); // expand regs } - ReadState(&m_q, data); + data += sizeof(float); // obsolete PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM; diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 3cc03c3a0c..ad75f6a18e 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -65,6 +65,7 @@ class GSState : public GSAlignedClass<32> GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8]; template void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size); + void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size); template void ApplyTEX0(GIFRegTEX0& TEX0); void ApplyPRIM(const GIFRegPRIM& PRIM); @@ -137,7 +138,6 @@ protected: bool IsBadFrame(int& skip, int UserHacks_SkipDraw); GSVertex m_v; - float m_q; GSVector4 m_scissor; uint32 m_ofxy; diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index b0d6d83fbf..37e2720067 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -178,6 +178,11 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& m_TEX0 = TEX0; m_TEXA = TEXA; + if(m_tw == 0) + { + m_tw = std::max(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 
3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff + } + memset(m_valid, 0, sizeof(m_valid)); memset(m_pages.bm, 0, sizeof(m_pages.bm)); @@ -239,17 +244,6 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect) if(m_buff == NULL) { - uint32 tw0 = std::max(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff - - if(m_tw == 0) - { - m_tw = tw0; - } - else - { - ASSERT(m_tw >= tw0); - } - uint32 pitch = (1 << m_tw) << shift; m_buff = _aligned_malloc(pitch * th * 4, 32); diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 5646dfbd6f..de20807352 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -2915,6 +2915,11 @@ public: return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p)); } + __forceinline static void storent(void* p, const GSVector4& v) + { + _mm_stream_ps((float*)p, v.m); + } + __forceinline static void storel(void* p, const GSVector4& v) { _mm_store_sd((double*)p, _mm_castps_pd(v.m));