From 09ee76d9eddd4c3c09e7cf62bd1a19e5bb33647b Mon Sep 17 00:00:00 2001 From: gabest11 Date: Wed, 20 May 2009 15:35:31 +0000 Subject: [PATCH] GSdx: optimized local-local transfer a bit, it boosts fps in games like ffxii or ico, which do a lot of moves in local memory. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1226 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 5 +- plugins/GSdx/GSClut.cpp | 14 ---- plugins/GSdx/GSDirtyRect.cpp | 7 +- plugins/GSdx/GSLocalMemory.cpp | 7 +- plugins/GSdx/GSRenderer.h | 38 +++------ plugins/GSdx/GSRendererHW.h | 8 +- plugins/GSdx/GSRendererSW.h | 2 +- plugins/GSdx/GSState.cpp | 133 ++++++++++++++++++++++-------- plugins/GSdx/GSState.h | 2 +- plugins/GSdx/GSTextureCache.h | 2 +- plugins/GSdx/GSTextureCache10.cpp | 33 +++----- plugins/GSdx/GSTextureCache9.cpp | 32 +++---- plugins/GSdx/GSTextureCacheSW.cpp | 23 ++---- plugins/GSdx/GSTextureCacheSW.h | 4 +- plugins/GSdx/GSVector.h | 36 +++++++- plugins/GSdx/GSdx_vs2008.vcproj | 8 ++ 16 files changed, 194 insertions(+), 160 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 39a799a0f1..e62751a8eb 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -221,6 +221,8 @@ union name \ uint64 u64; \ uint32 u32[2]; \ void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \ + bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \ + bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \ operator GSVector4i() const {return GSVector4i::loadl(this);} \ struct { \ @@ -864,7 +866,8 @@ REG64_(GIFReg, TRXPOS) uint32 DSAX:11; uint32 _PAD3:5; uint32 DSAY:11; - uint32 DIR:2; + uint32 DIRY:1; + uint32 DIRX:1; uint32 _PAD4:3; REG_END diff --git a/plugins/GSdx/GSClut.cpp b/plugins/GSdx/GSClut.cpp index 879764e108..c6e3385fd4 100644 --- a/plugins/GSdx/GSClut.cpp +++ b/plugins/GSdx/GSClut.cpp @@ -598,7 +598,6 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector // TODO -static const GSVector4i s_am(0x00008000); static const GSVector4i s_bm(0x00007c00); static const GSVector4i s_gm(0x000003e0); static const GSVector4i s_rm(0x0000001f); @@ -610,7 +609,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c const GSVector4i rm = s_rm; const GSVector4i gm = s_gm; const GSVector4i bm = s_bm; - // const GSVector4i am = s_am; GSVector4i TA0(TEXA.TA0 << 24); GSVector4i TA1(TEXA.TA1 << 24); @@ -625,12 +623,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c for(int i = 0, j = w >> 3; i < j; i++) { c = s[i]; - /* - cl = c.upl16(); - ch = c.uph16(); - d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am); - d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am); - */ cl = c.upl16(c); ch = c.uph16(c); d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)); @@ -642,12 +634,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c for(int i = 0, j = w >> 3; i < j; i++) { c = s[i]; - /* - cl = c.upl16(); - ch = c.uph16(); - d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am).andnot(cl == GSVector4i::zero()); - d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am).andnot(ch == GSVector4i::zero()); - */ cl = c.upl16(c); ch = c.uph16(c); d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero()); diff --git a/plugins/GSdx/GSDirtyRect.cpp b/plugins/GSdx/GSDirtyRect.cpp index 9815a4a28c..4276d0d47e 100644 --- a/plugins/GSdx/GSDirtyRect.cpp +++ b/plugins/GSdx/GSDirtyRect.cpp @@ -48,16 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0) GSVector2i dst = GSLocalMemory::m_psm[TEX0.PSM].bs; r.left = MulDiv(left, dst.x, src.x); - r.right = MulDiv(right, dst.x, src.x); r.top = MulDiv(top, dst.y, src.y); + r.right = MulDiv(right, dst.x, src.x); r.bottom = MulDiv(bottom, dst.y, src.y); } else { - r.left = left & ~(src.x - 1); - r.right = (right + (src.x - 1) /* + 1 */) & ~(src.x - 1); - r.top = top & ~(src.y - 1); - r.bottom = (bottom + (src.y - 1) /* + 1 */) & ~(src.y - 1); + r = GSVector4i(left, top, right, bottom).ralign(src); } return r; diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index b9c499518d..b5f24fbb56 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -593,13 +593,8 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, uint32 c, uint32 psm, uint32 b case 4: c = (c & 0xf) * 0x11111111; break; } - GSVector4i clip; + GSVector4i clip = r.ralign(tbl.bs); - clip.left = (r.left + (w - 1)) & ~(w - 1); - clip.top = (r.top + (h - 1)) & ~(h - 1); - clip.right = r.right & ~(w - 1); - clip.bottom = r.bottom & ~(h - 1); - for(int y = r.top; y < clip.top; y++) { for(int x = r.left; x < r.right; x++) diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index a5fc7f3577..8722cf12cd 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -249,7 +249,7 @@ protected: if(dr[i].height() > 512) // hmm { - int y = GetDeviceRect(i).height(); + int y = GetDeviceSize(i).y; if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) y /= 2; r.bottom = r.top + y; } @@ -258,35 +258,26 @@ protected: if(m_blur && blurdetected && i == 1) { - src[i].x = tex[i].m_scale.x * r.left / tex[i].GetWidth(); - src[i].y = (tex[i].m_scale.y * r.top + 1) / tex[i].GetHeight(); - src[i].z = tex[i].m_scale.x * r.right / tex[i].GetWidth(); - src[i].w = (tex[i].m_scale.y * r.bottom + 1) / tex[i].GetHeight(); - } - else - { - src[i].x = tex[i].m_scale.x * r.left / tex[i].GetWidth(); - src[i].y = tex[i].m_scale.y * r.top / tex[i].GetHeight(); - src[i].z = tex[i].m_scale.x * r.right / tex[i].GetWidth(); - src[i].w = tex[i].m_scale.y * r.bottom / tex[i].GetHeight(); + r += GSVector4i(0, 1).xyxy(); } - GSVector2 o; + GSVector4 scale = GSVector4(tex[i].m_scale).xyxy(); - o.x = 0; - o.y = 0; + src[i] = GSVector4(r) * scale / GSVector4(tex[i].GetSize()).xyxy(); + + GSVector2 o(0, 0); if(dr[i].top - baseline >= 4) // 2? { o.y = tex[i].m_scale.y * (dr[i].top - baseline); } - if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) o.y /= 2; + if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) + { + o.y /= 2; + } - dst[i].x = o.x; - dst[i].y = o.y; - dst[i].z = o.x + tex[i].m_scale.x * r.width(); - dst[i].w = o.y + tex[i].m_scale.y * r.height(); + dst[i] = GSVector4(o).xyxy() + scale * GSVector4(r.rsize()); fs.x = max(fs.x, (int)(dst[i].z + 0.5f)); fs.y = max(fs.y, (int)(dst[i].w + 0.5f)); @@ -301,12 +292,7 @@ protected: if(tex[0] || tex[1]) { - GSVector4 c; - - c.r = (float)m_regs->BGCOLOR.R / 255; - c.g = (float)m_regs->BGCOLOR.G / 255; - c.b = (float)m_regs->BGCOLOR.B / 255; - c.a = (float)m_regs->PMODE.ALP / 255; + GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255; m_dev.Merge(tex, src, dst, fs, slbg, mmod, c); diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 34df556fbb..609790327e 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -93,7 +93,7 @@ protected: maxv = maxv.maxv(v0); } - mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH, 16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).rcpnr(); + mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).xyxy().rcpnr(); } else { @@ -211,12 +211,8 @@ protected: r = vr + GSVector4i(-1, -1, 1, 1); // one more pixel because of bilinear filtering GSVector2i bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs; - GSVector2i bsm(bs.x - 1, bs.y - 1); - r.left = max(r.left & ~bsm.x, 0); - r.top = max(r.top & ~bsm.y, 0); - r.right = min((r.right + bsm.x) & ~bsm.x, w); - r.bottom = min((r.bottom + bsm.y) & ~bsm.y, h); + r = r.ralign(bs).rintersect(GSVector4i(0, 0, w, h)); } void VSync(int field) diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 8c3d50bc42..aa70b60454 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -449,7 +449,7 @@ protected: MinMaxUV(w, h, r, p.sel.fst); - const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, &r); + const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r); if(!t) {ASSERT(0); return;} diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 20fd4124e1..31300861bf 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -235,7 +235,7 @@ GSVector4i GSState::GetFrameRect(int i) return r; } -GSVector4i GSState::GetDeviceRect(int i) +GSVector2i GSState::GetDeviceSize(int i) { // TODO: other params of SMODE1 should affect the true device display size @@ -257,7 +257,7 @@ GSVector4i GSState::GetDeviceRect(int i) h = (m_regs->SMODE1.CMOD & 1) ? 512 : 448; } - return GSVector4i(0, 0, w, h); + return GSVector2i(w, h); } @@ -279,7 +279,7 @@ bool GSState::IsEnabled(int i) int GSState::GetFPS() { - return ((m_regs->SMODE1.CMOD & 1) ? 50 : 60) / (m_regs->SMODE2.INT ? 1 : 2); + return ((m_regs->SMODE1.CMOD & 1) ? 50 : 60) >> (1 - m_regs->SMODE2.INT); } // GIFPackedRegHandler* @@ -486,7 +486,7 @@ template void GSState::GIFRegHandlerTEX0(GIFReg* r) bool wt = m_mem.m_clut.WriteTest(r->TEX0, m_env.TEXCLUT); - if(wt || PRIM->CTXT == i && !((GSVector4i)r->TEX0).eq(m_env.CTXT[i].TEX0)) + if(wt || PRIM->CTXT == i && r->TEX0 != m_env.CTXT[i].TEX0) { Flush(); } @@ -511,7 +511,7 @@ template void GSState::GIFRegHandlerTEX0(GIFReg* r) template void GSState::GIFRegHandlerCLAMP(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->CLAMP).eq(m_env.CTXT[i].CLAMP)) + if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP) { Flush(); } @@ -552,7 +552,7 @@ void GSState::GIFRegHandlerNOP(GIFReg* r) template void GSState::GIFRegHandlerTEX1(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->TEX1).eq(m_env.CTXT[i].TEX1)) + if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1) { Flush(); } @@ -587,7 +587,7 @@ template void GSState::GIFRegHandlerXYOFFSET(GIFReg* r) void GSState::GIFRegHandlerPRMODECONT(GIFReg* r) { - if(!((GSVector4i)r->PRMODECONT).eq(m_env.PRMODECONT)) + if(r->PRMODECONT != m_env.PRMODECONT) { Flush(); } @@ -621,7 +621,7 @@ void GSState::GIFRegHandlerPRMODE(GIFReg* r) void GSState::GIFRegHandlerTEXCLUT(GIFReg* r) { - if(!((GSVector4i)r->TEXCLUT).eq(m_env.TEXCLUT)) + if(r->TEXCLUT != m_env.TEXCLUT) { Flush(); } @@ -631,7 +631,7 @@ void GSState::GIFRegHandlerTEXCLUT(GIFReg* r) void GSState::GIFRegHandlerSCANMSK(GIFReg* r) { - if(!((GSVector4i)r->SCANMSK).eq(m_env.SCANMSK)) + if(r->SCANMSK != m_env.SCANMSK) { Flush(); } @@ -641,7 +641,7 @@ void GSState::GIFRegHandlerSCANMSK(GIFReg* r) template void GSState::GIFRegHandlerMIPTBP1(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->MIPTBP1).eq(m_env.CTXT[i].MIPTBP1)) + if(PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1) { Flush(); } @@ -651,7 +651,7 @@ template void GSState::GIFRegHandlerMIPTBP1(GIFReg* r) template void GSState::GIFRegHandlerMIPTBP2(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->MIPTBP2).eq(m_env.CTXT[i].MIPTBP2)) + if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2) { Flush(); } @@ -661,7 +661,7 @@ template void GSState::GIFRegHandlerMIPTBP2(GIFReg* r) void GSState::GIFRegHandlerTEXA(GIFReg* r) { - if(!((GSVector4i)r->TEXA).eq(m_env.TEXA)) + if(r->TEXA != m_env.TEXA) { Flush(); } @@ -671,7 +671,7 @@ void GSState::GIFRegHandlerTEXA(GIFReg* r) void GSState::GIFRegHandlerFOGCOL(GIFReg* r) { - if(!((GSVector4i)r->FOGCOL).eq(m_env.FOGCOL)) + if(r->FOGCOL != m_env.FOGCOL) { Flush(); } @@ -688,7 +688,7 @@ void GSState::GIFRegHandlerTEXFLUSH(GIFReg* r) template void GSState::GIFRegHandlerSCISSOR(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->SCISSOR).eq(m_env.CTXT[i].SCISSOR)) + if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR) { Flush(); } @@ -705,7 +705,7 @@ template void GSState::GIFRegHandlerALPHA(GIFReg* r) ASSERT(r->ALPHA.C != 3); ASSERT(r->ALPHA.D != 3); - if(PRIM->CTXT == i && !((GSVector4i)r->ALPHA).eq(m_env.CTXT[i].ALPHA)) + if(PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA) { Flush(); } @@ -721,7 +721,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r) { bool update = false; - if(!((GSVector4i)r->DIMX).eq(m_env.DIMX)) + if(r->DIMX != m_env.DIMX) { Flush(); @@ -738,7 +738,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r) void GSState::GIFRegHandlerDTHE(GIFReg* r) { - if(!((GSVector4i)r->DTHE).eq(m_env.DTHE)) + if(r->DTHE != m_env.DTHE) { Flush(); } @@ -748,7 +748,7 @@ void GSState::GIFRegHandlerDTHE(GIFReg* r) void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r) { - if(!((GSVector4i)r->COLCLAMP).eq(m_env.COLCLAMP)) + if(r->COLCLAMP != m_env.COLCLAMP) { Flush(); } @@ -758,7 +758,7 @@ void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r) template void GSState::GIFRegHandlerTEST(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->TEST).eq(m_env.CTXT[i].TEST)) + if(PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST) { Flush(); } @@ -768,7 +768,7 @@ template void GSState::GIFRegHandlerTEST(GIFReg* r) void GSState::GIFRegHandlerPABE(GIFReg* r) { - if(!((GSVector4i)r->PABE).eq(m_env.PABE)) + if(r->PABE != m_env.PABE) { Flush(); } @@ -778,7 +778,7 @@ void GSState::GIFRegHandlerPABE(GIFReg* r) template void GSState::GIFRegHandlerFBA(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->FBA).eq(m_env.CTXT[i].FBA)) + if(PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA) { Flush(); } @@ -788,7 +788,7 @@ template void GSState::GIFRegHandlerFBA(GIFReg* r) template void GSState::GIFRegHandlerFRAME(GIFReg* r) { - if(PRIM->CTXT == i && !((GSVector4i)r->FRAME).eq(m_env.CTXT[i].FRAME)) + if(PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME) { Flush(); } @@ -807,7 +807,7 @@ template void GSState::GIFRegHandlerZBUF(GIFReg* r) r->ZBUF.PSM |= 0x30; - if(PRIM->CTXT == i && !((GSVector4i)r->ZBUF).eq(m_env.CTXT[i].ZBUF)) + if(PRIM->CTXT == i && r->ZBUF != m_env.CTXT[i].ZBUF) { Flush(); } @@ -825,7 +825,7 @@ template void GSState::GIFRegHandlerZBUF(GIFReg* r) void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r) { - if(!((GSVector4i)r->BITBLTBUF).eq(m_env.BITBLTBUF)) + if(r->BITBLTBUF != m_env.BITBLTBUF) { FlushWrite(); } @@ -845,7 +845,7 @@ void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r) void GSState::GIFRegHandlerTRXPOS(GIFReg* r) { - if(!((GSVector4i)r->TRXPOS).eq(m_env.TRXPOS)) + if(r->TRXPOS != m_env.TRXPOS) { FlushWrite(); } @@ -855,7 +855,7 @@ void GSState::GIFRegHandlerTRXPOS(GIFReg* r) void GSState::GIFRegHandlerTRXREG(GIFReg* r) { - if(!((GSVector4i)r->TRXREG).eq(m_env.TRXREG)) + if(r->TRXREG != m_env.TRXREG) { FlushWrite(); } @@ -1036,9 +1036,15 @@ void GSState::Move() int xinc = 1; int yinc = 1; - if(sx < dx) {sx += w - 1; dx += w - 1; xinc = -1;} - if(sy < dy) {sy += h - 1; dy += h - 1; yinc = -1;} - + if(m_env.TRXPOS.DIRX) {sx += w - 1; dx += w - 1; xinc = -1;} + if(m_env.TRXPOS.DIRY) {sy += h - 1; dy += h - 1; yinc = -1;} +/* + printf("%05x %d %d => %05x %d %d (%d%d), %d %d %d %d %d %d\n", + m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, + m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, + m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, + sx, sy, dx, dy, w, h); +*/ /* GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp; GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp; @@ -1051,19 +1057,80 @@ void GSState::Move() const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM]; const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM]; - if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32) + if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16) + { + int* soffset = spsm.rowOffset[0]; + int* doffset = dpsm.rowOffset[0]; + + if(spsm.trbpp == 32) + { + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) + { + uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + + for(int x = 0; x < w; x++, sx += xinc, dx += xinc) + { + m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx])); + } + } + } + else if(spsm.trbpp == 24) + { + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) + { + uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + + for(int x = 0; x < w; x++, sx += xinc, dx += xinc) + { + m_mem.WritePixel24(dbase + doffset[dx], m_mem.ReadPixel24(sbase + soffset[sx])); + } + } + } + else // if(spsm.trbpp == 16) + { + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) + { + uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + + for(int x = 0; x < w; x++, sx += xinc, dx += xinc) + { + m_mem.WritePixel16(dbase + doffset[dx], m_mem.ReadPixel16(sbase + soffset[sx])); + } + } + } + } + else if(m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8) { for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { - uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + uint32 sbase = GSLocalMemory::PixelAddress8(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); int* soffset = spsm.rowOffset[sy & 7]; - uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + uint32 dbase = GSLocalMemory::PixelAddress8(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); int* doffset = dpsm.rowOffset[dy & 7]; for(int x = 0; x < w; x++, sx += xinc, dx += xinc) { - m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx])); + m_mem.WritePixel8(dbase + doffset[dx], m_mem.ReadPixel8(sbase + soffset[sx])); + } + } + } + else if(m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4) + { + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) + { + uint32 sbase = GSLocalMemory::PixelAddress4(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); + int* soffset = spsm.rowOffset[sy & 7]; + + uint32 dbase = GSLocalMemory::PixelAddress4(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW); + int* doffset = dpsm.rowOffset[dy & 7]; + + for(int x = 0; x < w; x++, sx += xinc, dx += xinc) + { + m_mem.WritePixel4(dbase + doffset[dx], m_mem.ReadPixel4(sbase + soffset[sx])); } } } diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 595a8b538f..0f7163aa0b 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -221,7 +221,7 @@ public: GSVector4i GetDisplayRect(int i = -1); GSVector4i GetFrameRect(int i = -1); - GSVector4i GetDeviceRect(int i = -1); + GSVector2i GetDeviceSize(int i = -1); bool IsEnabled(int i); diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 282e0b18ca..6dd495118c 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -376,7 +376,7 @@ public: int ww = (int)(fr.left + rt->m_TEX0.TBW * 64); int hh = (int)(fr.top + m_renderer->GetDisplayRect().height()); - if(hh <= m_renderer->GetDeviceRect().height() / 2) + if(hh <= m_renderer->GetDeviceSize().y / 2) { hh *= 2; } diff --git a/plugins/GSdx/GSTextureCache10.cpp b/plugins/GSdx/GSTextureCache10.cpp index 6a59e815dd..4670658e84 100644 --- a/plugins/GSdx/GSTextureCache10.cpp +++ b/plugins/GSdx/GSTextureCache10.cpp @@ -69,11 +69,7 @@ void GSTextureCache10::GSRenderTargetHW10::Update() texture.Update(GSVector4i(0, 0, w, h), buff, pitch); - GSVector4 dr( - m_texture.m_scale.x * r.left, - m_texture.m_scale.y * r.top, - m_texture.m_scale.x * r.right, - m_texture.m_scale.y * r.bottom); + GSVector4 dr = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy(); m_renderer->m_dev.StretchRect(texture, m_texture, dr); @@ -103,12 +99,7 @@ void GSTextureCache10::GSRenderTargetHW10::Read(const GSVector4i& r) int w = r.width(); int h = r.height(); - GSVector4 src; - - src.x = m_texture.m_scale.x * r.left / m_texture.GetWidth(); - src.y = m_texture.m_scale.y * r.top / m_texture.GetHeight(); - src.z = m_texture.m_scale.x * r.right / m_texture.GetWidth(); - src.w = m_texture.m_scale.y * r.bottom / m_texture.GetHeight(); + GSVector4 src = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy() / GSVector4(m_texture.GetSize()).xyxy(); DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; @@ -269,9 +260,14 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt) m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.GetWidth(), rt->m_texture.GetHeight()); + GSVector4 size = GSVector4(rt->m_texture.GetSize()).xyxy(); + GSVector4 scale = GSVector4(rt->m_texture.m_scale).xyxy(); + int bw = 64; int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64; + GSVector4i br(0, 0, bw, bh); + int sw = (int)rt->m_TEX0.TBW << 6; int dw = (int)m_TEX0.TBW << 6; @@ -287,18 +283,9 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt) int sx = o % sw; int sy = o / sw; - GSVector4 src, dst; - - src.x = rt->m_texture.m_scale.x * sx / rt->m_texture.GetWidth(); - src.y = rt->m_texture.m_scale.y * sy / rt->m_texture.GetHeight(); - src.z = rt->m_texture.m_scale.x * (sx + bw) / rt->m_texture.GetWidth(); - src.w = rt->m_texture.m_scale.y * (sy + bh) / rt->m_texture.GetHeight(); - - dst.x = rt->m_texture.m_scale.x * dx; - dst.y = rt->m_texture.m_scale.y * dy; - dst.z = rt->m_texture.m_scale.x * (dx + bw); - dst.w = rt->m_texture.m_scale.y * (dy + bh); - + GSVector4 src = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; + GSVector4 dst = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; + m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst); // TODO: this is quite a lot of StretchRect, do it with one Draw diff --git a/plugins/GSdx/GSTextureCache9.cpp b/plugins/GSdx/GSTextureCache9.cpp index 8ae4d90c42..fa8e028e4f 100644 --- a/plugins/GSdx/GSTextureCache9.cpp +++ b/plugins/GSdx/GSTextureCache9.cpp @@ -71,11 +71,7 @@ void GSTextureCache9::GSRenderTarget9::Update() // m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * 4); - GSVector4 dr( - m_texture.m_scale.x * r.left, - m_texture.m_scale.y * r.top, - m_texture.m_scale.x * r.right, - m_texture.m_scale.y * r.bottom); + GSVector4 dr = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy(); m_renderer->m_dev.StretchRect(texture, m_texture, dr); } @@ -106,12 +102,7 @@ void GSTextureCache9::GSRenderTarget9::Read(const GSVector4i& r) int w = r.width(); int h = r.height(); - GSVector4 src; - - src.x = m_texture.m_scale.x * r.left / m_texture.GetWidth(); - src.y = m_texture.m_scale.y * r.top / m_texture.GetHeight(); - src.z = m_texture.m_scale.x * r.right / m_texture.GetWidth(); - src.w = m_texture.m_scale.y * r.bottom / m_texture.GetHeight(); + GSVector4 src = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy() / GSVector4(m_texture.GetSize()).xyxy(); Texture offscreen; @@ -270,14 +261,20 @@ bool GSTextureCache9::GSTexture9::Create(GSRenderTarget* rt) m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.GetWidth(), rt->m_texture.GetHeight()); + GSVector4 size = GSVector4(rt->m_texture.GetSize()).xyxy(); + GSVector4 scale = GSVector4(rt->m_texture.m_scale).xyxy(); + int bw = 64; int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64; + GSVector4i br(0, 0, bw, bh); + int sw = (int)rt->m_TEX0.TBW << 6; int dw = (int)m_TEX0.TBW << 6; int dh = 1 << m_TEX0.TH; + if(sw != 0) for(int dy = 0; dy < dh; dy += bh) { for(int dx = 0; dx < dw; dx += bw) @@ -287,17 +284,8 @@ bool GSTextureCache9::GSTexture9::Create(GSRenderTarget* rt) int sx = o % sw; int sy = o / sw; - GSVector4 src, dst; - - src.x = rt->m_texture.m_scale.x * sx / rt->m_texture.GetWidth(); - src.y = rt->m_texture.m_scale.y * sy / rt->m_texture.GetHeight(); - src.z = rt->m_texture.m_scale.x * (sx + bw) / rt->m_texture.GetWidth(); - src.w = rt->m_texture.m_scale.y * (sy + bh) / rt->m_texture.GetHeight(); - - dst.x = rt->m_texture.m_scale.x * dx; - dst.y = rt->m_texture.m_scale.y * dy; - dst.z = rt->m_texture.m_scale.x * (dx + bw); - dst.w = rt->m_texture.m_scale.y * (dy + bh); + GSVector4 src = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; + GSVector4 dst = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst); diff --git a/plugins/GSdx/GSTextureCacheSW.cpp b/plugins/GSdx/GSTextureCacheSW.cpp index a9433b9ccb..524521e5ec 100644 --- a/plugins/GSdx/GSTextureCacheSW.cpp +++ b/plugins/GSdx/GSTextureCacheSW.cpp @@ -32,7 +32,7 @@ GSTextureCacheSW::~GSTextureCacheSW() RemoveAll(); } -const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r) +const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r) { GSLocalMemory& mem = m_state->m_mem; @@ -52,7 +52,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE continue; } - if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && !((GSVector4i)TEXA).eq(t2->m_TEXA))) + if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && TEXA != t2->m_TEXA)) { continue; } @@ -161,12 +161,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs; - GSVector4i r; - - r.left = rect.left & ~(s.x - 1); - r.top = rect.top & ~(s.y - 1); - r.right = (rect.right + (s.x - 1)) & ~(s.x - 1); - r.bottom = (rect.bottom + (s.y - 1)) & ~(s.y - 1); + GSVector4i r = rect.ralign(s); for(int y = r.top; y < r.bottom; y += s.y) { @@ -215,7 +210,7 @@ GSTextureCacheSW::GSTexture::~GSTexture() } } -bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* rect) +bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect) { if(m_complete) { @@ -249,15 +244,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff } - GSVector4i r(0, 0, tw, th); - - if(rect) - { - r.left = rect->left & ~(s.x - 1); - r.top = rect->top & ~(s.y - 1); - r.right = (rect->right + (s.x - 1)) & ~(s.x - 1); - r.bottom = (rect->bottom + (s.y - 1)) & ~(s.y - 1); - } + GSVector4i r = rect.ralign(s); if(r.left == 0 && r.top == 0 && r.right == tw && r.bottom == th) { diff --git a/plugins/GSdx/GSTextureCacheSW.h b/plugins/GSdx/GSTextureCacheSW.h index ea5128c566..4e99396ca9 100644 --- a/plugins/GSdx/GSTextureCacheSW.h +++ b/plugins/GSdx/GSTextureCacheSW.h @@ -44,7 +44,7 @@ public: explicit GSTexture(GSState* state); virtual ~GSTexture(); - bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r = NULL); + bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); }; protected: @@ -56,7 +56,7 @@ public: GSTextureCacheSW(GSState* state); virtual ~GSTextureCacheSW(); - const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r = NULL); + const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); void RemoveAll(); void IncAge(); diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 80a102c146..fdeeac7286 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -155,6 +155,11 @@ public: return bottom - top; } + GSVector4i rsize() const + { + return *this - xyxy(); // same as GSVector4i(0, 0, width(), height()); + } + bool rempty() const { return (*this < zwzw()).mask() != 0x00ff; @@ -178,6 +183,28 @@ public: return sat_i32(a); } + enum RoundMode {Outside, Inside, NegInf, PosInf}; + + template GSVector4i ralign(const GSVector2i& a) const + { + // a must be 1 << n + + GSVector4i mask = GSVector4i(a) - GSVector4i(1, 1); + + GSVector4i v; + + switch(mode) + { + case Inside: v = *this + mask; break; + case Outside: v = *this + mask.zwxy(); break; + case NegInf: v = *this; break; + case PosInf: v = *this + mask.zwzw(); break; + default: ASSERT(0); break; + } + + return v.andnot(mask.xyxy()); + } + GSVector4i fit(int arx, int ary) const; GSVector4i fit(int preset) const; @@ -2194,7 +2221,9 @@ public: GSVector4(int x, int y, int z, int w) { - m = _mm_cvtepi32_ps(_mm_set_epi32(w, z, y, x)); + GSVector4i v(x, y, z, w); + + m = _mm_cvtepi32_ps(v.m); } GSVector4(int x, int y) @@ -2212,6 +2241,11 @@ public: m = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&v)); } + explicit GSVector4(const GSVector2i& v) + { + m = _mm_cvtepi32_ps(_mm_loadl_epi64((__m128i*)&v)); + } + explicit GSVector4(float f) { m = _mm_set1_ps(f); diff --git a/plugins/GSdx/GSdx_vs2008.vcproj b/plugins/GSdx/GSdx_vs2008.vcproj index 2523f25124..7cd1211c08 100644 --- a/plugins/GSdx/GSdx_vs2008.vcproj +++ b/plugins/GSdx/GSdx_vs2008.vcproj @@ -1399,6 +1399,14 @@ + + +