GSdx: optimized local-to-local transfers a bit; this boosts FPS in games like FFXII or Ico, which do a lot of moves in local memory.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1226 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-05-20 15:35:31 +00:00
parent 23c833e922
commit 09ee76d9ed
16 changed files with 194 additions and 160 deletions

View File

@ -221,6 +221,8 @@ union name \
uint64 u64; \
uint32 u32[2]; \
void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \
bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \
bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \
operator GSVector4i() const {return GSVector4i::loadl(this);} \
struct { \
@ -864,7 +866,8 @@ REG64_(GIFReg, TRXPOS)
uint32 DSAX:11;
uint32 _PAD3:5;
uint32 DSAY:11;
uint32 DIR:2;
uint32 DIRY:1;
uint32 DIRX:1;
uint32 _PAD4:3;
REG_END

View File

@ -598,7 +598,6 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
// TODO
static const GSVector4i s_am(0x00008000);
static const GSVector4i s_bm(0x00007c00);
static const GSVector4i s_gm(0x000003e0);
static const GSVector4i s_rm(0x0000001f);
@ -610,7 +609,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c
const GSVector4i rm = s_rm;
const GSVector4i gm = s_gm;
const GSVector4i bm = s_bm;
// const GSVector4i am = s_am;
GSVector4i TA0(TEXA.TA0 << 24);
GSVector4i TA1(TEXA.TA1 << 24);
@ -625,12 +623,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c
for(int i = 0, j = w >> 3; i < j; i++)
{
c = s[i];
/*
cl = c.upl16();
ch = c.uph16();
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am);
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am);
*/
cl = c.upl16(c);
ch = c.uph16(c);
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15));
@ -642,12 +634,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c
for(int i = 0, j = w >> 3; i < j; i++)
{
c = s[i];
/*
cl = c.upl16();
ch = c.uph16();
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am).andnot(cl == GSVector4i::zero());
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am).andnot(ch == GSVector4i::zero());
*/
cl = c.upl16(c);
ch = c.uph16(c);
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero());

View File

@ -48,16 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0)
GSVector2i dst = GSLocalMemory::m_psm[TEX0.PSM].bs;
r.left = MulDiv(left, dst.x, src.x);
r.right = MulDiv(right, dst.x, src.x);
r.top = MulDiv(top, dst.y, src.y);
r.right = MulDiv(right, dst.x, src.x);
r.bottom = MulDiv(bottom, dst.y, src.y);
}
else
{
r.left = left & ~(src.x - 1);
r.right = (right + (src.x - 1) /* + 1 */) & ~(src.x - 1);
r.top = top & ~(src.y - 1);
r.bottom = (bottom + (src.y - 1) /* + 1 */) & ~(src.y - 1);
r = GSVector4i(left, top, right, bottom).ralign<GSVector4i::Outside>(src);
}
return r;

View File

@ -593,13 +593,8 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, uint32 c, uint32 psm, uint32 b
case 4: c = (c & 0xf) * 0x11111111; break;
}
GSVector4i clip;
GSVector4i clip = r.ralign<GSVector4i::Inside>(tbl.bs);
clip.left = (r.left + (w - 1)) & ~(w - 1);
clip.top = (r.top + (h - 1)) & ~(h - 1);
clip.right = r.right & ~(w - 1);
clip.bottom = r.bottom & ~(h - 1);
for(int y = r.top; y < clip.top; y++)
{
for(int x = r.left; x < r.right; x++)

View File

@ -249,7 +249,7 @@ protected:
if(dr[i].height() > 512) // hmm
{
int y = GetDeviceRect(i).height();
int y = GetDeviceSize(i).y;
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) y /= 2;
r.bottom = r.top + y;
}
@ -258,35 +258,26 @@ protected:
if(m_blur && blurdetected && i == 1)
{
src[i].x = tex[i].m_scale.x * r.left / tex[i].GetWidth();
src[i].y = (tex[i].m_scale.y * r.top + 1) / tex[i].GetHeight();
src[i].z = tex[i].m_scale.x * r.right / tex[i].GetWidth();
src[i].w = (tex[i].m_scale.y * r.bottom + 1) / tex[i].GetHeight();
}
else
{
src[i].x = tex[i].m_scale.x * r.left / tex[i].GetWidth();
src[i].y = tex[i].m_scale.y * r.top / tex[i].GetHeight();
src[i].z = tex[i].m_scale.x * r.right / tex[i].GetWidth();
src[i].w = tex[i].m_scale.y * r.bottom / tex[i].GetHeight();
r += GSVector4i(0, 1).xyxy();
}
GSVector2 o;
GSVector4 scale = GSVector4(tex[i].m_scale).xyxy();
o.x = 0;
o.y = 0;
src[i] = GSVector4(r) * scale / GSVector4(tex[i].GetSize()).xyxy();
GSVector2 o(0, 0);
if(dr[i].top - baseline >= 4) // 2?
{
o.y = tex[i].m_scale.y * (dr[i].top - baseline);
}
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) o.y /= 2;
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
{
o.y /= 2;
}
dst[i].x = o.x;
dst[i].y = o.y;
dst[i].z = o.x + tex[i].m_scale.x * r.width();
dst[i].w = o.y + tex[i].m_scale.y * r.height();
dst[i] = GSVector4(o).xyxy() + scale * GSVector4(r.rsize());
fs.x = max(fs.x, (int)(dst[i].z + 0.5f));
fs.y = max(fs.y, (int)(dst[i].w + 0.5f));
@ -301,12 +292,7 @@ protected:
if(tex[0] || tex[1])
{
GSVector4 c;
c.r = (float)m_regs->BGCOLOR.R / 255;
c.g = (float)m_regs->BGCOLOR.G / 255;
c.b = (float)m_regs->BGCOLOR.B / 255;
c.a = (float)m_regs->PMODE.ALP / 255;
GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255;
m_dev.Merge(tex, src, dst, fs, slbg, mmod, c);

View File

@ -93,7 +93,7 @@ protected:
maxv = maxv.maxv(v0);
}
mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH, 16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).rcpnr();
mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).xyxy().rcpnr();
}
else
{
@ -211,12 +211,8 @@ protected:
r = vr + GSVector4i(-1, -1, 1, 1); // one more pixel because of bilinear filtering
GSVector2i bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs;
GSVector2i bsm(bs.x - 1, bs.y - 1);
r.left = max(r.left & ~bsm.x, 0);
r.top = max(r.top & ~bsm.y, 0);
r.right = min((r.right + bsm.x) & ~bsm.x, w);
r.bottom = min((r.bottom + bsm.y) & ~bsm.y, h);
r = r.ralign<GSVector4i::Outside>(bs).rintersect(GSVector4i(0, 0, w, h));
}
void VSync(int field)

View File

@ -449,7 +449,7 @@ protected:
MinMaxUV(w, h, r, p.sel.fst);
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, &r);
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
if(!t) {ASSERT(0); return;}

View File

@ -235,7 +235,7 @@ GSVector4i GSState::GetFrameRect(int i)
return r;
}
GSVector4i GSState::GetDeviceRect(int i)
GSVector2i GSState::GetDeviceSize(int i)
{
// TODO: other params of SMODE1 should affect the true device display size
@ -257,7 +257,7 @@ GSVector4i GSState::GetDeviceRect(int i)
h = (m_regs->SMODE1.CMOD & 1) ? 512 : 448;
}
return GSVector4i(0, 0, w, h);
return GSVector2i(w, h);
}
@ -279,7 +279,7 @@ bool GSState::IsEnabled(int i)
int GSState::GetFPS()
{
return ((m_regs->SMODE1.CMOD & 1) ? 50 : 60) / (m_regs->SMODE2.INT ? 1 : 2);
return ((m_regs->SMODE1.CMOD & 1) ? 50 : 60) >> (1 - m_regs->SMODE2.INT);
}
// GIFPackedRegHandler*
@ -486,7 +486,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
bool wt = m_mem.m_clut.WriteTest(r->TEX0, m_env.TEXCLUT);
if(wt || PRIM->CTXT == i && !((GSVector4i)r->TEX0).eq(m_env.CTXT[i].TEX0))
if(wt || PRIM->CTXT == i && r->TEX0 != m_env.CTXT[i].TEX0)
{
Flush();
}
@ -511,7 +511,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
template<int i> void GSState::GIFRegHandlerCLAMP(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->CLAMP).eq(m_env.CTXT[i].CLAMP))
if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
{
Flush();
}
@ -552,7 +552,7 @@ void GSState::GIFRegHandlerNOP(GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEX1(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->TEX1).eq(m_env.CTXT[i].TEX1))
if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
{
Flush();
}
@ -587,7 +587,7 @@ template<int i> void GSState::GIFRegHandlerXYOFFSET(GIFReg* r)
void GSState::GIFRegHandlerPRMODECONT(GIFReg* r)
{
if(!((GSVector4i)r->PRMODECONT).eq(m_env.PRMODECONT))
if(r->PRMODECONT != m_env.PRMODECONT)
{
Flush();
}
@ -621,7 +621,7 @@ void GSState::GIFRegHandlerPRMODE(GIFReg* r)
void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
{
if(!((GSVector4i)r->TEXCLUT).eq(m_env.TEXCLUT))
if(r->TEXCLUT != m_env.TEXCLUT)
{
Flush();
}
@ -631,7 +631,7 @@ void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
{
if(!((GSVector4i)r->SCANMSK).eq(m_env.SCANMSK))
if(r->SCANMSK != m_env.SCANMSK)
{
Flush();
}
@ -641,7 +641,7 @@ void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->MIPTBP1).eq(m_env.CTXT[i].MIPTBP1))
if(PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
{
Flush();
}
@ -651,7 +651,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->MIPTBP2).eq(m_env.CTXT[i].MIPTBP2))
if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
{
Flush();
}
@ -661,7 +661,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
void GSState::GIFRegHandlerTEXA(GIFReg* r)
{
if(!((GSVector4i)r->TEXA).eq(m_env.TEXA))
if(r->TEXA != m_env.TEXA)
{
Flush();
}
@ -671,7 +671,7 @@ void GSState::GIFRegHandlerTEXA(GIFReg* r)
void GSState::GIFRegHandlerFOGCOL(GIFReg* r)
{
if(!((GSVector4i)r->FOGCOL).eq(m_env.FOGCOL))
if(r->FOGCOL != m_env.FOGCOL)
{
Flush();
}
@ -688,7 +688,7 @@ void GSState::GIFRegHandlerTEXFLUSH(GIFReg* r)
template<int i> void GSState::GIFRegHandlerSCISSOR(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->SCISSOR).eq(m_env.CTXT[i].SCISSOR))
if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
{
Flush();
}
@ -705,7 +705,7 @@ template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
ASSERT(r->ALPHA.C != 3);
ASSERT(r->ALPHA.D != 3);
if(PRIM->CTXT == i && !((GSVector4i)r->ALPHA).eq(m_env.CTXT[i].ALPHA))
if(PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA)
{
Flush();
}
@ -721,7 +721,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r)
{
bool update = false;
if(!((GSVector4i)r->DIMX).eq(m_env.DIMX))
if(r->DIMX != m_env.DIMX)
{
Flush();
@ -738,7 +738,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r)
void GSState::GIFRegHandlerDTHE(GIFReg* r)
{
if(!((GSVector4i)r->DTHE).eq(m_env.DTHE))
if(r->DTHE != m_env.DTHE)
{
Flush();
}
@ -748,7 +748,7 @@ void GSState::GIFRegHandlerDTHE(GIFReg* r)
void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
{
if(!((GSVector4i)r->COLCLAMP).eq(m_env.COLCLAMP))
if(r->COLCLAMP != m_env.COLCLAMP)
{
Flush();
}
@ -758,7 +758,7 @@ void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->TEST).eq(m_env.CTXT[i].TEST))
if(PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST)
{
Flush();
}
@ -768,7 +768,7 @@ template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
void GSState::GIFRegHandlerPABE(GIFReg* r)
{
if(!((GSVector4i)r->PABE).eq(m_env.PABE))
if(r->PABE != m_env.PABE)
{
Flush();
}
@ -778,7 +778,7 @@ void GSState::GIFRegHandlerPABE(GIFReg* r)
template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->FBA).eq(m_env.CTXT[i].FBA))
if(PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA)
{
Flush();
}
@ -788,7 +788,7 @@ template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
template<int i> void GSState::GIFRegHandlerFRAME(GIFReg* r)
{
if(PRIM->CTXT == i && !((GSVector4i)r->FRAME).eq(m_env.CTXT[i].FRAME))
if(PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME)
{
Flush();
}
@ -807,7 +807,7 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
r->ZBUF.PSM |= 0x30;
if(PRIM->CTXT == i && !((GSVector4i)r->ZBUF).eq(m_env.CTXT[i].ZBUF))
if(PRIM->CTXT == i && r->ZBUF != m_env.CTXT[i].ZBUF)
{
Flush();
}
@ -825,7 +825,7 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
{
if(!((GSVector4i)r->BITBLTBUF).eq(m_env.BITBLTBUF))
if(r->BITBLTBUF != m_env.BITBLTBUF)
{
FlushWrite();
}
@ -845,7 +845,7 @@ void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
{
if(!((GSVector4i)r->TRXPOS).eq(m_env.TRXPOS))
if(r->TRXPOS != m_env.TRXPOS)
{
FlushWrite();
}
@ -855,7 +855,7 @@ void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
void GSState::GIFRegHandlerTRXREG(GIFReg* r)
{
if(!((GSVector4i)r->TRXREG).eq(m_env.TRXREG))
if(r->TRXREG != m_env.TRXREG)
{
FlushWrite();
}
@ -1036,9 +1036,15 @@ void GSState::Move()
int xinc = 1;
int yinc = 1;
if(sx < dx) {sx += w - 1; dx += w - 1; xinc = -1;}
if(sy < dy) {sy += h - 1; dy += h - 1; yinc = -1;}
if(m_env.TRXPOS.DIRX) {sx += w - 1; dx += w - 1; xinc = -1;}
if(m_env.TRXPOS.DIRY) {sy += h - 1; dy += h - 1; yinc = -1;}
/*
printf("%05x %d %d => %05x %d %d (%d%d), %d %d %d %d %d %d\n",
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM,
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM,
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
sx, sy, dx, dy, w, h);
*/
/*
GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp;
GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp;
@ -1051,19 +1057,80 @@ void GSState::Move()
const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM];
const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM];
if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32)
if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{
int* soffset = spsm.rowOffset[0];
int* doffset = dpsm.rowOffset[0];
if(spsm.trbpp == 32)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
}
}
}
else if(spsm.trbpp == 24)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
m_mem.WritePixel24(dbase + doffset[dx], m_mem.ReadPixel24(sbase + soffset[sx]));
}
}
}
else // if(spsm.trbpp == 16)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
m_mem.WritePixel16(dbase + doffset[dx], m_mem.ReadPixel16(sbase + soffset[sx]));
}
}
}
}
else if(m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
uint32 sbase = GSLocalMemory::PixelAddress8(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
int* soffset = spsm.rowOffset[sy & 7];
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
uint32 dbase = GSLocalMemory::PixelAddress8(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
int* doffset = dpsm.rowOffset[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
m_mem.WritePixel8(dbase + doffset[dx], m_mem.ReadPixel8(sbase + soffset[sx]));
}
}
}
else if(m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
{
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
{
uint32 sbase = GSLocalMemory::PixelAddress4(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
int* soffset = spsm.rowOffset[sy & 7];
uint32 dbase = GSLocalMemory::PixelAddress4(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
int* doffset = dpsm.rowOffset[dy & 7];
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
{
m_mem.WritePixel4(dbase + doffset[dx], m_mem.ReadPixel4(sbase + soffset[sx]));
}
}
}

View File

@ -221,7 +221,7 @@ public:
GSVector4i GetDisplayRect(int i = -1);
GSVector4i GetFrameRect(int i = -1);
GSVector4i GetDeviceRect(int i = -1);
GSVector2i GetDeviceSize(int i = -1);
bool IsEnabled(int i);

View File

@ -376,7 +376,7 @@ public:
int ww = (int)(fr.left + rt->m_TEX0.TBW * 64);
int hh = (int)(fr.top + m_renderer->GetDisplayRect().height());
if(hh <= m_renderer->GetDeviceRect().height() / 2)
if(hh <= m_renderer->GetDeviceSize().y / 2)
{
hh *= 2;
}

View File

@ -69,11 +69,7 @@ void GSTextureCache10::GSRenderTargetHW10::Update()
texture.Update(GSVector4i(0, 0, w, h), buff, pitch);
GSVector4 dr(
m_texture.m_scale.x * r.left,
m_texture.m_scale.y * r.top,
m_texture.m_scale.x * r.right,
m_texture.m_scale.y * r.bottom);
GSVector4 dr = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy();
m_renderer->m_dev.StretchRect(texture, m_texture, dr);
@ -103,12 +99,7 @@ void GSTextureCache10::GSRenderTargetHW10::Read(const GSVector4i& r)
int w = r.width();
int h = r.height();
GSVector4 src;
src.x = m_texture.m_scale.x * r.left / m_texture.GetWidth();
src.y = m_texture.m_scale.y * r.top / m_texture.GetHeight();
src.z = m_texture.m_scale.x * r.right / m_texture.GetWidth();
src.w = m_texture.m_scale.y * r.bottom / m_texture.GetHeight();
GSVector4 src = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy() / GSVector4(m_texture.GetSize()).xyxy();
DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM;
@ -269,9 +260,14 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt)
m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.GetWidth(), rt->m_texture.GetHeight());
GSVector4 size = GSVector4(rt->m_texture.GetSize()).xyxy();
GSVector4 scale = GSVector4(rt->m_texture.m_scale).xyxy();
int bw = 64;
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
GSVector4i br(0, 0, bw, bh);
int sw = (int)rt->m_TEX0.TBW << 6;
int dw = (int)m_TEX0.TBW << 6;
@ -287,18 +283,9 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt)
int sx = o % sw;
int sy = o / sw;
GSVector4 src, dst;
src.x = rt->m_texture.m_scale.x * sx / rt->m_texture.GetWidth();
src.y = rt->m_texture.m_scale.y * sy / rt->m_texture.GetHeight();
src.z = rt->m_texture.m_scale.x * (sx + bw) / rt->m_texture.GetWidth();
src.w = rt->m_texture.m_scale.y * (sy + bh) / rt->m_texture.GetHeight();
dst.x = rt->m_texture.m_scale.x * dx;
dst.y = rt->m_texture.m_scale.y * dy;
dst.z = rt->m_texture.m_scale.x * (dx + bw);
dst.w = rt->m_texture.m_scale.y * (dy + bh);
GSVector4 src = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
GSVector4 dst = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst);
// TODO: this is quite a lot of StretchRect, do it with one Draw

View File

@ -71,11 +71,7 @@ void GSTextureCache9::GSRenderTarget9::Update()
// m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * 4);
GSVector4 dr(
m_texture.m_scale.x * r.left,
m_texture.m_scale.y * r.top,
m_texture.m_scale.x * r.right,
m_texture.m_scale.y * r.bottom);
GSVector4 dr = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy();
m_renderer->m_dev.StretchRect(texture, m_texture, dr);
}
@ -106,12 +102,7 @@ void GSTextureCache9::GSRenderTarget9::Read(const GSVector4i& r)
int w = r.width();
int h = r.height();
GSVector4 src;
src.x = m_texture.m_scale.x * r.left / m_texture.GetWidth();
src.y = m_texture.m_scale.y * r.top / m_texture.GetHeight();
src.z = m_texture.m_scale.x * r.right / m_texture.GetWidth();
src.w = m_texture.m_scale.y * r.bottom / m_texture.GetHeight();
GSVector4 src = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy() / GSVector4(m_texture.GetSize()).xyxy();
Texture offscreen;
@ -270,14 +261,20 @@ bool GSTextureCache9::GSTexture9::Create(GSRenderTarget* rt)
m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.GetWidth(), rt->m_texture.GetHeight());
GSVector4 size = GSVector4(rt->m_texture.GetSize()).xyxy();
GSVector4 scale = GSVector4(rt->m_texture.m_scale).xyxy();
int bw = 64;
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
GSVector4i br(0, 0, bw, bh);
int sw = (int)rt->m_TEX0.TBW << 6;
int dw = (int)m_TEX0.TBW << 6;
int dh = 1 << m_TEX0.TH;
if(sw != 0)
for(int dy = 0; dy < dh; dy += bh)
{
for(int dx = 0; dx < dw; dx += bw)
@ -287,17 +284,8 @@ bool GSTextureCache9::GSTexture9::Create(GSRenderTarget* rt)
int sx = o % sw;
int sy = o / sw;
GSVector4 src, dst;
src.x = rt->m_texture.m_scale.x * sx / rt->m_texture.GetWidth();
src.y = rt->m_texture.m_scale.y * sy / rt->m_texture.GetHeight();
src.z = rt->m_texture.m_scale.x * (sx + bw) / rt->m_texture.GetWidth();
src.w = rt->m_texture.m_scale.y * (sy + bh) / rt->m_texture.GetHeight();
dst.x = rt->m_texture.m_scale.x * dx;
dst.y = rt->m_texture.m_scale.y * dy;
dst.z = rt->m_texture.m_scale.x * (dx + bw);
dst.w = rt->m_texture.m_scale.y * (dy + bh);
GSVector4 src = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
GSVector4 dst = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst);

View File

@ -32,7 +32,7 @@ GSTextureCacheSW::~GSTextureCacheSW()
RemoveAll();
}
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r)
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
{
GSLocalMemory& mem = m_state->m_mem;
@ -52,7 +52,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
continue;
}
if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && !((GSVector4i)TEXA).eq(t2->m_TEXA)))
if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && TEXA != t2->m_TEXA))
{
continue;
}
@ -161,12 +161,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons
GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs;
GSVector4i r;
r.left = rect.left & ~(s.x - 1);
r.top = rect.top & ~(s.y - 1);
r.right = (rect.right + (s.x - 1)) & ~(s.x - 1);
r.bottom = (rect.bottom + (s.y - 1)) & ~(s.y - 1);
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
for(int y = r.top; y < r.bottom; y += s.y)
{
@ -215,7 +210,7 @@ GSTextureCacheSW::GSTexture::~GSTexture()
}
}
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* rect)
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
{
if(m_complete)
{
@ -249,15 +244,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
}
GSVector4i r(0, 0, tw, th);
if(rect)
{
r.left = rect->left & ~(s.x - 1);
r.top = rect->top & ~(s.y - 1);
r.right = (rect->right + (s.x - 1)) & ~(s.x - 1);
r.bottom = (rect->bottom + (s.y - 1)) & ~(s.y - 1);
}
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
if(r.left == 0 && r.top == 0 && r.right == tw && r.bottom == th)
{

View File

@ -44,7 +44,7 @@ public:
explicit GSTexture(GSState* state);
virtual ~GSTexture();
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r = NULL);
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
};
protected:
@ -56,7 +56,7 @@ public:
GSTextureCacheSW(GSState* state);
virtual ~GSTextureCacheSW();
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r = NULL);
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
void RemoveAll();
void IncAge();

View File

@ -155,6 +155,11 @@ public:
return bottom - top;
}
// Returns this rectangle translated so that its top-left corner sits at the
// origin; the result equals GSVector4i(0, 0, width(), height()).
GSVector4i rsize() const
{
    // xyxy() repeats the top-left corner in both halves of the vector, so
    // subtracting it zeroes (left, top) and turns (right, bottom) into the extent.
    const GSVector4i origin = xyxy();

    return *this - origin;
}
bool rempty() const
{
return (*this < zwzw()).mask() != 0x00ff;
@ -178,6 +183,28 @@ public:
return sat_i32(a);
}
// Rounding modes for ralign(): the direction each rectangle edge moves when it
// is snapped to a block boundary.
//   Outside - left/top round down, right/bottom round up (rectangle can only grow)
//   Inside  - left/top round up, right/bottom round down (rectangle can only shrink)
//   NegInf  - every edge rounds down
//   PosInf  - every edge rounds up
enum RoundMode {Outside, Inside, NegInf, PosInf};
// Returns this rectangle (left, top, right, bottom) with every edge aligned to
// a multiple of the block size: a.x for left/right, a.y for top/bottom.
// mode is one of the RoundMode values and picks the rounding direction per edge.
// Each component of a must be a power of two, because alignment is done by
// clearing the low bits.
template<int mode> GSVector4i ralign(const GSVector2i& a) const
{
    // a must be 1 << n

    // (a.x - 1, a.y - 1) in the two low lanes.
    // NOTE(review): Inside/Outside only behave as named if the two upper lanes
    // of mask are zero - confirm against the GSVector4i(const GSVector2i&) and
    // GSVector4i(int, int) constructors.
    GSVector4i mask = GSVector4i(a) - GSVector4i(1, 1);

    GSVector4i v;

    // Bias the edges that have to round up; the final andnot rounds everything down.
    switch(mode)
    {
    case Inside: v = *this + mask; break;          // bias left/top only
    case Outside: v = *this + mask.zwxy(); break;  // bias right/bottom only
    case NegInf: v = *this; break;                 // no bias
    // All four edges need the bias. zwzw() would add the upper lanes of mask,
    // which carry no bias, and make this mode indistinguishable from NegInf.
    case PosInf: v = *this + mask.xyxy(); break;
    default: ASSERT(0); break;
    }

    // Clear the low bits of every edge.
    return v.andnot(mask.xyxy());
}
}
GSVector4i fit(int arx, int ary) const;
GSVector4i fit(int preset) const;
@ -2194,7 +2221,9 @@ public:
GSVector4(int x, int y, int z, int w)
{
m = _mm_cvtepi32_ps(_mm_set_epi32(w, z, y, x));
GSVector4i v(x, y, z, w);
m = _mm_cvtepi32_ps(v.m);
}
GSVector4(int x, int y)
@ -2212,6 +2241,11 @@ public:
m = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&v));
}
explicit GSVector4(const GSVector2i& v)
{
m = _mm_cvtepi32_ps(_mm_loadl_epi64((__m128i*)&v));
}
explicit GSVector4(float f)
{
m = _mm_set1_ps(f);

View File

@ -1399,6 +1399,14 @@
<File
RelativePath=".\GSTextureCacheSW.cpp"
>
<FileConfiguration
Name="Release SSE4|Win32"
>
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\GSTextureFX10.cpp"