mirror of https://github.com/PCSX2/pcsx2.git
GSdx: optimized local-to-local transfers a bit; this boosts fps in games like FFXII or ICO, which do a lot of moves in local memory.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1226 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
23c833e922
commit
09ee76d9ed
|
@ -221,6 +221,8 @@ union name \
|
|||
uint64 u64; \
|
||||
uint32 u32[2]; \
|
||||
void operator = (const GSVector4i& v) {GSVector4i::storel(this, v);} \
|
||||
bool operator == (const union name& r) const {return ((GSVector4i)r).eq(*this);} \
|
||||
bool operator != (const union name& r) const {return !((GSVector4i)r).eq(*this);} \
|
||||
operator GSVector4i() const {return GSVector4i::loadl(this);} \
|
||||
struct { \
|
||||
|
||||
|
@ -864,7 +866,8 @@ REG64_(GIFReg, TRXPOS)
|
|||
uint32 DSAX:11;
|
||||
uint32 _PAD3:5;
|
||||
uint32 DSAY:11;
|
||||
uint32 DIR:2;
|
||||
uint32 DIRY:1;
|
||||
uint32 DIRX:1;
|
||||
uint32 _PAD4:3;
|
||||
REG_END
|
||||
|
||||
|
|
|
@ -598,7 +598,6 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
|
|||
|
||||
// TODO
|
||||
|
||||
static const GSVector4i s_am(0x00008000);
|
||||
static const GSVector4i s_bm(0x00007c00);
|
||||
static const GSVector4i s_gm(0x000003e0);
|
||||
static const GSVector4i s_rm(0x0000001f);
|
||||
|
@ -610,7 +609,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c
|
|||
const GSVector4i rm = s_rm;
|
||||
const GSVector4i gm = s_gm;
|
||||
const GSVector4i bm = s_bm;
|
||||
// const GSVector4i am = s_am;
|
||||
|
||||
GSVector4i TA0(TEXA.TA0 << 24);
|
||||
GSVector4i TA1(TEXA.TA1 << 24);
|
||||
|
@ -625,12 +623,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c
|
|||
for(int i = 0, j = w >> 3; i < j; i++)
|
||||
{
|
||||
c = s[i];
|
||||
/*
|
||||
cl = c.upl16();
|
||||
ch = c.uph16();
|
||||
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am);
|
||||
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am);
|
||||
*/
|
||||
cl = c.upl16(c);
|
||||
ch = c.uph16(c);
|
||||
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15));
|
||||
|
@ -642,12 +634,6 @@ void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, c
|
|||
for(int i = 0, j = w >> 3; i < j; i++)
|
||||
{
|
||||
c = s[i];
|
||||
/*
|
||||
cl = c.upl16();
|
||||
ch = c.uph16();
|
||||
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA1.blend(TA0, cl < am).andnot(cl == GSVector4i::zero());
|
||||
d[i * 2 + 1] = ((ch & rm) << 3) | ((ch & gm) << 6) | ((ch & bm) << 9) | TA1.blend(TA0, ch < am).andnot(ch == GSVector4i::zero());
|
||||
*/
|
||||
cl = c.upl16(c);
|
||||
ch = c.uph16(c);
|
||||
d[i * 2 + 0] = ((cl & rm) << 3) | ((cl & gm) << 6) | ((cl & bm) << 9) | TA0.blend8(TA1, cl.sra16(15)).andnot(cl == GSVector4i::zero());
|
||||
|
|
|
@ -48,16 +48,13 @@ GSVector4i GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0)
|
|||
GSVector2i dst = GSLocalMemory::m_psm[TEX0.PSM].bs;
|
||||
|
||||
r.left = MulDiv(left, dst.x, src.x);
|
||||
r.right = MulDiv(right, dst.x, src.x);
|
||||
r.top = MulDiv(top, dst.y, src.y);
|
||||
r.right = MulDiv(right, dst.x, src.x);
|
||||
r.bottom = MulDiv(bottom, dst.y, src.y);
|
||||
}
|
||||
else
|
||||
{
|
||||
r.left = left & ~(src.x - 1);
|
||||
r.right = (right + (src.x - 1) /* + 1 */) & ~(src.x - 1);
|
||||
r.top = top & ~(src.y - 1);
|
||||
r.bottom = (bottom + (src.y - 1) /* + 1 */) & ~(src.y - 1);
|
||||
r = GSVector4i(left, top, right, bottom).ralign<GSVector4i::Outside>(src);
|
||||
}
|
||||
|
||||
return r;
|
||||
|
|
|
@ -593,13 +593,8 @@ bool GSLocalMemory::FillRect(const GSVector4i& r, uint32 c, uint32 psm, uint32 b
|
|||
case 4: c = (c & 0xf) * 0x11111111; break;
|
||||
}
|
||||
|
||||
GSVector4i clip;
|
||||
GSVector4i clip = r.ralign<GSVector4i::Inside>(tbl.bs);
|
||||
|
||||
clip.left = (r.left + (w - 1)) & ~(w - 1);
|
||||
clip.top = (r.top + (h - 1)) & ~(h - 1);
|
||||
clip.right = r.right & ~(w - 1);
|
||||
clip.bottom = r.bottom & ~(h - 1);
|
||||
|
||||
for(int y = r.top; y < clip.top; y++)
|
||||
{
|
||||
for(int x = r.left; x < r.right; x++)
|
||||
|
|
|
@ -249,7 +249,7 @@ protected:
|
|||
|
||||
if(dr[i].height() > 512) // hmm
|
||||
{
|
||||
int y = GetDeviceRect(i).height();
|
||||
int y = GetDeviceSize(i).y;
|
||||
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) y /= 2;
|
||||
r.bottom = r.top + y;
|
||||
}
|
||||
|
@ -258,35 +258,26 @@ protected:
|
|||
|
||||
if(m_blur && blurdetected && i == 1)
|
||||
{
|
||||
src[i].x = tex[i].m_scale.x * r.left / tex[i].GetWidth();
|
||||
src[i].y = (tex[i].m_scale.y * r.top + 1) / tex[i].GetHeight();
|
||||
src[i].z = tex[i].m_scale.x * r.right / tex[i].GetWidth();
|
||||
src[i].w = (tex[i].m_scale.y * r.bottom + 1) / tex[i].GetHeight();
|
||||
}
|
||||
else
|
||||
{
|
||||
src[i].x = tex[i].m_scale.x * r.left / tex[i].GetWidth();
|
||||
src[i].y = tex[i].m_scale.y * r.top / tex[i].GetHeight();
|
||||
src[i].z = tex[i].m_scale.x * r.right / tex[i].GetWidth();
|
||||
src[i].w = tex[i].m_scale.y * r.bottom / tex[i].GetHeight();
|
||||
r += GSVector4i(0, 1).xyxy();
|
||||
}
|
||||
|
||||
GSVector2 o;
|
||||
GSVector4 scale = GSVector4(tex[i].m_scale).xyxy();
|
||||
|
||||
o.x = 0;
|
||||
o.y = 0;
|
||||
src[i] = GSVector4(r) * scale / GSVector4(tex[i].GetSize()).xyxy();
|
||||
|
||||
GSVector2 o(0, 0);
|
||||
|
||||
if(dr[i].top - baseline >= 4) // 2?
|
||||
{
|
||||
o.y = tex[i].m_scale.y * (dr[i].top - baseline);
|
||||
}
|
||||
|
||||
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) o.y /= 2;
|
||||
if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD)
|
||||
{
|
||||
o.y /= 2;
|
||||
}
|
||||
|
||||
dst[i].x = o.x;
|
||||
dst[i].y = o.y;
|
||||
dst[i].z = o.x + tex[i].m_scale.x * r.width();
|
||||
dst[i].w = o.y + tex[i].m_scale.y * r.height();
|
||||
dst[i] = GSVector4(o).xyxy() + scale * GSVector4(r.rsize());
|
||||
|
||||
fs.x = max(fs.x, (int)(dst[i].z + 0.5f));
|
||||
fs.y = max(fs.y, (int)(dst[i].w + 0.5f));
|
||||
|
@ -301,12 +292,7 @@ protected:
|
|||
|
||||
if(tex[0] || tex[1])
|
||||
{
|
||||
GSVector4 c;
|
||||
|
||||
c.r = (float)m_regs->BGCOLOR.R / 255;
|
||||
c.g = (float)m_regs->BGCOLOR.G / 255;
|
||||
c.b = (float)m_regs->BGCOLOR.B / 255;
|
||||
c.a = (float)m_regs->PMODE.ALP / 255;
|
||||
GSVector4 c = GSVector4((int)m_regs->BGCOLOR.R, (int)m_regs->BGCOLOR.G, (int)m_regs->BGCOLOR.B, (int)m_regs->PMODE.ALP) / 255;
|
||||
|
||||
m_dev.Merge(tex, src, dst, fs, slbg, mmod, c);
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ protected:
|
|||
maxv = maxv.maxv(v0);
|
||||
}
|
||||
|
||||
mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH, 16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).rcpnr();
|
||||
mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).xyxy().rcpnr();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -211,12 +211,8 @@ protected:
|
|||
r = vr + GSVector4i(-1, -1, 1, 1); // one more pixel because of bilinear filtering
|
||||
|
||||
GSVector2i bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs;
|
||||
GSVector2i bsm(bs.x - 1, bs.y - 1);
|
||||
|
||||
r.left = max(r.left & ~bsm.x, 0);
|
||||
r.top = max(r.top & ~bsm.y, 0);
|
||||
r.right = min((r.right + bsm.x) & ~bsm.x, w);
|
||||
r.bottom = min((r.bottom + bsm.y) & ~bsm.y, h);
|
||||
r = r.ralign<GSVector4i::Outside>(bs).rintersect(GSVector4i(0, 0, w, h));
|
||||
}
|
||||
|
||||
void VSync(int field)
|
||||
|
|
|
@ -449,7 +449,7 @@ protected:
|
|||
|
||||
MinMaxUV(w, h, r, p.sel.fst);
|
||||
|
||||
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, &r);
|
||||
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
|
||||
|
||||
if(!t) {ASSERT(0); return;}
|
||||
|
||||
|
|
|
@ -235,7 +235,7 @@ GSVector4i GSState::GetFrameRect(int i)
|
|||
return r;
|
||||
}
|
||||
|
||||
GSVector4i GSState::GetDeviceRect(int i)
|
||||
GSVector2i GSState::GetDeviceSize(int i)
|
||||
{
|
||||
// TODO: other params of SMODE1 should affect the true device display size
|
||||
|
||||
|
@ -257,7 +257,7 @@ GSVector4i GSState::GetDeviceRect(int i)
|
|||
h = (m_regs->SMODE1.CMOD & 1) ? 512 : 448;
|
||||
}
|
||||
|
||||
return GSVector4i(0, 0, w, h);
|
||||
return GSVector2i(w, h);
|
||||
|
||||
}
|
||||
|
||||
|
@ -279,7 +279,7 @@ bool GSState::IsEnabled(int i)
|
|||
|
||||
int GSState::GetFPS()
|
||||
{
|
||||
return ((m_regs->SMODE1.CMOD & 1) ? 50 : 60) / (m_regs->SMODE2.INT ? 1 : 2);
|
||||
return ((m_regs->SMODE1.CMOD & 1) ? 50 : 60) >> (1 - m_regs->SMODE2.INT);
|
||||
}
|
||||
|
||||
// GIFPackedRegHandler*
|
||||
|
@ -486,7 +486,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
|
|||
|
||||
bool wt = m_mem.m_clut.WriteTest(r->TEX0, m_env.TEXCLUT);
|
||||
|
||||
if(wt || PRIM->CTXT == i && !((GSVector4i)r->TEX0).eq(m_env.CTXT[i].TEX0))
|
||||
if(wt || PRIM->CTXT == i && r->TEX0 != m_env.CTXT[i].TEX0)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -511,7 +511,7 @@ template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerCLAMP(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->CLAMP).eq(m_env.CTXT[i].CLAMP))
|
||||
if(PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -552,7 +552,7 @@ void GSState::GIFRegHandlerNOP(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerTEX1(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->TEX1).eq(m_env.CTXT[i].TEX1))
|
||||
if(PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -587,7 +587,7 @@ template<int i> void GSState::GIFRegHandlerXYOFFSET(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerPRMODECONT(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->PRMODECONT).eq(m_env.PRMODECONT))
|
||||
if(r->PRMODECONT != m_env.PRMODECONT)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -621,7 +621,7 @@ void GSState::GIFRegHandlerPRMODE(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->TEXCLUT).eq(m_env.TEXCLUT))
|
||||
if(r->TEXCLUT != m_env.TEXCLUT)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -631,7 +631,7 @@ void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->SCANMSK).eq(m_env.SCANMSK))
|
||||
if(r->SCANMSK != m_env.SCANMSK)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -641,7 +641,7 @@ void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->MIPTBP1).eq(m_env.CTXT[i].MIPTBP1))
|
||||
if(PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -651,7 +651,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->MIPTBP2).eq(m_env.CTXT[i].MIPTBP2))
|
||||
if(PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -661,7 +661,7 @@ template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerTEXA(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->TEXA).eq(m_env.TEXA))
|
||||
if(r->TEXA != m_env.TEXA)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -671,7 +671,7 @@ void GSState::GIFRegHandlerTEXA(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerFOGCOL(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->FOGCOL).eq(m_env.FOGCOL))
|
||||
if(r->FOGCOL != m_env.FOGCOL)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -688,7 +688,7 @@ void GSState::GIFRegHandlerTEXFLUSH(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerSCISSOR(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->SCISSOR).eq(m_env.CTXT[i].SCISSOR))
|
||||
if(PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -705,7 +705,7 @@ template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
|
|||
ASSERT(r->ALPHA.C != 3);
|
||||
ASSERT(r->ALPHA.D != 3);
|
||||
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->ALPHA).eq(m_env.CTXT[i].ALPHA))
|
||||
if(PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -721,7 +721,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r)
|
|||
{
|
||||
bool update = false;
|
||||
|
||||
if(!((GSVector4i)r->DIMX).eq(m_env.DIMX))
|
||||
if(r->DIMX != m_env.DIMX)
|
||||
{
|
||||
Flush();
|
||||
|
||||
|
@ -738,7 +738,7 @@ void GSState::GIFRegHandlerDIMX(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerDTHE(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->DTHE).eq(m_env.DTHE))
|
||||
if(r->DTHE != m_env.DTHE)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -748,7 +748,7 @@ void GSState::GIFRegHandlerDTHE(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->COLCLAMP).eq(m_env.COLCLAMP))
|
||||
if(r->COLCLAMP != m_env.COLCLAMP)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -758,7 +758,7 @@ void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->TEST).eq(m_env.CTXT[i].TEST))
|
||||
if(PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -768,7 +768,7 @@ template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerPABE(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->PABE).eq(m_env.PABE))
|
||||
if(r->PABE != m_env.PABE)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -778,7 +778,7 @@ void GSState::GIFRegHandlerPABE(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->FBA).eq(m_env.CTXT[i].FBA))
|
||||
if(PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -788,7 +788,7 @@ template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
|
|||
|
||||
template<int i> void GSState::GIFRegHandlerFRAME(GIFReg* r)
|
||||
{
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->FRAME).eq(m_env.CTXT[i].FRAME))
|
||||
if(PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -807,7 +807,7 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
|
|||
|
||||
r->ZBUF.PSM |= 0x30;
|
||||
|
||||
if(PRIM->CTXT == i && !((GSVector4i)r->ZBUF).eq(m_env.CTXT[i].ZBUF))
|
||||
if(PRIM->CTXT == i && r->ZBUF != m_env.CTXT[i].ZBUF)
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -825,7 +825,7 @@ template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->BITBLTBUF).eq(m_env.BITBLTBUF))
|
||||
if(r->BITBLTBUF != m_env.BITBLTBUF)
|
||||
{
|
||||
FlushWrite();
|
||||
}
|
||||
|
@ -845,7 +845,7 @@ void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->TRXPOS).eq(m_env.TRXPOS))
|
||||
if(r->TRXPOS != m_env.TRXPOS)
|
||||
{
|
||||
FlushWrite();
|
||||
}
|
||||
|
@ -855,7 +855,7 @@ void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
|
|||
|
||||
void GSState::GIFRegHandlerTRXREG(GIFReg* r)
|
||||
{
|
||||
if(!((GSVector4i)r->TRXREG).eq(m_env.TRXREG))
|
||||
if(r->TRXREG != m_env.TRXREG)
|
||||
{
|
||||
FlushWrite();
|
||||
}
|
||||
|
@ -1036,9 +1036,15 @@ void GSState::Move()
|
|||
int xinc = 1;
|
||||
int yinc = 1;
|
||||
|
||||
if(sx < dx) {sx += w - 1; dx += w - 1; xinc = -1;}
|
||||
if(sy < dy) {sy += h - 1; dy += h - 1; yinc = -1;}
|
||||
|
||||
if(m_env.TRXPOS.DIRX) {sx += w - 1; dx += w - 1; xinc = -1;}
|
||||
if(m_env.TRXPOS.DIRY) {sy += h - 1; dy += h - 1; yinc = -1;}
|
||||
/*
|
||||
printf("%05x %d %d => %05x %d %d (%d%d), %d %d %d %d %d %d\n",
|
||||
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM,
|
||||
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM,
|
||||
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
|
||||
sx, sy, dx, dy, w, h);
|
||||
*/
|
||||
/*
|
||||
GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp;
|
||||
GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp;
|
||||
|
@ -1051,19 +1057,80 @@ void GSState::Move()
|
|||
const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM];
|
||||
const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM];
|
||||
|
||||
if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32)
|
||||
if(spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
|
||||
{
|
||||
int* soffset = spsm.rowOffset[0];
|
||||
int* doffset = dpsm.rowOffset[0];
|
||||
|
||||
if(spsm.trbpp == 32)
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
|
||||
{
|
||||
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(spsm.trbpp == 24)
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
|
||||
{
|
||||
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
m_mem.WritePixel24(dbase + doffset[dx], m_mem.ReadPixel24(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
else // if(spsm.trbpp == 16)
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
|
||||
{
|
||||
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
m_mem.WritePixel16(dbase + doffset[dx], m_mem.ReadPixel16(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
|
||||
{
|
||||
uint32 sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
uint32 sbase = GSLocalMemory::PixelAddress8(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
int* soffset = spsm.rowOffset[sy & 7];
|
||||
|
||||
uint32 dbase = dpsm.pa(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
uint32 dbase = GSLocalMemory::PixelAddress8(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
int* doffset = dpsm.rowOffset[dy & 7];
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
m_mem.WritePixel32(dbase + doffset[dx], m_mem.ReadPixel32(sbase + soffset[sx]));
|
||||
m_mem.WritePixel8(dbase + doffset[dx], m_mem.ReadPixel8(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
||||
{
|
||||
for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w)
|
||||
{
|
||||
uint32 sbase = GSLocalMemory::PixelAddress4(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
|
||||
int* soffset = spsm.rowOffset[sy & 7];
|
||||
|
||||
uint32 dbase = GSLocalMemory::PixelAddress4(0, dy, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
|
||||
int* doffset = dpsm.rowOffset[dy & 7];
|
||||
|
||||
for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
|
||||
{
|
||||
m_mem.WritePixel4(dbase + doffset[dx], m_mem.ReadPixel4(sbase + soffset[sx]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -221,7 +221,7 @@ public:
|
|||
|
||||
GSVector4i GetDisplayRect(int i = -1);
|
||||
GSVector4i GetFrameRect(int i = -1);
|
||||
GSVector4i GetDeviceRect(int i = -1);
|
||||
GSVector2i GetDeviceSize(int i = -1);
|
||||
|
||||
bool IsEnabled(int i);
|
||||
|
||||
|
|
|
@ -376,7 +376,7 @@ public:
|
|||
int ww = (int)(fr.left + rt->m_TEX0.TBW * 64);
|
||||
int hh = (int)(fr.top + m_renderer->GetDisplayRect().height());
|
||||
|
||||
if(hh <= m_renderer->GetDeviceRect().height() / 2)
|
||||
if(hh <= m_renderer->GetDeviceSize().y / 2)
|
||||
{
|
||||
hh *= 2;
|
||||
}
|
||||
|
|
|
@ -69,11 +69,7 @@ void GSTextureCache10::GSRenderTargetHW10::Update()
|
|||
|
||||
texture.Update(GSVector4i(0, 0, w, h), buff, pitch);
|
||||
|
||||
GSVector4 dr(
|
||||
m_texture.m_scale.x * r.left,
|
||||
m_texture.m_scale.y * r.top,
|
||||
m_texture.m_scale.x * r.right,
|
||||
m_texture.m_scale.y * r.bottom);
|
||||
GSVector4 dr = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy();
|
||||
|
||||
m_renderer->m_dev.StretchRect(texture, m_texture, dr);
|
||||
|
||||
|
@ -103,12 +99,7 @@ void GSTextureCache10::GSRenderTargetHW10::Read(const GSVector4i& r)
|
|||
int w = r.width();
|
||||
int h = r.height();
|
||||
|
||||
GSVector4 src;
|
||||
|
||||
src.x = m_texture.m_scale.x * r.left / m_texture.GetWidth();
|
||||
src.y = m_texture.m_scale.y * r.top / m_texture.GetHeight();
|
||||
src.z = m_texture.m_scale.x * r.right / m_texture.GetWidth();
|
||||
src.w = m_texture.m_scale.y * r.bottom / m_texture.GetHeight();
|
||||
GSVector4 src = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy() / GSVector4(m_texture.GetSize()).xyxy();
|
||||
|
||||
DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
|
||||
|
@ -269,9 +260,14 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt)
|
|||
|
||||
m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.GetWidth(), rt->m_texture.GetHeight());
|
||||
|
||||
GSVector4 size = GSVector4(rt->m_texture.GetSize()).xyxy();
|
||||
GSVector4 scale = GSVector4(rt->m_texture.m_scale).xyxy();
|
||||
|
||||
int bw = 64;
|
||||
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
|
||||
|
||||
GSVector4i br(0, 0, bw, bh);
|
||||
|
||||
int sw = (int)rt->m_TEX0.TBW << 6;
|
||||
|
||||
int dw = (int)m_TEX0.TBW << 6;
|
||||
|
@ -287,18 +283,9 @@ bool GSTextureCache10::GSTextureHW10::Create(GSRenderTarget* rt)
|
|||
int sx = o % sw;
|
||||
int sy = o / sw;
|
||||
|
||||
GSVector4 src, dst;
|
||||
|
||||
src.x = rt->m_texture.m_scale.x * sx / rt->m_texture.GetWidth();
|
||||
src.y = rt->m_texture.m_scale.y * sy / rt->m_texture.GetHeight();
|
||||
src.z = rt->m_texture.m_scale.x * (sx + bw) / rt->m_texture.GetWidth();
|
||||
src.w = rt->m_texture.m_scale.y * (sy + bh) / rt->m_texture.GetHeight();
|
||||
|
||||
dst.x = rt->m_texture.m_scale.x * dx;
|
||||
dst.y = rt->m_texture.m_scale.y * dy;
|
||||
dst.z = rt->m_texture.m_scale.x * (dx + bw);
|
||||
dst.w = rt->m_texture.m_scale.y * (dy + bh);
|
||||
|
||||
GSVector4 src = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
|
||||
GSVector4 dst = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
|
||||
|
||||
m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst);
|
||||
|
||||
// TODO: this is quite a lot of StretchRect, do it with one Draw
|
||||
|
|
|
@ -71,11 +71,7 @@ void GSTextureCache9::GSRenderTarget9::Update()
|
|||
|
||||
// m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * 4);
|
||||
|
||||
GSVector4 dr(
|
||||
m_texture.m_scale.x * r.left,
|
||||
m_texture.m_scale.y * r.top,
|
||||
m_texture.m_scale.x * r.right,
|
||||
m_texture.m_scale.y * r.bottom);
|
||||
GSVector4 dr = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy();
|
||||
|
||||
m_renderer->m_dev.StretchRect(texture, m_texture, dr);
|
||||
}
|
||||
|
@ -106,12 +102,7 @@ void GSTextureCache9::GSRenderTarget9::Read(const GSVector4i& r)
|
|||
int w = r.width();
|
||||
int h = r.height();
|
||||
|
||||
GSVector4 src;
|
||||
|
||||
src.x = m_texture.m_scale.x * r.left / m_texture.GetWidth();
|
||||
src.y = m_texture.m_scale.y * r.top / m_texture.GetHeight();
|
||||
src.z = m_texture.m_scale.x * r.right / m_texture.GetWidth();
|
||||
src.w = m_texture.m_scale.y * r.bottom / m_texture.GetHeight();
|
||||
GSVector4 src = GSVector4(r) * GSVector4(m_texture.m_scale).xyxy() / GSVector4(m_texture.GetSize()).xyxy();
|
||||
|
||||
Texture offscreen;
|
||||
|
||||
|
@ -270,14 +261,20 @@ bool GSTextureCache9::GSTexture9::Create(GSRenderTarget* rt)
|
|||
|
||||
m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.GetWidth(), rt->m_texture.GetHeight());
|
||||
|
||||
GSVector4 size = GSVector4(rt->m_texture.GetSize()).xyxy();
|
||||
GSVector4 scale = GSVector4(rt->m_texture.m_scale).xyxy();
|
||||
|
||||
int bw = 64;
|
||||
int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
|
||||
|
||||
GSVector4i br(0, 0, bw, bh);
|
||||
|
||||
int sw = (int)rt->m_TEX0.TBW << 6;
|
||||
|
||||
int dw = (int)m_TEX0.TBW << 6;
|
||||
int dh = 1 << m_TEX0.TH;
|
||||
|
||||
if(sw != 0)
|
||||
for(int dy = 0; dy < dh; dy += bh)
|
||||
{
|
||||
for(int dx = 0; dx < dw; dx += bw)
|
||||
|
@ -287,17 +284,8 @@ bool GSTextureCache9::GSTexture9::Create(GSRenderTarget* rt)
|
|||
int sx = o % sw;
|
||||
int sy = o / sw;
|
||||
|
||||
GSVector4 src, dst;
|
||||
|
||||
src.x = rt->m_texture.m_scale.x * sx / rt->m_texture.GetWidth();
|
||||
src.y = rt->m_texture.m_scale.y * sy / rt->m_texture.GetHeight();
|
||||
src.z = rt->m_texture.m_scale.x * (sx + bw) / rt->m_texture.GetWidth();
|
||||
src.w = rt->m_texture.m_scale.y * (sy + bh) / rt->m_texture.GetHeight();
|
||||
|
||||
dst.x = rt->m_texture.m_scale.x * dx;
|
||||
dst.y = rt->m_texture.m_scale.y * dy;
|
||||
dst.z = rt->m_texture.m_scale.x * (dx + bw);
|
||||
dst.w = rt->m_texture.m_scale.y * (dy + bh);
|
||||
GSVector4 src = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size;
|
||||
GSVector4 dst = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale;
|
||||
|
||||
m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst);
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ GSTextureCacheSW::~GSTextureCacheSW()
|
|||
RemoveAll();
|
||||
}
|
||||
|
||||
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r)
|
||||
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
|
||||
{
|
||||
GSLocalMemory& mem = m_state->m_mem;
|
||||
|
||||
|
@ -52,7 +52,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
|
|||
continue;
|
||||
}
|
||||
|
||||
if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && !((GSVector4i)TEXA).eq(t2->m_TEXA)))
|
||||
if((psm.trbpp == 16 || psm.trbpp == 24) && (t2->m_TEX0.TCC != TEX0.TCC || TEX0.TCC && TEXA != t2->m_TEXA))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -161,12 +161,7 @@ void GSTextureCacheSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, cons
|
|||
|
||||
GSVector2i s = (bp & 31) == 0 ? psm.pgs : psm.bs;
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
r.left = rect.left & ~(s.x - 1);
|
||||
r.top = rect.top & ~(s.y - 1);
|
||||
r.right = (rect.right + (s.x - 1)) & ~(s.x - 1);
|
||||
r.bottom = (rect.bottom + (s.y - 1)) & ~(s.y - 1);
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
|
||||
|
||||
for(int y = r.top; y < r.bottom; y += s.y)
|
||||
{
|
||||
|
@ -215,7 +210,7 @@ GSTextureCacheSW::GSTexture::~GSTexture()
|
|||
}
|
||||
}
|
||||
|
||||
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* rect)
|
||||
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
|
||||
{
|
||||
if(m_complete)
|
||||
{
|
||||
|
@ -249,15 +244,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
m_tw = max(psm.pal > 0 ? 5 : 3, TEX0.TW); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
|
||||
}
|
||||
|
||||
GSVector4i r(0, 0, tw, th);
|
||||
|
||||
if(rect)
|
||||
{
|
||||
r.left = rect->left & ~(s.x - 1);
|
||||
r.top = rect->top & ~(s.y - 1);
|
||||
r.right = (rect->right + (s.x - 1)) & ~(s.x - 1);
|
||||
r.bottom = (rect->bottom + (s.y - 1)) & ~(s.y - 1);
|
||||
}
|
||||
GSVector4i r = rect.ralign<GSVector4i::Outside>(s);
|
||||
|
||||
if(r.left == 0 && r.top == 0 && r.right == tw && r.bottom == th)
|
||||
{
|
||||
|
|
|
@ -44,7 +44,7 @@ public:
|
|||
explicit GSTexture(GSState* state);
|
||||
virtual ~GSTexture();
|
||||
|
||||
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r = NULL);
|
||||
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
||||
};
|
||||
|
||||
protected:
|
||||
|
@ -56,7 +56,7 @@ public:
|
|||
GSTextureCacheSW(GSState* state);
|
||||
virtual ~GSTextureCacheSW();
|
||||
|
||||
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i* r = NULL);
|
||||
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
||||
|
||||
void RemoveAll();
|
||||
void IncAge();
|
||||
|
|
|
@ -155,6 +155,11 @@ public:
|
|||
return bottom - top;
|
||||
}
|
||||
|
||||
GSVector4i rsize() const
{
	// Returns this rect translated so that its top-left corner sits at the origin.
	// xyxy() replicates (left, top) into both halves, so the subtraction yields
	// the same value as GSVector4i(0, 0, width(), height()).

	const GSVector4i topleft = xyxy();

	return *this - topleft;
}
|
||||
|
||||
bool rempty() const
|
||||
{
|
||||
return (*this < zwzw()).mask() != 0x00ff;
|
||||
|
@ -178,6 +183,28 @@ public:
|
|||
return sat_i32(a);
|
||||
}
|
||||
|
||||
enum RoundMode {Outside, Inside, NegInf, PosInf};
|
||||
|
||||
// Aligns the edges of this rect (left, top, right, bottom) to multiples of the
// cell size a, with the rounding direction selected by mode:
//
//   Inside  - left/top round up, right/bottom round down (rect can only shrink)
//   Outside - left/top round down, right/bottom round up (rect can only grow)
//   NegInf  - every edge rounds down
//   PosInf  - every edge rounds up
//
// a.x and a.y must each be a power of two (1 << n), because the rounding is
// done by clearing the low bits selected by (a - 1).

template<int mode> GSVector4i ralign(const GSVector2i& a) const
{
	// a must be 1 << n

	// The Outside case relies on mask being (a.x - 1, a.y - 1, 0, 0): the bias sits in x/y, z/w stay zero.

	GSVector4i mask = GSVector4i(a) - GSVector4i(1, 1);

	GSVector4i v;

	// Adding the bias to an edge before the low bits are cleared makes that edge round up; an edge without bias rounds down.

	switch(mode)
	{
	case Inside: v = *this + mask; break; // bias left/top only
	case Outside: v = *this + mask.zwxy(); break; // bias right/bottom only
	case NegInf: v = *this; break; // no bias
	case PosInf: v = *this + mask.xyxy(); break; // bias all four edges (zwzw() would be all zeros and behave like NegInf)
	default: ASSERT(0); break;
	}

	// Clear the low bits of every edge: x mask for left/right, y mask for top/bottom.

	return v.andnot(mask.xyxy());
}
|
||||
|
||||
GSVector4i fit(int arx, int ary) const;
|
||||
|
||||
GSVector4i fit(int preset) const;
|
||||
|
@ -2194,7 +2221,9 @@ public:
|
|||
|
||||
GSVector4(int x, int y, int z, int w)
|
||||
{
|
||||
m = _mm_cvtepi32_ps(_mm_set_epi32(w, z, y, x));
|
||||
GSVector4i v(x, y, z, w);
|
||||
|
||||
m = _mm_cvtepi32_ps(v.m);
|
||||
}
|
||||
|
||||
GSVector4(int x, int y)
|
||||
|
@ -2212,6 +2241,11 @@ public:
|
|||
m = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&v));
|
||||
}
|
||||
|
||||
explicit GSVector4(const GSVector2i& v)
|
||||
{
|
||||
m = _mm_cvtepi32_ps(_mm_loadl_epi64((__m128i*)&v));
|
||||
}
|
||||
|
||||
explicit GSVector4(float f)
|
||||
{
|
||||
m = _mm_set1_ps(f);
|
||||
|
|
|
@ -1399,6 +1399,14 @@
|
|||
<File
|
||||
RelativePath=".\GSTextureCacheSW.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Release SSE4|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AssemblerOutput="4"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSTextureFX10.cpp"
|
||||
|
|
Loading…
Reference in New Issue