mirror of https://github.com/PCSX2/pcsx2.git
GSdx: a small mipmapping optimization for the constant-LOD case: uv_minmax is now precomputed once in BeginDraw instead of in the generated code (this was already marked as a TODO).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4518 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ef979dde29
commit
cc28241128
|
@ -40,6 +40,16 @@ void GSDrawScanline::BeginDraw(const void* param)
|
||||||
{
|
{
|
||||||
memcpy(&m_global, param, sizeof(m_global));
|
memcpy(&m_global, param, sizeof(m_global));
|
||||||
|
|
||||||
|
if(m_global.sel.mmin && m_global.sel.lcm)
|
||||||
|
{
|
||||||
|
GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.x);
|
||||||
|
|
||||||
|
v = v.upl16(v);
|
||||||
|
|
||||||
|
m_local.temp.uv_minmax[0] = v.upl32(v);
|
||||||
|
m_local.temp.uv_minmax[1] = v.uph32(v);
|
||||||
|
}
|
||||||
|
|
||||||
m_ds = m_ds_map[m_global.sel];
|
m_ds = m_ds_map[m_global.sel];
|
||||||
|
|
||||||
if(m_global.sel.aa1)
|
if(m_global.sel.aa1)
|
||||||
|
|
|
@ -293,8 +293,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
{
|
{
|
||||||
// edx = &m_local.d[skip]
|
// edx = &m_local.d[skip]
|
||||||
|
|
||||||
shl(edx, 3);
|
lea(edx, ptr[edx * 8 + (size_t)m_local.d]);
|
||||||
lea(edx, ptr[edx + (size_t)m_local.d]);
|
|
||||||
|
|
||||||
// ebx = &v
|
// ebx = &v
|
||||||
|
|
||||||
|
@ -1243,18 +1242,8 @@ return;
|
||||||
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
|
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
|
||||||
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
|
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
|
||||||
|
|
||||||
// TODO: precalc
|
vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
|
||||||
|
vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
|
||||||
vmovq(xmm4, ptr[&m_local.gd->t.minmax]);
|
|
||||||
|
|
||||||
vpsrlw(xmm4, xmm0);
|
|
||||||
|
|
||||||
vpunpcklwd(xmm4, xmm4);
|
|
||||||
vpunpckldq(xmm5, xmm4, xmm4);
|
|
||||||
vpunpckhdq(xmm6, xmm4, xmm4);
|
|
||||||
|
|
||||||
vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
|
|
||||||
vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// xmm2 = m_local.temp.uv[0] = u (level m)
|
// xmm2 = m_local.temp.uv[0] = u (level m)
|
||||||
|
@ -2762,7 +2751,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||||
|
|
||||||
const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i;
|
const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i;
|
||||||
|
|
||||||
if(m_sel.mmin)
|
if(m_sel.mmin && !m_sel.lcm)
|
||||||
{
|
{
|
||||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
|
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
|
||||||
|
|
||||||
|
@ -2790,6 +2779,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if(m_sel.mmin && m_sel.lcm)
|
||||||
|
{
|
||||||
|
mov(ebx, ptr[&lod_i->u32[0]]);
|
||||||
|
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||||
|
}
|
||||||
|
|
||||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||||
|
|
||||||
for(int i = 0; i < pixels; i++)
|
for(int i = 0; i < pixels; i++)
|
||||||
|
|
|
@ -290,8 +290,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
{
|
{
|
||||||
// edx = &m_local.d[skip]
|
// edx = &m_local.d[skip]
|
||||||
|
|
||||||
shl(edx, 3);
|
lea(edx, ptr[edx * 8 + (size_t)m_local.d]);
|
||||||
lea(edx, ptr[edx + (size_t)m_local.d]);
|
|
||||||
|
|
||||||
// ebx = &v
|
// ebx = &v
|
||||||
|
|
||||||
|
@ -1295,20 +1294,8 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
||||||
movdqa(ptr[&m_local.temp.uv[0]], xmm2);
|
movdqa(ptr[&m_local.temp.uv[0]], xmm2);
|
||||||
movdqa(ptr[&m_local.temp.uv[1]], xmm3);
|
movdqa(ptr[&m_local.temp.uv[1]], xmm3);
|
||||||
|
|
||||||
// TODO: precalc
|
movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
|
||||||
|
movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
|
||||||
movq(xmm4, ptr[&m_local.gd->t.minmax]);
|
|
||||||
|
|
||||||
psrlw(xmm4, xmm0);
|
|
||||||
|
|
||||||
punpcklwd(xmm4, xmm4);
|
|
||||||
movdqa(xmm5, xmm4);
|
|
||||||
movdqa(xmm6, xmm4);
|
|
||||||
punpckldq(xmm5, xmm4);
|
|
||||||
punpckhdq(xmm6, xmm4);
|
|
||||||
|
|
||||||
movdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
|
|
||||||
movdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// xmm2 = m_local.temp.uv[0] = u (level m)
|
// xmm2 = m_local.temp.uv[0] = u (level m)
|
||||||
|
@ -2924,7 +2911,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||||
|
|
||||||
const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i;
|
const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i;
|
||||||
|
|
||||||
if(m_sel.mmin)
|
if(m_sel.mmin && !m_sel.lcm)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x401
|
#if _M_SSE >= 0x401
|
||||||
|
|
||||||
|
@ -3075,6 +3062,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if(m_sel.mmin && m_sel.lcm)
|
||||||
|
{
|
||||||
|
mov(ebx, ptr[&lod_i->u32[0]]);
|
||||||
|
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
|
||||||
|
}
|
||||||
|
|
||||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||||
|
|
||||||
#if _M_SSE >= 0x401
|
#if _M_SSE >= 0x401
|
||||||
|
|
|
@ -785,7 +785,7 @@ GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, volatile long& sync)
|
||||||
|
|
||||||
GSRasterizerMT::~GSRasterizerMT()
|
GSRasterizerMT::~GSRasterizerMT()
|
||||||
{
|
{
|
||||||
Draw(NULL);
|
Draw(NULL);
|
||||||
|
|
||||||
CloseThread();
|
CloseThread();
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,8 +137,6 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
|
||||||
|
|
||||||
__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own
|
__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own
|
||||||
{
|
{
|
||||||
const GSScanlineGlobalData* gd;
|
|
||||||
|
|
||||||
struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4];
|
struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4];
|
||||||
struct step {GSVector4 z, stq; GSVector4i c, f;} d4;
|
struct step {GSVector4 z, stq; GSVector4i c, f;} d4;
|
||||||
struct {GSVector4i rb, ga;} c;
|
struct {GSVector4i rb, ga;} c;
|
||||||
|
@ -156,10 +154,15 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has
|
||||||
GSVector4i cov;
|
GSVector4i cov;
|
||||||
|
|
||||||
// mipmapping
|
// mipmapping
|
||||||
|
|
||||||
struct {GSVector4i i, f;} lod;
|
struct {GSVector4i i, f;} lod;
|
||||||
GSVector4i uv[2];
|
GSVector4i uv[2];
|
||||||
GSVector4i uv_minmax[2];
|
GSVector4i uv_minmax[2];
|
||||||
GSVector4i trb, tga;
|
GSVector4i trb, tga;
|
||||||
GSVector4i test;
|
GSVector4i test;
|
||||||
} temp;
|
} temp;
|
||||||
|
|
||||||
|
//
|
||||||
|
|
||||||
|
const GSScanlineGlobalData* gd;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue