GSdx: a small mipmapping optimization for the case where the LOD is constant; this was already marked as a TODO.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4518 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-04-01 12:36:21 +00:00
parent ef979dde29
commit cc28241128
5 changed files with 36 additions and 35 deletions

View File

@ -40,6 +40,16 @@ void GSDrawScanline::BeginDraw(const void* param)
{ {
memcpy(&m_global, param, sizeof(m_global)); memcpy(&m_global, param, sizeof(m_global));
if(m_global.sel.mmin && m_global.sel.lcm)
{
GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.x);
v = v.upl16(v);
m_local.temp.uv_minmax[0] = v.upl32(v);
m_local.temp.uv_minmax[1] = v.uph32(v);
}
m_ds = m_ds_map[m_global.sel]; m_ds = m_ds_map[m_global.sel];
if(m_global.sel.aa1) if(m_global.sel.aa1)

View File

@ -293,8 +293,7 @@ void GSDrawScanlineCodeGenerator::Init()
{ {
// edx = &m_local.d[skip] // edx = &m_local.d[skip]
shl(edx, 3); lea(edx, ptr[edx * 8 + (size_t)m_local.d]);
lea(edx, ptr[edx + (size_t)m_local.d]);
// ebx = &v // ebx = &v
@ -1243,18 +1242,8 @@ return;
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2); vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3); vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
// TODO: precalc vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
vmovq(xmm4, ptr[&m_local.gd->t.minmax]);
vpsrlw(xmm4, xmm0);
vpunpcklwd(xmm4, xmm4);
vpunpckldq(xmm5, xmm4, xmm4);
vpunpckhdq(xmm6, xmm4, xmm4);
vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
} }
// xmm2 = m_local.temp.uv[0] = u (level m) // xmm2 = m_local.temp.uv[0] = u (level m)
@ -2762,7 +2751,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i;
if(m_sel.mmin) if(m_sel.mmin && !m_sel.lcm)
{ {
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
@ -2790,6 +2779,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
} }
else else
{ {
if(m_sel.mmin && m_sel.lcm)
{
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
}
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
for(int i = 0; i < pixels; i++) for(int i = 0; i < pixels; i++)

View File

@ -290,8 +290,7 @@ void GSDrawScanlineCodeGenerator::Init()
{ {
// edx = &m_local.d[skip] // edx = &m_local.d[skip]
shl(edx, 3); lea(edx, ptr[edx * 8 + (size_t)m_local.d]);
lea(edx, ptr[edx + (size_t)m_local.d]);
// ebx = &v // ebx = &v
@ -1295,20 +1294,8 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
movdqa(ptr[&m_local.temp.uv[0]], xmm2); movdqa(ptr[&m_local.temp.uv[0]], xmm2);
movdqa(ptr[&m_local.temp.uv[1]], xmm3); movdqa(ptr[&m_local.temp.uv[1]], xmm3);
// TODO: precalc movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]);
movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]);
movq(xmm4, ptr[&m_local.gd->t.minmax]);
psrlw(xmm4, xmm0);
punpcklwd(xmm4, xmm4);
movdqa(xmm5, xmm4);
movdqa(xmm6, xmm4);
punpckldq(xmm5, xmm4);
punpckhdq(xmm6, xmm4);
movdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5);
movdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6);
} }
// xmm2 = m_local.temp.uv[0] = u (level m) // xmm2 = m_local.temp.uv[0] = u (level m)
@ -2924,7 +2911,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i;
if(m_sel.mmin) if(m_sel.mmin && !m_sel.lcm)
{ {
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
@ -3075,6 +3062,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
} }
else else
{ {
if(m_sel.mmin && m_sel.lcm)
{
mov(ebx, ptr[&lod_i->u32[0]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
}
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401

View File

@ -137,8 +137,6 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own
{ {
const GSScanlineGlobalData* gd;
struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4]; struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4];
struct step {GSVector4 z, stq; GSVector4i c, f;} d4; struct step {GSVector4 z, stq; GSVector4i c, f;} d4;
struct {GSVector4i rb, ga;} c; struct {GSVector4i rb, ga;} c;
@ -156,10 +154,15 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has
GSVector4i cov; GSVector4i cov;
// mipmapping // mipmapping
struct {GSVector4i i, f;} lod; struct {GSVector4i i, f;} lod;
GSVector4i uv[2]; GSVector4i uv[2];
GSVector4i uv_minmax[2]; GSVector4i uv_minmax[2];
GSVector4i trb, tga; GSVector4i trb, tga;
GSVector4i test; GSVector4i test;
} temp; } temp;
//
const GSScanlineGlobalData* gd;
}; };