From cc28241128cc784821c64583dba6f84edb74e78d Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 1 Apr 2011 12:36:21 +0000 Subject: [PATCH] GSdx: just a small mipmapping optimization when lod is constant, it was already a TODO. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4518 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDrawScanline.cpp | 10 +++++++ .../GSDrawScanlineCodeGenerator.x86.avx.cpp | 25 +++++++---------- .../GSdx/GSDrawScanlineCodeGenerator.x86.cpp | 27 +++++++------------ plugins/GSdx/GSRasterizer.cpp | 2 +- plugins/GSdx/GSScanlineEnvironment.h | 7 +++-- 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index ad995dedbf..d6c30bf89f 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -40,6 +40,16 @@ void GSDrawScanline::BeginDraw(const void* param) { memcpy(&m_global, param, sizeof(m_global)); + if(m_global.sel.mmin && m_global.sel.lcm) + { + GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.x); + + v = v.upl16(v); + + m_local.temp.uv_minmax[0] = v.upl32(v); + m_local.temp.uv_minmax[1] = v.uph32(v); + } + m_ds = m_ds_map[m_global.sel]; if(m_global.sel.aa1) diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index 60c2ecd300..c5a57dc86a 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -293,8 +293,7 @@ void GSDrawScanlineCodeGenerator::Init() { // edx = &m_local.d[skip] - shl(edx, 3); - lea(edx, ptr[edx + (size_t)m_local.d]); + lea(edx, ptr[edx * 8 + (size_t)m_local.d]); // ebx = &v @@ -1243,18 +1242,8 @@ return; vmovdqa(ptr[&m_local.temp.uv[0]], xmm2); vmovdqa(ptr[&m_local.temp.uv[1]], xmm3); - // TODO: precalc - - vmovq(xmm4, ptr[&m_local.gd->t.minmax]); - - vpsrlw(xmm4, xmm0); - - vpunpcklwd(xmm4, xmm4); - vpunpckldq(xmm5, xmm4, xmm4); - vpunpckhdq(xmm6, xmm4, xmm4); - - vmovdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5); - vmovdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6); + vmovdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); + vmovdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); } // xmm2 = m_local.temp.uv[0] = u (level m) @@ -2762,7 +2751,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - if(m_sel.mmin) + if(m_sel.mmin && !m_sel.lcm) { const int r[] = {5, 6, 2, 4, 0, 1, 3, 7}; @@ -2790,6 +2779,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) } else { + if(m_sel.mmin && m_sel.lcm) + { + mov(ebx, ptr[&lod_i->u32[0]]); + mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); + } + const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; for(int i = 0; i < pixels; i++) diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index 2f97cd953d..5eac262f19 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -290,8 +290,7 @@ void GSDrawScanlineCodeGenerator::Init() { // edx = &m_local.d[skip] - shl(edx, 3); - lea(edx, ptr[edx + (size_t)m_local.d]); + lea(edx, ptr[edx * 8 + (size_t)m_local.d]); // ebx = &v @@ -1295,20 +1294,8 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() movdqa(ptr[&m_local.temp.uv[0]], xmm2); movdqa(ptr[&m_local.temp.uv[1]], xmm3); - // TODO: precalc - - movq(xmm4, ptr[&m_local.gd->t.minmax]); - - psrlw(xmm4, xmm0); - - punpcklwd(xmm4, xmm4); - movdqa(xmm5, xmm4); - movdqa(xmm6, xmm4); - punpckldq(xmm5, xmm4); - punpckhdq(xmm6, xmm4); - - movdqa(ptr[&m_local.temp.uv_minmax[0]], xmm5); - movdqa(ptr[&m_local.temp.uv_minmax[1]], xmm6); + movdqa(xmm5, ptr[&m_local.temp.uv_minmax[0]]); + movdqa(xmm6, ptr[&m_local.temp.uv_minmax[1]]); } // xmm2 = m_local.temp.uv[0] = u (level m) @@ -2924,7 +2911,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) const GSVector4i* lod_i = m_sel.lcm ? &m_local.gd->lod.i : &m_local.temp.lod.i; - if(m_sel.mmin) + if(m_sel.mmin && !m_sel.lcm) { #if _M_SSE >= 0x401 @@ -3075,6 +3062,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) } else { + if(m_sel.mmin && m_sel.lcm) + { + mov(ebx, ptr[&lod_i->u32[0]]); + mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]); + } + const int r[] = {5, 6, 2, 4, 0, 1, 3, 5}; #if _M_SSE >= 0x401 diff --git a/plugins/GSdx/GSRasterizer.cpp b/plugins/GSdx/GSRasterizer.cpp index ade6d563e3..280b18d09c 100644 --- a/plugins/GSdx/GSRasterizer.cpp +++ b/plugins/GSdx/GSRasterizer.cpp @@ -785,7 +785,7 @@ GSRasterizerMT::GSRasterizerMT(IDrawScanline* ds, volatile long& sync) GSRasterizerMT::~GSRasterizerMT() { - Draw(NULL); + Draw(NULL); CloseThread(); } diff --git a/plugins/GSdx/GSScanlineEnvironment.h b/plugins/GSdx/GSScanlineEnvironment.h index b784ea9155..46cfb9d008 100644 --- a/plugins/GSdx/GSScanlineEnvironment.h +++ b/plugins/GSdx/GSScanlineEnvironment.h @@ -137,8 +137,6 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own { - const GSScanlineGlobalData* gd; - struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4]; struct step {GSVector4 z, stq; GSVector4i c, f;} d4; struct {GSVector4i rb, ga;} c; @@ -156,10 +154,15 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has GSVector4i cov; // mipmapping + struct {GSVector4i i, f;} lod; GSVector4i uv[2]; GSVector4i uv_minmax[2]; GSVector4i trb, tga; GSVector4i test; } temp; + + // + + const GSScanlineGlobalData* gd; };