diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index 51b5a7674f..e2578b6396 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -387,13 +387,6 @@ void GSDrawScanlineCodeGenerator::Init() vmovdqa(ptr[&m_local.temp.s], xmm2); vmovdqa(ptr[&m_local.temp.t], xmm3); - - if(m_sel.mmin && !m_sel.lcm) - { - vshufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - vaddps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]); - vmovaps(ptr[&m_local.temp.q], xmm4); - } } else { @@ -517,13 +510,6 @@ void GSDrawScanlineCodeGenerator::Step() { vmovdqa(xmm3, ptr[&m_local.temp.t]); } - - if(m_sel.mmin && !m_sel.lcm) - { - vshufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - vaddps(xmm4, ptr[&m_local.temp.q]); - vmovaps(ptr[&m_local.temp.q], xmm4); - } } else { @@ -1175,6 +1161,14 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() vmaxps(xmm4, xmm0); vcvtps2dq(xmm4, xmm4); + if(m_sel.mmin == 1) // round-off mode + { + mov(eax, 0x8000); + vmovd(xmm0, eax); + vpshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + vpaddd(xmm4, xmm0); + } + vpsrld(xmm0, xmm4, 16); vmovdqa(ptr[&m_local.temp.lod.i], xmm0); /* @@ -1183,9 +1177,12 @@ vpslld(xmm6, xmm4, 16); vpsrld(xmm6, xmm6, 24); return; */ - vpshuflw(xmm0, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); - vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - vmovdqa(ptr[&m_local.temp.lod.f], xmm0); + if(m_sel.mmin == 2) // trilinear mode + { + vpshuflw(xmm0, xmm4, _MM_SHUFFLE(2, 2, 0, 0)); + vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); + vmovdqa(ptr[&m_local.temp.lod.f], xmm0); + } // shift u/v by (int)lod diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index 5da468bf88..59dd83d242 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -384,13 +384,6 @@ void GSDrawScanlineCodeGenerator::Init() movdqa(ptr[&m_local.temp.s], xmm2); movdqa(ptr[&m_local.temp.t], xmm3); - - if(m_sel.mmin && !m_sel.lcm) - { - shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - addps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]); - movaps(ptr[&m_local.temp.q], xmm4); - } } else { @@ -517,13 +510,6 @@ void GSDrawScanlineCodeGenerator::Step() { movdqa(xmm3, ptr[&m_local.temp.t]); } - - if(m_sel.mmin && !m_sel.lcm) - { - shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); - addps(xmm4, ptr[&m_local.temp.q]); - movaps(ptr[&m_local.temp.q], xmm4); - } } else { @@ -1221,13 +1207,24 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() maxps(xmm4, xmm0); cvtps2dq(xmm4, xmm4); + if(m_sel.mmin == 1) // round-off mode + { + mov(eax, 0x8000); + movd(xmm0, eax); + pshufd(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + paddd(xmm4, xmm0); + } + movdqa(xmm0, xmm4); psrld(xmm4, 16); movdqa(ptr[&m_local.temp.lod.i], xmm4); - pshuflw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); - movdqa(ptr[&m_local.temp.lod.f], xmm0); + if(m_sel.mmin == 2) // trilinear mode + { + pshuflw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); + pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0)); + movdqa(ptr[&m_local.temp.lod.f], xmm0); + } // shift u/v by (int)lod diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index 82f3546587..604e00094f 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -513,6 +513,7 @@ void GSRenderer::KeyEvent(GSKeyEventData* e) return; case VK_DELETE: m_aa1 = !m_aa1; + printf("GSdx: (Software) aa1 is now %s.\n", m_aa1 ? "enabled" : "disabled"); return; case VK_INSERT: m_mipmap = !m_mipmap; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 3461842be6..0b8caf45d3 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -365,7 +365,7 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) if(m_vt.m_lod.x > 0) { - gd.sel.ltf = (context->TEX1.MMIN & 5) ? 1 : 0; + gd.sel.ltf = context->TEX1.MMIN >> 2; } else { @@ -387,9 +387,22 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) gd.l = GSVector4((float)(-0x10000 << context->TEX1.L)); gd.k = GSVector4((float)k); + if(gd.sel.fst) + { + ASSERT(gd.sel.lcm == 1); + ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu) + + gd.sel.lcm = 1; + } + if(gd.sel.lcm) { - int lod = std::min(k, mxl); + int lod = std::max(std::min(k, mxl), 0); + + if(gd.sel.mmin == 1) + { + lod = (lod + 0x8000) & 0xffff0000; // rounding + } gd.lod.i = GSVector4i(lod >> 16); gd.lod.f = GSVector4i(lod & 0xffff).xxxxl().xxzz(); @@ -403,7 +416,7 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) GSVector4 tmin = m_vt.m_min.t; GSVector4 tmax = m_vt.m_max.t; - //static int s_counter = 0; + static int s_counter = 0; //t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0)); @@ -460,10 +473,10 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd) gd.tex[i] = t->m_buff; - //t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i)); + // t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i)); } - //s_counter++; + s_counter++; m_vt.m_min.t = tmin; m_vt.m_max.t = tmax; diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp index c607fb7cce..8620124946 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp @@ -170,16 +170,11 @@ void GSSetupPrimCodeGenerator::Texture() if(m_sel.fst) { - // m_local.d4.st = GSVector4i(t * 4.0f); - - if(m_sel.mmin && !m_sel.lcm) - { - vmovhps(ptr[&m_local.d4.stq.z], xmm1); - } + // m_local.d4.stq = GSVector4i(t * 4.0f); vcvttps2dq(xmm1, xmm1); - vmovq(ptr[&m_local.d4.stq], xmm1); + vmovdqa(ptr[&m_local.d4.stq], xmm1); } else { @@ -188,7 +183,7 @@ void GSSetupPrimCodeGenerator::Texture() vmovaps(ptr[&m_local.d4.stq], xmm1); } - for(int j = 0, k = m_sel.fst && !(m_sel.mmin && !m_sel.lcm) ? 2 : 3; j < k; j++) + for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -202,7 +197,7 @@ void GSSetupPrimCodeGenerator::Texture() vmulps(xmm2, xmm1, Xmm(4 + i)); - if(m_sel.fst && !(m_sel.mmin && !m_sel.lcm)) + if(m_sel.fst) { // m_local.d[i].s/t = GSVector4i(v); diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp index 71dacfe10b..3ca05cd23a 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp @@ -176,16 +176,11 @@ void GSSetupPrimCodeGenerator::Texture() if(m_sel.fst) { - // m_local.d4.st = GSVector4i(t * 4.0f); - - if(m_sel.mmin && !m_sel.lcm) - { - movhps(ptr[&m_local.d4.stq.z], xmm1); - } + // m_local.d4.stq = GSVector4i(t * 4.0f); cvttps2dq(xmm1, xmm1); - movq(ptr[&m_local.d4.stq], xmm1); + movdqa(ptr[&m_local.d4.stq], xmm1); } else { @@ -194,7 +189,7 @@ void GSSetupPrimCodeGenerator::Texture() movaps(ptr[&m_local.d4.stq], xmm1); } - for(int j = 0, k = m_sel.fst && !(m_sel.mmin && !m_sel.lcm) ? 2 : 3; j < k; j++) + for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++) { // GSVector4 ds = t.xxxx(); // GSVector4 dt = t.yyyy(); @@ -210,7 +205,7 @@ void GSSetupPrimCodeGenerator::Texture() movaps(xmm2, xmm1); mulps(xmm2, Xmm(4 + i)); - if(m_sel.fst && !(m_sel.mmin && !m_sel.lcm)) + if(m_sel.fst) { // m_local.d[i].s/t = GSVector4i(v); diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp index af4e0f8ad2..df4933687e 100644 --- a/plugins/GSdx/GSVertexTrace.cpp +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -66,7 +66,7 @@ void GSVertexTrace::UpdateLOD() float K = (float)TEX1.K / 16; - if(TEX1.LCM == 0) // && m_state->PRIM->FST == 0 // if FST => assume Q = 1.0f (should not, but Q is very often bogus, 0 or DEN) + if(TEX1.LCM == 0 && m_state->PRIM->FST == 0) // FST == 1 => Q is not interpolated { // LOD = log2(1/|Q|) * (1 << L) + K