mirror of https://github.com/PCSX2/pcsx2.git
GSdx: When mipmapping is on, LOD is calculated per pixel. It isn't used for anything, but it's there. I cannot really measure any significant slowdown, but the rest of the fun is yet to come.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4428 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5e9930a9dc
commit
1d759c852d
|
@ -34,6 +34,14 @@ const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
|
|||
GSVector4i::zero(),
|
||||
};
|
||||
|
||||
// Constants for the log2 approximation emitted by SampleTexture() when mipmapping is on:
//   log2(q) ~= ((c0 * m + c1) * m + c2) * (m - 1.0f) + e
// where m is the mantissa of q OR-ed with 1.0f and e is (float)(exponent - 127).
// Entries 0..2 hold c0..c2; entry 3 is 1.0f, used both for the OR and for (m - 1.0f).
const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||
{
|
||||
GSVector4(0.204446009836232697516f), // c0
|
||||
GSVector4(-1.04913055217340124191f), // c1
|
||||
GSVector4(2.28330284476918490682f), // c2
|
||||
GSVector4(1.0f), // 1.0f (mantissa OR mask / subtrahend)
|
||||
};
|
||||
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_local(*(GSScanlineLocalData*)param)
|
||||
|
|
|
@ -31,6 +31,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void operator = (const GSDrawScanlineCodeGenerator&);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static const GSVector4 m_log2_coef[4];
|
||||
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
|
|
|
@ -256,7 +256,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
// edx = &m_local.d[skip]
|
||||
|
||||
shl(rdx, 4);
|
||||
shl(rdx, 3);
|
||||
lea(rdx, ptr[rdx + r11 + offsetof(GSScanlineLocalData, d)]);
|
||||
}
|
||||
|
||||
|
@ -317,17 +317,17 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
|
||||
// si = vti.xxxx() + m_local.d[skip].si;
|
||||
// ti = vti.yyyy(); if(!sprite) ti += m_local.d[skip].ti;
|
||||
// s = vti.xxxx() + m_local.d[skip].s;
|
||||
// t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t;
|
||||
|
||||
vpshufd(xmm10, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm11, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
vpaddd(xmm10, ptr[rdx + 16 * 7]);
|
||||
vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
vpaddd(xmm11, ptr[rdx + 16 * 8]);
|
||||
vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -338,6 +338,12 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vpsrlw(xmm6, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -349,9 +355,9 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vshufps(xmm11, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
vaddps(xmm10, ptr[rdx + 16 * 1]);
|
||||
vaddps(xmm11, ptr[rdx + 16 * 2]);
|
||||
vaddps(xmm12, ptr[rdx + 16 * 3]);
|
||||
vaddps(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
vaddps(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -374,8 +380,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vpshufd(xmm13, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm14, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
vpaddw(xmm13, ptr[rdx + 16 * 4]);
|
||||
vpaddw(xmm14, ptr[rdx + 16 * 5]);
|
||||
vpaddw(xmm13, ptr[rdx + offsetof(GSScanlineLocalData::skip, rb)]);
|
||||
vpaddw(xmm14, ptr[rdx + offsetof(GSScanlineLocalData::skip, ga)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -430,7 +436,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
// si += st.xxxx();
|
||||
// if(!sprite) ti += st.yyyy();
|
||||
|
||||
vmovdqa(xmm0, ptr[r11 + offsetof(GSScanlineLocalData, d4.st)]);
|
||||
vmovdqa(xmm0, ptr[r11 + offsetof(GSScanlineLocalData, d4.stq)]);
|
||||
|
||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpaddd(xmm10, xmm1);
|
||||
|
@ -440,6 +446,12 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vpaddd(xmm11, xmm1);
|
||||
}
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm12, xmm3);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -617,6 +629,11 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// ebx = tex
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
vrcpps(xmm0, xmm12);
|
||||
|
|
|
@ -52,8 +52,9 @@ L("loop");
|
|||
// esi = fzbr
|
||||
// edi = fzbc
|
||||
// xmm0 = z/zi
|
||||
// xmm2 = u (tme)
|
||||
// xmm3 = v (tme)
|
||||
// xmm2 = s/u (tme)
|
||||
// xmm3 = t/v (tme)
|
||||
// xmm4 = q (tme)
|
||||
// xmm5 = rb (!tme)
|
||||
// xmm6 = ga (!tme)
|
||||
// xmm7 = test
|
||||
|
@ -66,8 +67,9 @@ L("loop");
|
|||
// esi = fzbr
|
||||
// edi = fzbc
|
||||
// - xmm0
|
||||
// xmm2 = u (tme)
|
||||
// xmm3 = v (tme)
|
||||
// xmm2 = s/u (tme)
|
||||
// xmm3 = t/v (tme)
|
||||
// xmm4 = q (tme)
|
||||
// xmm5 = rb (!tme)
|
||||
// xmm6 = ga (!tme)
|
||||
// xmm7 = test
|
||||
|
@ -284,7 +286,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
// edx = &m_local.d[skip]
|
||||
|
||||
shl(edx, 4);
|
||||
shl(edx, 3);
|
||||
lea(edx, ptr[edx + (size_t)m_local.d]);
|
||||
|
||||
// ebx = &v
|
||||
|
@ -305,7 +307,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vcvttps2dq(xmm1, xmm0);
|
||||
vpshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpaddw(xmm1, ptr[edx + 16 * 6]);
|
||||
vpaddw(xmm1, ptr[edx + offsetof(GSScanlineLocalData::skip, f)]);
|
||||
|
||||
vmovdqa(ptr[&m_local.temp.f], xmm1);
|
||||
}
|
||||
|
@ -315,7 +317,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm0, ptr[edx]);
|
||||
vaddps(xmm0, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
|
||||
vmovaps(ptr[&m_local.temp.z], xmm0);
|
||||
}
|
||||
|
@ -351,34 +353,41 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
// GSVector4i vti(vt);
|
||||
|
||||
vcvttps2dq(xmm4, xmm4);
|
||||
vcvttps2dq(xmm6, xmm4);
|
||||
|
||||
// si = vti.xxxx() + m_local.d[skip].si;
|
||||
// ti = vti.yyyy(); if(!sprite) ti += m_local.d[skip].ti;
|
||||
// s = vti.xxxx() + m_local.d[skip].s;
|
||||
// t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t;
|
||||
|
||||
vpshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vpshufd(xmm2, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm3, xmm6, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
vpaddd(xmm2, ptr[edx + 16 * 7]);
|
||||
vpaddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
vpaddd(xmm3, ptr[edx + 16 * 8]);
|
||||
vpaddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
vpshuflw(xmm4, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm4, 1);
|
||||
vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm6, 1);
|
||||
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm4);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm6);
|
||||
}
|
||||
}
|
||||
|
||||
vmovdqa(ptr[&m_local.temp.s], xmm2);
|
||||
vmovdqa(ptr[&m_local.temp.t], xmm3);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vshufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
vmovaps(ptr[&m_local.temp.q], xmm4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -390,17 +399,13 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vshufps(xmm3, xmm4, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vshufps(xmm4, xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
vaddps(xmm2, ptr[edx + 16 * 1]);
|
||||
vaddps(xmm3, ptr[edx + 16 * 2]);
|
||||
vaddps(xmm4, ptr[edx + 16 * 3]);
|
||||
vaddps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
vaddps(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
vaddps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
|
||||
vmovaps(ptr[&m_local.temp.s], xmm2);
|
||||
vmovaps(ptr[&m_local.temp.t], xmm3);
|
||||
vmovaps(ptr[&m_local.temp.q], xmm4);
|
||||
|
||||
vrcpps(xmm4, xmm4);
|
||||
vmulps(xmm2, xmm4);
|
||||
vmulps(xmm3, xmm4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -423,8 +428,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vpshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
vpaddw(xmm5, ptr[edx + 16 * 4]);
|
||||
vpaddw(xmm6, ptr[edx + 16 * 5]);
|
||||
vpaddw(xmm5, ptr[edx + offsetof(GSScanlineLocalData::skip, rb)]);
|
||||
vpaddw(xmm6, ptr[edx + offsetof(GSScanlineLocalData::skip, ga)]);
|
||||
|
||||
vmovdqa(ptr[&m_local.temp.rb], xmm5);
|
||||
vmovdqa(ptr[&m_local.temp.ga], xmm6);
|
||||
|
@ -485,12 +490,12 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
{
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// GSVector4i st = m_local.d4.st;
|
||||
// GSVector4i stq = m_local.d4.stq;
|
||||
|
||||
// si += st.xxxx();
|
||||
// if(!sprite) ti += st.yyyy();
|
||||
// s += stq.xxxx();
|
||||
// if(!sprite) t += stq.yyyy();
|
||||
|
||||
vmovdqa(xmm4, ptr[&m_local.d4.st]);
|
||||
vmovdqa(xmm4, ptr[&m_local.d4.stq]);
|
||||
|
||||
vpshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpaddd(xmm2, ptr[&m_local.temp.s]);
|
||||
|
@ -506,6 +511,13 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
{
|
||||
vmovdqa(xmm3, ptr[&m_local.temp.t]);
|
||||
}
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vshufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm4, ptr[&m_local.temp.q]);
|
||||
vmovaps(ptr[&m_local.temp.q], xmm4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -528,10 +540,6 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
vmovaps(ptr[&m_local.temp.s], xmm2);
|
||||
vmovaps(ptr[&m_local.temp.t], xmm3);
|
||||
vmovaps(ptr[&m_local.temp.q], xmm4);
|
||||
|
||||
vrcpps(xmm4, xmm4);
|
||||
vmulps(xmm2, xmm4);
|
||||
vmulps(xmm3, xmm4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -648,16 +656,14 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
{
|
||||
// GSVector4i o = GSVector4i::x80000000();
|
||||
|
||||
vpcmpeqd(xmm4, xmm4);
|
||||
vpslld(xmm4, 31);
|
||||
vpcmpeqd(temp1, temp1);
|
||||
vpslld(temp1, 31);
|
||||
|
||||
// GSVector4i zso = zs - o;
|
||||
|
||||
vpsubd(xmm0, xmm4);
|
||||
|
||||
// GSVector4i zdo = zd - o;
|
||||
|
||||
vpsubd(xmm1, xmm4);
|
||||
vpsubd(xmm0, temp1);
|
||||
vpsubd(xmm1, temp1);
|
||||
}
|
||||
|
||||
switch(m_sel.ztst)
|
||||
|
@ -671,8 +677,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL
|
||||
// test |= zso <= zdo; // ~(zso > zdo)
|
||||
vpcmpgtd(xmm0, xmm1);
|
||||
vpcmpeqd(xmm4, xmm4);
|
||||
vpxor(xmm0, xmm4);
|
||||
vpcmpeqd(temp1, temp1);
|
||||
vpxor(xmm0, temp1);
|
||||
vpor(xmm7, xmm0);
|
||||
break;
|
||||
}
|
||||
|
@ -694,11 +700,66 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
// TODO: move these into Init/Step too?
|
||||
vrcpps(xmm0, xmm4);
|
||||
|
||||
vmulps(xmm2, xmm0);
|
||||
vmulps(xmm3, xmm0);
|
||||
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
vcvttps2dq(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(m_sel.mipmap)
|
||||
{
|
||||
// TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (but can we round it?)
|
||||
|
||||
if(!m_sel.lcm)
|
||||
{
|
||||
// lod = -log2(Q) * (1 << L) + K
|
||||
|
||||
vpcmpeqd(xmm1, xmm1);
|
||||
vpsrld(xmm1, xmm1, 25);
|
||||
vpslld(xmm0, xmm4, 1);
|
||||
vpsrld(xmm0, xmm0, 24);
|
||||
vpsubd(xmm0, xmm1);
|
||||
vcvtdq2ps(xmm0, xmm0);
|
||||
|
||||
// xmm0 = (float)(biased exponent of xmm4 - 127)
|
||||
|
||||
vpslld(xmm4, xmm4, 9);
|
||||
vpsrld(xmm4, xmm4, 9);
|
||||
vorps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
|
||||
// xmm4 = mant(q) | 1.0f
|
||||
|
||||
vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
|
||||
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
|
||||
vmulps(xmm5, xmm4);
|
||||
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
|
||||
vmulps(xmm4, xmm5);
|
||||
vaddps(xmm4, xmm0);
|
||||
|
||||
// xmm4 = log2(Q) = (((((c0 * xmm4) + c1) * xmm4) + c2) * (xmm4 - 1.0f) + xmm0)
|
||||
|
||||
vmulps(xmm4, ptr[&m_local.gd->l]);
|
||||
vaddps(xmm4, ptr[&m_local.gd->k]);
|
||||
|
||||
// xmm4 = (-log2(Q) * (1 << L) + K) * 0x10000
|
||||
|
||||
vcvtps2dq(xmm4, xmm4);
|
||||
vmovdqa(ptr[&m_local.temp.lod], xmm4);
|
||||
}
|
||||
else
|
||||
{
|
||||
// lod = K (=> use m_local->gd.k later when lod is needed)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: if(m_sel.mipmap) ...
|
||||
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// u -= 0x8000;
|
||||
|
|
|
@ -49,8 +49,9 @@ L("loop");
|
|||
// esi = fzbr
|
||||
// edi = fzbc
|
||||
// xmm0 = z/zi
|
||||
// xmm2 = u (tme)
|
||||
// xmm3 = v (tme)
|
||||
// xmm2 = s/u (tme)
|
||||
// xmm3 = t/v (tme)
|
||||
// xmm4 = q (tme)
|
||||
// xmm5 = rb (!tme)
|
||||
// xmm6 = ga (!tme)
|
||||
// xmm7 = test
|
||||
|
@ -63,8 +64,9 @@ L("loop");
|
|||
// esi = fzbr
|
||||
// edi = fzbc
|
||||
// - xmm0
|
||||
// xmm2 = u (tme)
|
||||
// xmm3 = v (tme)
|
||||
// xmm2 = s/u (tme)
|
||||
// xmm3 = t/v (tme)
|
||||
// xmm4 = q (tme)
|
||||
// xmm5 = rb (!tme)
|
||||
// xmm6 = ga (!tme)
|
||||
// xmm7 = test
|
||||
|
@ -281,7 +283,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
// edx = &m_local.d[skip]
|
||||
|
||||
shl(edx, 4);
|
||||
shl(edx, 3);
|
||||
lea(edx, ptr[edx + (size_t)m_local.d]);
|
||||
|
||||
// ebx = &v
|
||||
|
@ -302,7 +304,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
cvttps2dq(xmm1, xmm0);
|
||||
pshufhw(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
paddw(xmm1, ptr[edx + 16 * 6]);
|
||||
paddw(xmm1, ptr[edx + offsetof(GSScanlineLocalData::skip, f)]);
|
||||
|
||||
movdqa(ptr[&m_local.temp.f], xmm1);
|
||||
}
|
||||
|
@ -312,7 +314,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
addps(xmm0, ptr[edx]);
|
||||
addps(xmm0, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
|
||||
movaps(ptr[&m_local.temp.z], xmm0);
|
||||
}
|
||||
|
@ -348,34 +350,41 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
// GSVector4i vti(vt);
|
||||
|
||||
cvttps2dq(xmm4, xmm4);
|
||||
cvttps2dq(xmm6, xmm4);
|
||||
|
||||
// si = vti.xxxx() + m_local.d[skip].si;
|
||||
// ti = vti.yyyy(); if(!sprite) ti += m_local.d[skip].ti;
|
||||
// s = vti.xxxx() + m_local.d[skip].s;
|
||||
// t = vti.yyyy(); if(!sprite) t += m_local.d[skip].t;
|
||||
|
||||
pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm3, xmm4, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
pshufd(xmm2, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm3, xmm6, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
||||
paddd(xmm2, ptr[edx + 16 * 7]);
|
||||
paddd(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
{
|
||||
paddd(xmm3, ptr[edx + 16 * 8]);
|
||||
paddd(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
movdqa(xmm4, xmm3);
|
||||
pshuflw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm4, xmm4, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm4, 1);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm4);
|
||||
movdqa(xmm6, xmm3);
|
||||
pshuflw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm6, 1);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm6);
|
||||
}
|
||||
}
|
||||
|
||||
movdqa(ptr[&m_local.temp.s], xmm2);
|
||||
movdqa(ptr[&m_local.temp.t], xmm3);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
addps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
movaps(ptr[&m_local.temp.q], xmm4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -390,17 +399,13 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
shufps(xmm3, xmm3, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
addps(xmm2, ptr[edx + 16 * 1]);
|
||||
addps(xmm3, ptr[edx + 16 * 2]);
|
||||
addps(xmm4, ptr[edx + 16 * 3]);
|
||||
addps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
addps(xmm3, ptr[edx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
addps(xmm4, ptr[edx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
|
||||
movaps(ptr[&m_local.temp.s], xmm2);
|
||||
movaps(ptr[&m_local.temp.t], xmm3);
|
||||
movaps(ptr[&m_local.temp.q], xmm4);
|
||||
|
||||
rcpps(xmm4, xmm4);
|
||||
mulps(xmm2, xmm4);
|
||||
mulps(xmm3, xmm4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -423,8 +428,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
pshufd(xmm5, xmm6, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
pshufd(xmm6, xmm6, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
paddw(xmm5, ptr[edx + 16 * 4]);
|
||||
paddw(xmm6, ptr[edx + 16 * 5]);
|
||||
paddw(xmm5, ptr[edx + offsetof(GSScanlineLocalData::skip, rb)]);
|
||||
paddw(xmm6, ptr[edx + offsetof(GSScanlineLocalData::skip, ga)]);
|
||||
|
||||
movdqa(ptr[&m_local.temp.rb], xmm5);
|
||||
movdqa(ptr[&m_local.temp.ga], xmm6);
|
||||
|
@ -485,12 +490,12 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
{
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// GSVector4i st = m_local.d4.st;
|
||||
// GSVector4i st = m_local.d4.stq;
|
||||
|
||||
// si += st.xxxx();
|
||||
// if(!sprite) ti += st.yyyy();
|
||||
|
||||
movdqa(xmm4, ptr[&m_local.d4.st]);
|
||||
movdqa(xmm4, ptr[&m_local.d4.stq]);
|
||||
|
||||
pshufd(xmm2, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
paddd(xmm2, ptr[&m_local.temp.s]);
|
||||
|
@ -506,6 +511,13 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
{
|
||||
movdqa(xmm3, ptr[&m_local.temp.t]);
|
||||
}
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
addps(xmm4, ptr[&m_local.temp.q]);
|
||||
movaps(ptr[&m_local.temp.q], xmm4);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -515,9 +527,9 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
// t += stq.yyyy();
|
||||
// q += stq.zzzz();
|
||||
|
||||
movaps(xmm2, ptr[&m_local.d4.stq]);
|
||||
movaps(xmm3, xmm2);
|
||||
movaps(xmm4, xmm2);
|
||||
movaps(xmm4, ptr[&m_local.d4.stq]);
|
||||
movaps(xmm2, xmm4);
|
||||
movaps(xmm3, xmm4);
|
||||
|
||||
shufps(xmm2, xmm2, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm3, xmm3, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
|
@ -530,10 +542,6 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
movaps(ptr[&m_local.temp.s], xmm2);
|
||||
movaps(ptr[&m_local.temp.t], xmm3);
|
||||
movaps(ptr[&m_local.temp.q], xmm4);
|
||||
|
||||
rcpps(xmm4, xmm4);
|
||||
mulps(xmm2, xmm4);
|
||||
mulps(xmm3, xmm4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -650,16 +658,14 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
{
|
||||
// GSVector4i o = GSVector4i::x80000000();
|
||||
|
||||
pcmpeqd(xmm4, xmm4);
|
||||
pslld(xmm4, 31);
|
||||
pcmpeqd(temp1, temp1);
|
||||
pslld(temp1, 31);
|
||||
|
||||
// GSVector4i zso = zs - o;
|
||||
|
||||
psubd(xmm0, xmm4);
|
||||
|
||||
// GSVector4i zdo = zd - o;
|
||||
|
||||
psubd(xmm1, xmm4);
|
||||
psubd(xmm0, temp1);
|
||||
psubd(xmm1, temp1);
|
||||
}
|
||||
|
||||
switch(m_sel.ztst)
|
||||
|
@ -673,8 +679,8 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
case ZTST_GREATER: // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL
|
||||
// test |= zso <= zdo; // ~(zso > zdo)
|
||||
pcmpgtd(xmm0, xmm1);
|
||||
pcmpeqd(xmm4, xmm4);
|
||||
pxor(xmm0, xmm4);
|
||||
pcmpeqd(temp1, temp1);
|
||||
pxor(xmm0, temp1);
|
||||
por(xmm7, xmm0);
|
||||
break;
|
||||
}
|
||||
|
@ -696,11 +702,66 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
// TODO: move these into Init/Step too?
|
||||
rcpps(xmm4, xmm4);
|
||||
|
||||
mulps(xmm2, xmm4);
|
||||
mulps(xmm3, xmm4);
|
||||
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(m_sel.mipmap)
|
||||
{
|
||||
// TODO: if the fractional part is not needed in round-off mode then there is a faster integer log2 (but can we round it?)
|
||||
|
||||
if(!m_sel.lcm)
|
||||
{
|
||||
// lod = -log2(Q) * (1 << L) + K
|
||||
|
||||
vpcmpeqd(xmm1, xmm1);
|
||||
vpsrld(xmm1, xmm1, 25);
|
||||
vpslld(xmm0, xmm4, 1);
|
||||
vpsrld(xmm0, xmm0, 24);
|
||||
vpsubd(xmm0, xmm1);
|
||||
vcvtdq2ps(xmm0, xmm0);
|
||||
|
||||
// xmm0 = (float)(biased exponent of xmm4 - 127)
|
||||
|
||||
vpslld(xmm4, xmm4, 9);
|
||||
vpsrld(xmm4, xmm4, 9);
|
||||
vorps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
|
||||
// xmm4 = mant(q) | 1.0f
|
||||
|
||||
vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
|
||||
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
|
||||
vmulps(xmm5, xmm4);
|
||||
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
|
||||
vmulps(xmm4, xmm5);
|
||||
vaddps(xmm4, xmm0);
|
||||
|
||||
// xmm4 = log2(Q) = (((((c0 * xmm4) + c1) * xmm4) + c2) * (xmm4 - 1.0f) + xmm0)
|
||||
|
||||
vmulps(xmm4, ptr[&m_local.gd->l]);
|
||||
vaddps(xmm4, ptr[&m_local.gd->k]);
|
||||
|
||||
// xmm4 = (-log2(Q) * (1 << L) + K) * 0x10000
|
||||
|
||||
vcvtps2dq(xmm4, xmm4);
|
||||
vmovdqa(ptr[&m_local.temp.lod], xmm4);
|
||||
}
|
||||
else
|
||||
{
|
||||
// lod = K (=> use m_local->gd.k later when lod is needed)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: if(m_sel.mipmap) ...
|
||||
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
if(m_sel.ltf)
|
||||
{
|
||||
// u -= 0x8000;
|
||||
|
|
|
@ -375,6 +375,14 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
gd.sel.mipmap = 1; // TODO: pass mmin here and store mxl to m_global for clamping the lod
|
||||
gd.sel.lcm = context->TEX1.LCM;
|
||||
|
||||
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
|
||||
gd.k = GSVector4((float)(0x1000 * context->TEX1.K));
|
||||
|
||||
// the rest is fake, should be removed later
|
||||
|
||||
int level = (int)(m_vt.m_lod.x + 0.5f);
|
||||
|
||||
level = std::min<int>(level, context->TEX1.MXL);
|
||||
|
@ -691,21 +699,17 @@ void GSRendererSW::VertexKick(bool skip)
|
|||
|
||||
if(tme)
|
||||
{
|
||||
float q;
|
||||
|
||||
if(fst)
|
||||
{
|
||||
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||
q = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
||||
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||
q = m_v.RGBAQ.Q;
|
||||
}
|
||||
|
||||
v.t = v.t.xyxy(GSVector4::load(q));
|
||||
v.t = v.t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
||||
}
|
||||
|
||||
GSVertexSW& dst = m_vl.AddTail();
|
||||
|
|
|
@ -65,6 +65,8 @@ union GSScanlineSelector
|
|||
uint32 edge:1; // 47
|
||||
|
||||
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||
uint32 mipmap:1; // 49
|
||||
uint32 lcm:1; // 50
|
||||
};
|
||||
|
||||
struct
|
||||
|
@ -127,18 +129,20 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
|
|||
GSVector4i aref;
|
||||
GSVector4i afix;
|
||||
GSVector4i frb, fga;
|
||||
GSVector4 k; // TEX1.K * 0x10000
|
||||
GSVector4 l; // (1 << TEX1.L) * -0x10000
|
||||
};
|
||||
|
||||
__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own
|
||||
{
|
||||
const GSScanlineGlobalData* gd;
|
||||
|
||||
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4];
|
||||
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
|
||||
struct skip {GSVector4 z, s, t, q; GSVector4i rb, ga, f, _pad;} d[4];
|
||||
struct step {GSVector4 z, stq; GSVector4i c, f;} d4;
|
||||
struct {GSVector4i rb, ga;} c;
|
||||
struct {GSVector4i z, f;} p;
|
||||
|
||||
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
||||
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov;} temp;
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov, lod;} temp;
|
||||
};
|
||||
|
|
|
@ -188,8 +188,14 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
{
|
||||
// m_local.d4.stq (low 64 bits) = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vmovhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
|
||||
}
|
||||
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.st)], xmm1);
|
||||
|
||||
vmovq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -198,7 +204,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
vmovaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -212,16 +218,16 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst)
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
{
|
||||
// m_local.d[i].si/ti = GSVector4i(v);
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d[i].si)], xmm2); break;
|
||||
case 1: vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d[i].ti)], xmm2); break;
|
||||
case 0: vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d[i].s)], xmm2); break;
|
||||
case 1: vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d[i].t)], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -193,8 +193,14 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
{
|
||||
// m_local.d4.stq (low 64 bits) = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
movhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
|
||||
}
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.st)], xmm1);
|
||||
|
||||
movq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -203,7 +209,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -219,9 +225,9 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst)
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
{
|
||||
// m_local.d[i].si/ti = GSVector4i(v);
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
|
||||
|
|
|
@ -172,8 +172,14 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
{
|
||||
// m_local.d4.stq (low 64 bits) = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vmovhps(ptr[&m_local.d4.stq.z], xmm1);
|
||||
}
|
||||
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vmovdqa(ptr[&m_local.d4.st], xmm1);
|
||||
|
||||
vmovq(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -182,7 +188,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
vmovaps(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -196,16 +202,16 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst)
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
{
|
||||
// m_local.d[i].si/ti = GSVector4i(v);
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: vmovdqa(ptr[&m_local.d[i].si], xmm2); break;
|
||||
case 1: vmovdqa(ptr[&m_local.d[i].ti], xmm2); break;
|
||||
case 0: vmovdqa(ptr[&m_local.d[i].s], xmm2); break;
|
||||
case 1: vmovdqa(ptr[&m_local.d[i].t], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -178,8 +178,14 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
{
|
||||
// m_local.d4.stq (low 64 bits) = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
movhps(ptr[&m_local.d4.stq.z], xmm1);
|
||||
}
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
movdqa(ptr[&m_local.d4.st], xmm1);
|
||||
|
||||
movq(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -188,7 +194,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -204,16 +210,16 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst)
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
{
|
||||
// m_local.d[i].si/ti = GSVector4i(v);
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
|
||||
switch(j)
|
||||
{
|
||||
case 0: movdqa(ptr[&m_local.d[i].si], xmm2); break;
|
||||
case 1: movdqa(ptr[&m_local.d[i].ti], xmm2); break;
|
||||
case 0: movdqa(ptr[&m_local.d[i].s], xmm2); break;
|
||||
case 1: movdqa(ptr[&m_local.d[i].t], xmm2); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -24,7 +24,8 @@
|
|||
|
||||
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
|
||||
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
const GSVector4 GSVector4::m_half(0.5f, 0.5f, 0.5f, 0.5f);
|
||||
const GSVector4 GSVector4::m_half(0.5f);
|
||||
const GSVector4 GSVector4::m_one(1.0f);
|
||||
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000)));
|
||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||
|
||||
|
|
|
@ -1,3 +1,23 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
|
@ -2309,6 +2329,7 @@ public:
|
|||
static const GSVector4 m_ps0123;
|
||||
static const GSVector4 m_ps4567;
|
||||
static const GSVector4 m_half;
|
||||
static const GSVector4 m_one;
|
||||
|
||||
static const GSVector4 m_x3f800000;
|
||||
static const GSVector4 m_x4b000000;
|
||||
|
@ -2479,14 +2500,12 @@ public:
|
|||
{
|
||||
// NOTE: sign bit ignored, safe to pass negative numbers
|
||||
|
||||
GSVector4i exp = GSVector4i::xff000000() >> 1;
|
||||
GSVector4i mant = GSVector4i::x007fffff();
|
||||
GSVector4 one(1.0f);
|
||||
|
||||
GSVector4i i = GSVector4i::cast(*this);
|
||||
|
||||
GSVector4 e = GSVector4(((i & exp) >> 23) - GSVector4i::x0000007f());
|
||||
GSVector4 m = GSVector4::cast(i & mant) | one;
|
||||
GSVector4 e = GSVector4(((i << 1) >> 24) - GSVector4i::x0000007f());
|
||||
GSVector4 m = GSVector4::cast((i << 9) >> 9) | one;
|
||||
|
||||
GSVector4 p;
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ void GSVertexTrace::UpdateLOD()
|
|||
{
|
||||
// LOD = log2(1/|Q|) * (1 << L) + K
|
||||
|
||||
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(2).neg() * (float)(1 << TEX1.L) + K);
|
||||
GSVector4::storel(&m_lod, m_max.t.uph(m_min.t).log2(3).neg() * (float)(1 << TEX1.L) + K);
|
||||
|
||||
if(m_lod.x > m_lod.y) {float tmp = m_lod.x; m_lod.x = m_lod.y; m_lod.y = tmp;}
|
||||
}
|
||||
|
|
|
@ -612,7 +612,11 @@
|
|||
<ClCompile Include="GSTextureSW.cpp" />
|
||||
<ClCompile Include="GSThread.cpp" />
|
||||
<ClCompile Include="GSUtil.cpp" />
|
||||
<ClCompile Include="GSVector.cpp" />
|
||||
<ClCompile Include="GSVector.cpp">
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexList.cpp" />
|
||||
<ClCompile Include="GSVertexSW.cpp" />
|
||||
<ClCompile Include="GSVertexTrace.cpp" />
|
||||
|
|
|
@ -1,3 +1,24 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
//{{NO_DEPENDENCIES}}
|
||||
// Microsoft Visual C++ generated include file.
|
||||
// Used by GSdx.rc
|
||||
|
|
|
@ -1,3 +1,24 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
// stdafx.cpp : source file that includes just the standard includes
|
||||
// GSdx.pch will be the pre-compiled header
|
||||
// stdafx.obj will contain the pre-compiled type information
|
||||
|
|
|
@ -1,3 +1,24 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
// stdafx.h : include file for standard system include files,
|
||||
// or project specific include files that are used frequently, but
|
||||
// are changed infrequently
|
||||
|
|
Loading…
Reference in New Issue