GSdx: BoF DQ fixed

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4459 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-20 00:07:52 +00:00
parent 2628a03cdd
commit 870a86c117
2 changed files with 117 additions and 101 deletions

View File

@ -195,6 +195,10 @@ void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
{ {
#if _M_SSE >= 0x500 #if _M_SSE >= 0x500
vpblendvb(a, a, b, xmm0);
#elif _M_SSE >= 0x401
pblendvb(a, b); pblendvb(a, b);
#else #else
@ -208,6 +212,10 @@ void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
{ {
#if _M_SSE >= 0x500 #if _M_SSE >= 0x500
vpblendvb(b, a, b, xmm0);
#elif _M_SSE >= 0x401
pblendvb(a, b); pblendvb(a, b);
movdqa(b, a); movdqa(b, a);

View File

@ -1061,15 +1061,16 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
movdqa(xmm4, ptr[&m_local.gd->t.min]); movdqa(xmm4, ptr[&m_local.gd->t.min]);
movdqa(xmm5, ptr[&m_local.gd->t.max]); movdqa(xmm5, ptr[&m_local.gd->t.max]);
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
{
movdqa(xmm0, ptr[&m_local.gd->t.mask]); movdqa(xmm0, ptr[&m_local.gd->t.mask]);
}
else #else
{
movdqa(xmm0, ptr[&m_local.gd->t.invmask]); movdqa(xmm0, ptr[&m_local.gd->t.invmask]);
movdqa(xmm6, xmm0); movdqa(xmm6, xmm0);
}
#endif
// uv0 // uv0
@ -1091,14 +1092,15 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
// clamp.blend8(repeat, m_local.gd->t.mask); // clamp.blend8(repeat, m_local.gd->t.mask);
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
{
pblendvb(uv0, xmm1); pblendvb(uv0, xmm1);
}
else #else
{
blendr(uv0, xmm1, xmm0); blendr(uv0, xmm1, xmm0);
}
#endif
// uv1 // uv1
@ -1120,14 +1122,15 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
// clamp.blend8(repeat, m_local.gd->t.mask); // clamp.blend8(repeat, m_local.gd->t.mask);
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
{
pblendvb(uv1, xmm1); pblendvb(uv1, xmm1);
}
else #else
{
blendr(uv1, xmm1, xmm6); blendr(uv1, xmm1, xmm6);
}
#endif
} }
} }
@ -1919,15 +1922,16 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
} }
else else
{ {
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
{
movdqa(xmm0, ptr[&m_local.gd->t.mask]); movdqa(xmm0, ptr[&m_local.gd->t.mask]);
}
else #else
{
movdqa(xmm0, ptr[&m_local.gd->t.invmask]); movdqa(xmm0, ptr[&m_local.gd->t.invmask]);
movdqa(xmm4, xmm0); movdqa(xmm4, xmm0);
}
#endif
// uv0 // uv0
@ -1949,14 +1953,15 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
// clamp.blend8(repeat, m_local.gd->t.mask); // clamp.blend8(repeat, m_local.gd->t.mask);
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
{
pblendvb(uv0, xmm1); pblendvb(uv0, xmm1);
}
else #else
{
blendr(uv0, xmm1, xmm0); blendr(uv0, xmm1, xmm0);
}
#endif
// uv1 // uv1
@ -1978,14 +1983,15 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
// clamp.blend8(repeat, m_local.gd->t.mask); // clamp.blend8(repeat, m_local.gd->t.mask);
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
{
pblendvb(uv1, xmm1); pblendvb(uv1, xmm1);
}
else #else
{
blendr(uv1, xmm1, xmm4); blendr(uv1, xmm1, xmm4);
}
#endif
} }
} }
@ -2568,8 +2574,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
{ {
case 0: case 0:
case 1: case 1:
movdqa(xmm7, m_sel.abc ? xmm1 : xmm6); pshuflw(xmm7, m_sel.abc ? xmm1 : xmm6, _MM_SHUFFLE(3, 3, 1, 1));
pshuflw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1));
pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1)); pshufhw(xmm7, xmm7, _MM_SHUFFLE(3, 3, 1, 1));
psllw(xmm7, 7); psllw(xmm7, 7);
break; break;
@ -2682,14 +2687,15 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
if(m_sel.pabe) if(m_sel.pabe)
{ {
if(!m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE < 0x401
{
// doh, previous blend8r overwrote xmm0 (sse41 uses pblendvb)
movdqa(xmm0, xmm4); // doh, previous blend8r overwrote xmm0 (sse41 uses pblendvb)
pslld(xmm0, 8);
psrad(xmm0, 31); movdqa(xmm0, xmm4);
} pslld(xmm0, 8);
psrad(xmm0, 31);
#endif
psrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16) psrld(xmm0, 16); // zero out high words to select the source alpha in blend (so it also does mix16)
@ -2851,48 +2857,49 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
{ {
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2]; Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
if(m_cpu.has(util::Cpu::tSSE41)) #if _M_SSE >= 0x401
switch(psm)
{ {
switch(psm) case 0:
{ if(i == 0) movd(dst, src);
case 0: else pextrd(dst, src, i);
if(i == 0) movd(dst, src); break;
else pextrd(dst, src, i); case 1:
break; if(i == 0) movd(eax, src);
case 1: else pextrd(eax, src, i);
if(i == 0) movd(eax, src); xor(eax, dst);
else pextrd(eax, src, i); and(eax, 0xffffff);
xor(eax, dst); xor(dst, eax);
and(eax, 0xffffff); break;
xor(dst, eax); case 2:
break; pextrw(eax, src, i * 2);
case 2: mov(dst, ax);
pextrw(eax, src, i * 2); break;
mov(dst, ax);
break;
}
} }
else
#else
switch(psm)
{ {
switch(psm) case 0:
{ if(i == 0) movd(dst, src);
case 0: else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);}
if(i == 0) movd(dst, src); break;
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(dst, xmm0);} case 1:
break; if(i == 0) movd(eax, src);
case 1: else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);}
if(i == 0) movd(eax, src); xor(eax, dst);
else {pshufd(xmm0, src, _MM_SHUFFLE(i, i, i, i)); movd(eax, xmm0);} and(eax, 0xffffff);
xor(eax, dst); xor(dst, eax);
and(eax, 0xffffff); break;
xor(dst, eax); case 2:
break; pextrw(eax, src, i * 2);
case 2: mov(dst, ax);
pextrw(eax, src, i * 2); break;
mov(dst, ax);
break;
}
} }
#endif
} }
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset) void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
@ -3123,10 +3130,11 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uin
{ {
const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_local.gd->clut] : ptr[ebx + eax * 4]; const Address& src = m_sel.tlu ? ptr[eax * 4 + (size_t)m_local.gd->clut] : ptr[ebx + eax * 4];
if(!m_cpu.has(util::Cpu::tSSE41) && i > 0) #if _M_SSE < 0x401
{
ASSERT(0); ASSERT(i == 0);
}
#endif
if(i == 0) movd(eax, addr); if(i == 0) movd(eax, addr);
else pextrd(eax, addr, i); else pextrd(eax, addr, i);