2010-04-25 00:31:27 +00:00
|
|
|
/*
|
2010-04-24 21:37:39 +00:00
|
|
|
* Copyright (C) 2007-2009 Gabest
|
|
|
|
* http://www.gabest.org
|
|
|
|
*
|
|
|
|
* This Program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
* any later version.
|
2010-04-25 00:31:27 +00:00
|
|
|
*
|
2010-04-24 21:37:39 +00:00
|
|
|
* This Program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
2010-04-25 00:31:27 +00:00
|
|
|
*
|
2010-04-24 21:37:39 +00:00
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with GNU Make; see the file COPYING. If not, write to
|
2012-09-09 18:16:11 +00:00
|
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
2010-04-24 21:37:39 +00:00
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2011-02-19 03:36:30 +00:00
|
|
|
#include "stdafx.h"
|
2010-04-24 21:37:39 +00:00
|
|
|
#include "GSDrawScanlineCodeGenerator.h"
|
|
|
|
|
2013-06-20 05:07:52 +00:00
|
|
|
#if _M_SSE >= 0x501
|
|
|
|
|
2013-06-23 10:46:24 +00:00
|
|
|
__aligned(const uint8, 8) GSDrawScanlineCodeGenerator::m_test[16][8] =
|
2013-06-20 05:07:52 +00:00
|
|
|
{
|
2013-06-23 10:46:24 +00:00
|
|
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
|
|
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
|
|
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
|
|
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
|
|
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
|
|
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
|
|
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
|
|
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
|
|
|
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
|
|
|
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
|
|
|
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
|
|
|
|
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
|
|
|
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
|
|
|
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
|
|
|
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
|
|
|
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
2013-06-20 05:07:52 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
|
|
|
{
|
|
|
|
GSVector8(0.204446009836232697516f),
|
|
|
|
GSVector8(-1.04913055217340124191f),
|
|
|
|
GSVector8(2.28330284476918490682f),
|
|
|
|
GSVector8(1.0f),
|
|
|
|
};
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
|
|
|
|
{
|
|
|
|
GSVector4i::zero(),
|
|
|
|
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
|
|
|
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
|
|
|
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
|
|
|
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
|
|
|
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
|
|
|
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
|
|
|
GSVector4i::zero(),
|
|
|
|
};
|
2011-02-17 03:24:37 +00:00
|
|
|
|
2011-03-14 03:32:28 +00:00
|
|
|
const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
|
|
|
{
|
|
|
|
GSVector4(0.204446009836232697516f),
|
|
|
|
GSVector4(-1.04913055217340124191f),
|
|
|
|
GSVector4(2.28330284476918490682f),
|
|
|
|
GSVector4(1.0f),
|
|
|
|
};
|
|
|
|
|
2013-06-20 05:07:52 +00:00
|
|
|
#endif
|
|
|
|
|
2011-02-16 03:19:36 +00:00
|
|
|
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
|
|
|
: GSCodeGenerator(code, maxsize)
|
2011-02-17 03:24:37 +00:00
|
|
|
, m_local(*(GSScanlineLocalData*)param)
|
2010-04-24 21:37:39 +00:00
|
|
|
{
|
|
|
|
m_sel.key = key;
|
|
|
|
|
|
|
|
Generate();
|
|
|
|
}
|
|
|
|
|
2013-06-20 05:07:52 +00:00
|
|
|
#if _M_SSE >= 0x501
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::modulate16(const Ymm& a, const Operand& f, int shift)
|
|
|
|
{
|
|
|
|
if(shift == 0)
|
|
|
|
{
|
|
|
|
vpmulhrsw(a, f);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
vpsllw(a, (uint8)(shift + 1));
|
|
|
|
vpmulhw(a, f);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::lerp16(const Ymm& a, const Ymm& b, const Ymm& f, int shift)
|
|
|
|
{
|
|
|
|
vpsubw(a, b);
|
|
|
|
modulate16(a, f, shift);
|
|
|
|
vpaddw(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::lerp16_4(const Ymm& a, const Ymm& b, const Ymm& f)
|
|
|
|
{
|
|
|
|
vpsubw(a, b);
|
|
|
|
vpmullw(a, f);
|
|
|
|
vpsraw(a, 4);
|
|
|
|
vpaddw(a, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::mix16(const Ymm& a, const Ymm& b, const Ymm& temp)
|
|
|
|
{
|
|
|
|
vpblendw(a, b, 0xaa);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::clamp16(const Ymm& a, const Ymm& temp)
|
|
|
|
{
|
|
|
|
vpackuswb(a, a);
|
|
|
|
vpermq(a, a, _MM_SHUFFLE(3, 1, 2, 0)); // this sucks
|
|
|
|
vpmovzxbw(a, a);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::alltrue()
|
|
|
|
{
|
|
|
|
vpmovmskb(eax, ymm7);
|
|
|
|
cmp(eax, 0xffffffff);
|
|
|
|
je("step", T_NEAR);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::blend(const Ymm& a, const Ymm& b, const Ymm& mask)
|
|
|
|
{
|
|
|
|
vpand(b, mask);
|
|
|
|
vpandn(mask, a);
|
|
|
|
vpor(a, b, mask);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::blendr(const Ymm& b, const Ymm& a, const Ymm& mask)
|
|
|
|
{
|
|
|
|
vpand(b, mask);
|
|
|
|
vpandn(mask, a);
|
|
|
|
vpor(b, mask);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::blend8(const Ymm& a, const Ymm& b)
|
|
|
|
{
|
|
|
|
vpblendvb(a, a, b, xmm0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a)
|
|
|
|
{
|
|
|
|
vpblendvb(b, a, b, xmm0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift)
|
2010-04-24 21:37:39 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
#if _M_SSE >= 0x500
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
if(shift == 0)
|
2010-04-24 21:37:39 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
vpmulhrsw(a, f);
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
2011-02-28 11:08:52 +00:00
|
|
|
else
|
2010-04-24 21:37:39 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
vpsllw(a, shift + 1);
|
|
|
|
vpmulhw(a, f);
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3))
|
2011-02-07 01:59:05 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
pmulhrsw(a, f);
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
2011-02-07 01:59:05 +00:00
|
|
|
else
|
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
psllw(a, shift + 1);
|
|
|
|
pmulhw(a, f);
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
2011-02-28 11:08:52 +00:00
|
|
|
|
|
|
|
#endif
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift)
|
2010-04-24 21:37:39 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
#if _M_SSE >= 0x500
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
vpsubw(a, b);
|
|
|
|
modulate16(a, f, shift);
|
|
|
|
vpaddw(a, b);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
psubw(a, b);
|
|
|
|
modulate16(a, f, shift);
|
|
|
|
paddw(a, b);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
2011-02-07 01:59:05 +00:00
|
|
|
}
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2012-02-12 17:56:06 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f)
|
|
|
|
{
|
|
|
|
#if _M_SSE >= 0x500
|
|
|
|
|
|
|
|
vpsubw(a, b);
|
|
|
|
vpmullw(a, f);
|
|
|
|
vpsraw(a, 4);
|
|
|
|
vpaddw(a, b);
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
psubw(a, b);
|
|
|
|
pmullw(a, f);
|
|
|
|
psraw(a, 4);
|
|
|
|
paddw(a, b);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp)
|
2011-02-07 01:59:05 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
#if _M_SSE >= 0x500
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
vpblendw(a, b, 0xaa);
|
|
|
|
|
|
|
|
#elif _M_SSE >= 0x401
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pblendw(a, b, 0xaa);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pcmpeqd(temp, temp);
|
|
|
|
psrld(temp, 16);
|
|
|
|
pand(a, temp);
|
|
|
|
pandn(temp, b);
|
|
|
|
por(a, temp);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
}
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
|
|
|
|
{
|
|
|
|
#if _M_SSE >= 0x500
|
|
|
|
|
|
|
|
vpackuswb(a, a);
|
|
|
|
vpmovzxbw(a, a);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#elif _M_SSE >= 0x401
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
packuswb(a, a);
|
|
|
|
pmovzxbw(a, a);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
packuswb(a, a);
|
|
|
|
pxor(temp, temp);
|
|
|
|
punpcklbw(a, temp);
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
|
|
|
}
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::alltrue()
|
|
|
|
{
|
|
|
|
#if _M_SSE >= 0x500
|
|
|
|
|
|
|
|
vpmovmskb(eax, xmm7);
|
|
|
|
cmp(eax, 0xffff);
|
|
|
|
je("step", T_NEAR);
|
2010-04-25 00:31:27 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pmovmskb(eax, xmm7);
|
|
|
|
cmp(eax, 0xffff);
|
|
|
|
je("step", T_NEAR);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
|
|
|
}
|
2010-04-25 00:31:27 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask)
|
|
|
|
{
|
|
|
|
#if _M_SSE >= 0x500
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
vpand(b, mask);
|
|
|
|
vpandn(mask, a);
|
|
|
|
vpor(a, b, mask);
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pand(b, mask);
|
|
|
|
pandn(mask, a);
|
|
|
|
por(b, mask);
|
|
|
|
movdqa(a, b);
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& mask)
|
2010-04-24 21:37:39 +00:00
|
|
|
{
|
2011-02-28 11:08:52 +00:00
|
|
|
#if _M_SSE >= 0x500
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
vpand(b, mask);
|
|
|
|
vpandn(mask, a);
|
|
|
|
vpor(b, mask);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pand(b, mask);
|
|
|
|
pandn(mask, a);
|
|
|
|
por(b, mask);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
|
|
|
}
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
|
|
|
|
{
|
|
|
|
#if _M_SSE >= 0x500
|
|
|
|
|
2011-03-20 00:07:52 +00:00
|
|
|
vpblendvb(a, a, b, xmm0);
|
|
|
|
|
|
|
|
#elif _M_SSE >= 0x401
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pblendvb(a, b);
|
2011-02-07 01:59:05 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2011-02-16 03:19:36 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
blend(a, b, xmm0);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
|
|
|
}
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
|
|
|
|
{
|
|
|
|
#if _M_SSE >= 0x500
|
|
|
|
|
2011-03-20 00:07:52 +00:00
|
|
|
vpblendvb(b, a, b, xmm0);
|
|
|
|
|
|
|
|
#elif _M_SSE >= 0x401
|
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
pblendvb(a, b);
|
|
|
|
movdqa(b, a);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#else
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
blendr(b, a, xmm0);
|
2010-04-24 21:37:39 +00:00
|
|
|
|
2011-02-28 11:08:52 +00:00
|
|
|
#endif
|
2010-04-24 21:37:39 +00:00
|
|
|
}
|
2013-06-20 05:07:52 +00:00
|
|
|
|
|
|
|
#endif
|