/* * Copyright (C) 2007-2009 Gabest * http://www.gabest.org * * This Program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This Program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. * http://www.gnu.org/copyleft/gpl.html * */ #include "stdafx.h" #include "GSDrawScanlineCodeGenerator.h" #if _M_SSE >= 0x501 #else void GSDrawScanlineCodeGenerator::Generate() { if(m_cpu.has(util::Cpu::tAVX)) Generate_AVX(); else Generate_SSE(); } #endif GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) , m_local(*(GSScanlineLocalData*)param) , m_rip(false) { m_sel.key = key; if(m_sel.breakpoint) db(0xCC); try { Generate(); } catch (std::exception& e) { fprintf(stderr, "ERR:GSDrawScanlineCodeGenerator %s\n", e.what()); } } void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, uint8 shift) { if(m_cpu.has(util::Cpu::tAVX)) { if(shift == 0) { vpmulhrsw(a, f); } else { vpsllw(a, shift + 1); vpmulhw(a, f); } } else { if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3)) { pmulhrsw(a, f); } else { psllw(a, shift + 1); pmulhw(a, f); } } } void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, uint8 shift) { if(m_cpu.has(util::Cpu::tAVX)) { vpsubw(a, b); modulate16(a, f, shift); vpaddw(a, b); } else { psubw(a, b); modulate16(a, f, shift); paddw(a, b); } } void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f) { if(m_cpu.has(util::Cpu::tAVX)) { vpsubw(a, b); vpmullw(a, f); vpsraw(a, 4); vpaddw(a, b); } else { psubw(a, b); pmullw(a, f); psraw(a, 4); paddw(a, b); } } void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp) { if(m_cpu.has(util::Cpu::tAVX)) { vpblendw(a, b, 0xaa); } else { if(m_cpu.has(util::Cpu::tSSE41)) { pblendw(a, b, 0xaa); } else { pcmpeqd(temp, temp); psrld(temp, 16); pand(a, temp); pandn(temp, b); por(a, temp); } } } void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp) { if(m_cpu.has(util::Cpu::tAVX)) { vpackuswb(a, a); #if _M_SSE >= 0x501 // Greg: why ? if(m_cpu.has(util::Cpu::tAVX2)) { ASSERT(a.isYMM()); vpermq(Ymm(a.getIdx()), Ymm(a.getIdx()), _MM_SHUFFLE(3, 1, 2, 0)); // this sucks } #endif vpmovzxbw(a, a); } else { if(m_cpu.has(util::Cpu::tSSE41)) { packuswb(a, a); pmovzxbw(a, a); } else { packuswb(a, a); pxor(temp, temp); punpcklbw(a, temp); } } } void GSDrawScanlineCodeGenerator::alltrue(const Xmm& test) { uint32 mask = test.isYMM() ? 0xffffffff : 0xffff; if(m_cpu.has(util::Cpu::tAVX)) { vpmovmskb(eax, test); cmp(eax, mask); je("step", T_NEAR); } else { pmovmskb(eax, test); cmp(eax, mask); je("step", T_NEAR); } } void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask) { if(m_cpu.has(util::Cpu::tAVX)) { vpand(b, mask); vpandn(mask, a); vpor(a, b, mask); } else { pand(b, mask); pandn(mask, a); por(b, mask); movdqa(a, b); } } void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& mask) { if(m_cpu.has(util::Cpu::tAVX)) { vpand(b, mask); vpandn(mask, a); vpor(b, mask); } else { pand(b, mask); pandn(mask, a); por(b, mask); } } void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b) { if(m_cpu.has(util::Cpu::tAVX)) vpblendvb(a, a, b, xmm0); else if(m_cpu.has(util::Cpu::tSSE41)) pblendvb(a, b); else blend(a, b, xmm0); } void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a) { if(m_cpu.has(util::Cpu::tAVX)) { vpblendvb(b, a, b, xmm0); } else if(m_cpu.has(util::Cpu::tSSE41)) { pblendvb(a, b); movdqa(b, a); } else { blendr(b, a, xmm0); } } void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src) { // l = src & 0xFF; (1 left shift + 1 right shift) // h = (src >> 8) & 0xFF; (1 right shift) if(m_cpu.has(util::Cpu::tAVX)) { if (src == h) { vpsllw(l, src, 8); vpsrlw(h, 8); } else if (src == l) { vpsrlw(h, src, 8); vpsllw(l, 8); } else { vpsllw(l, src, 8); vpsrlw(h, src, 8); } vpsrlw(l, 8); } else { if (src == h) { movdqa(l, src); } else if (src == l) { movdqa(h, src); } else { movdqa(l, src); movdqa(h, src); } psllw(l, 8); psrlw(l, 8); psrlw(h, 8); } }