gsdx: work around incorrect AVX2 code generated by GCC

See commit for the full details.

v2: access the union field directly instead of calling extract32.
This gives us code that is both optimal and correct.
This commit is contained in:
Gregory Hainaut 2017-03-27 21:39:33 +02:00
parent 82405982f7
commit df32564bef
1 changed files with 12 additions and 0 deletions

View File

@ -45,7 +45,19 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
if(m_global.sel.mmin && m_global.sel.lcm) if(m_global.sel.mmin && m_global.sel.lcm)
{ {
#if defined(__GNUC__) && _M_SSE >= 0x501
// GCC 4.9/5/6 doesn't generate correct AVX2 code for extract32<0>. The behavior of GCC 7 and later is unknown.
// The intrinsic code is _mm_cvtsi128_si32(_mm256_castsi256_si128(m))
// It seems recent Clang versions provide _mm256_cvtsi256_si32(m) instead. I don't know about GCC.
//
// The generated code keeps the integer in an XMM register, but bits [63:32] aren't cleared.
// So the srl16 shift count will be huge and v will be 0.
//
int lod_x = m_global.lod.i.x0;
GSVector4i v = m_global.t.minmax.srl16(lod_x);
#else
GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.extract32<0>());//.x); GSVector4i v = m_global.t.minmax.srl16(m_global.lod.i.extract32<0>());//.x);
#endif
v = v.upl16(v); v = v.upl16(v);