mirror of https://github.com/PCSX2/pcsx2.git
gsdx: enable AVX with GCC
* Use overloaded functions instead of specialized templates; see http://stackoverflow.com/questions/3052579/explicit-specialization-in-non-namespace-scope * Replace _mm256_slli_si128 with _mm256_slli_si256. I hope they are equivalent: I could not find any information on _mm256_slli_si128, but srl uses _mm256_srli_si256.
This commit is contained in:
parent
679fa65b84
commit
f25e056914
|
@ -3810,7 +3810,8 @@ public:
|
||||||
|
|
||||||
template<int i> __forceinline GSVector8i sll() const
|
template<int i> __forceinline GSVector8i sll() const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_slli_si128(m, i));
|
return GSVector8i(_mm256_slli_si256(m, i));
|
||||||
|
//return GSVector8i(_mm256_slli_si128(m, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i sra16(int i) const
|
__forceinline GSVector8i sra16(int i) const
|
||||||
|
@ -4260,17 +4261,17 @@ public:
|
||||||
return cast(v0).insert<1>(v1);
|
return cast(v0).insert<1>(v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint8>(const uint8* ptr) const
|
__forceinline GSVector8i gather32_32(const uint8* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint16>(const uint16* ptr) const
|
__forceinline GSVector8i gather32_32(const uint16* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint32>(const uint32* ptr) const
|
__forceinline GSVector8i gather32_32(const uint32* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
||||||
}
|
}
|
||||||
|
@ -4296,12 +4297,12 @@ public:
|
||||||
return cast(v0).insert<1>(v1);
|
return cast(v0).insert<1>(v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint8, uint32>(const uint8* ptr1, const uint32* ptr2) const
|
__forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const
|
||||||
{
|
{
|
||||||
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
|
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint32, uint32>(const uint32* ptr1, const uint32* ptr2) const
|
__forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const
|
||||||
{
|
{
|
||||||
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
|
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -263,6 +263,14 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// sse
|
// sse
|
||||||
|
#ifndef _WINDOWS
|
||||||
|
// Convert the gcc SSE defines into the GSdx (Windows) define
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
#define _M_SSE 0x501
|
||||||
|
#elif defined(__AVX__)
|
||||||
|
#define _M_SSE 0x500
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
#if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue