Merge pull request #333 from PCSX2/linux-avx

Support of AVX build for linux
This commit is contained in:
ramapcsx2 2014-11-08 14:09:06 +01:00
commit b7e5e41afe
5 changed files with 32 additions and 8 deletions

View File

@ -37,6 +37,7 @@ for ARG in "$@"; do
--wx28 ) flags+=(-DWX28_API=TRUE) ;;
--wx30 ) flags+=(-DWX28_API=FALSE) ;;
--64-bit-dont-work ) flags+=(-D64BIT_BUILD_DONT_WORK=TRUE) ;;
--no-simd ) flags+=(-DDISABLE_ADVANCE_SIMD=TRUE) ;;
*)
# Unknown option
@ -58,6 +59,7 @@ for ARG in "$@"; do
echo "--gles : Replace openGL backend of GSdx by openGLES3"
echo
echo "--64-bit-dont-work : Don't use it!"
echo "--no-simd : Only allow sse2"
exit 1
esac
done

View File

@ -64,6 +64,7 @@ option(USE_ASAN "Enable address sanitizer")
# Select the architecture
#-------------------------------------------------------------------------------
option(64BIT_BUILD_DONT_WORK "Enable a x86_64 build instead of cross compiling (WARNING: NOTHING WORK)" OFF)
option(DISABLE_ADVANCE_SIMD "Disable advance use of SIMD (SSE2+ & AVX)" OFF)
# Architecture bitness detection
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
@ -99,7 +100,11 @@ if(_ARCH_64 AND 64BIT_BUILD_DONT_WORK)
# x86_64 requires -fPIC
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(ARCH_FLAG "-m64 -msse -msse2")
# Select the compiler architecture flags for the 64-bit build.
# DISABLE_ADVANCE_SIMD restricts the build to the plain SSE/SSE2 flag set;
# otherwise -march=native is used, which targets the machine running the build.
if (DISABLE_ADVANCE_SIMD)
set(ARCH_FLAG "-m64 -msse -msse2")
else()
# -fabi-version=6 selects a newer C++ ABI (name mangling) version.
# NOTE(review): the 32-bit branch of this file states it is needed for AVX and requires GCC 4.7 - confirm the same applies here.
set(ARCH_FLAG "-m64 -march=native -fabi-version=6")
endif()
add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1)
set(_ARCH_64 1)
set(_M_X86 1)
@ -127,7 +132,13 @@ else()
# - Only plugins. No package will link to them.
set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
set(ARCH_FLAG "-m32 -msse -msse2 -march=i686")
# Select the compiler architecture flags for the 32-bit build.
# DISABLE_ADVANCE_SIMD keeps the i686 + SSE/SSE2 baseline flag set;
# otherwise -march=native is used, which targets the machine running the build.
if (DISABLE_ADVANCE_SIMD)
set(ARCH_FLAG "-m32 -msse -msse2 -march=i686")
else()
# AVX requires a fix in the C++ ABI (name mangling); the default ABI version is 2.
# Note: ABI version 6 requires GCC 4.7.
set(ARCH_FLAG "-m32 -march=native -fabi-version=6")
endif()
add_definitions(-D_ARCH_32=1 -D_M_X86=1 -D_M_X86_32=1)
set(_ARCH_32 1)
set(_M_X86 1)

View File

@ -92,6 +92,7 @@ set(GSdxSources
GSDrawScanline.cpp
GSDrawScanlineCodeGenerator.cpp
GSDrawScanlineCodeGenerator.x86.avx.cpp
GSDrawScanlineCodeGenerator.x86.avx2.cpp
GSDrawScanlineCodeGenerator.x64.cpp
GSDrawScanlineCodeGenerator.x86.cpp
GSDrawScanlineCodeGenerator.x64.avx.cpp
@ -109,6 +110,7 @@ set(GSdxSources
GSSetting.cpp
GSSetupPrimCodeGenerator.cpp
GSSetupPrimCodeGenerator.x86.avx.cpp
GSSetupPrimCodeGenerator.x86.avx2.cpp
GSSetupPrimCodeGenerator.x64.avx.cpp
GSSetupPrimCodeGenerator.x86.cpp
GSSetupPrimCodeGenerator.x64.cpp

View File

@ -3810,7 +3810,8 @@ public:
template<int i> __forceinline GSVector8i sll() const
{
return GSVector8i(_mm256_slli_si128(m, i));
return GSVector8i(_mm256_slli_si256(m, i));
//return GSVector8i(_mm256_slli_si128(m, i));
}
__forceinline GSVector8i sra16(int i) const
@ -4260,17 +4261,17 @@ public:
return cast(v0).insert<1>(v1);
}
template<> __forceinline GSVector8i gather32_32<uint8>(const uint8* ptr) const
__forceinline GSVector8i gather32_32(const uint8* ptr) const
{
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
}
template<> __forceinline GSVector8i gather32_32<uint16>(const uint16* ptr) const
__forceinline GSVector8i gather32_32(const uint16* ptr) const
{
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
}
template<> __forceinline GSVector8i gather32_32<uint32>(const uint32* ptr) const
__forceinline GSVector8i gather32_32(const uint32* ptr) const
{
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
}
@ -4296,12 +4297,12 @@ public:
return cast(v0).insert<1>(v1);
}
template<> __forceinline GSVector8i gather32_32<uint8, uint32>(const uint8* ptr1, const uint32* ptr2) const
__forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const
{
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
}
template<> __forceinline GSVector8i gather32_32<uint32, uint32>(const uint32* ptr1, const uint32* ptr2) const
__forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const
{
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
}

View File

@ -263,6 +263,14 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
#endif
// sse
#ifndef _WINDOWS
// Convert the GCC SSE/AVX feature defines into the GSdx (Windows-style) _M_SSE define.
// If neither __AVX2__ nor __AVX__ is defined, _M_SSE is left untouched here,
// so the `!defined(_M_SSE)` check that follows this block still applies.
#if defined(__AVX2__)
// AVX2 available
#define _M_SSE 0x501
#elif defined(__AVX__)
// AVX (without AVX2) available
#define _M_SSE 0x500
#endif
#endif
#if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)