mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #333 from PCSX2/linux-avx
Support of AVX build for linux
This commit is contained in:
commit
b7e5e41afe
2
build.sh
2
build.sh
|
@ -37,6 +37,7 @@ for ARG in "$@"; do
|
||||||
--wx28 ) flags+=(-DWX28_API=TRUE) ;;
|
--wx28 ) flags+=(-DWX28_API=TRUE) ;;
|
||||||
--wx30 ) flags+=(-DWX28_API=FALSE) ;;
|
--wx30 ) flags+=(-DWX28_API=FALSE) ;;
|
||||||
--64-bit-dont-work ) flags+=(-D64BIT_BUILD_DONT_WORK=TRUE) ;;
|
--64-bit-dont-work ) flags+=(-D64BIT_BUILD_DONT_WORK=TRUE) ;;
|
||||||
|
--no-simd ) flags+=(-DDISABLE_ADVANCE_SIMD=TRUE) ;;
|
||||||
|
|
||||||
*)
|
*)
|
||||||
# Unknown option
|
# Unknown option
|
||||||
|
@ -58,6 +59,7 @@ for ARG in "$@"; do
|
||||||
echo "--gles : Replace openGL backend of GSdx by openGLES3"
|
echo "--gles : Replace openGL backend of GSdx by openGLES3"
|
||||||
echo
|
echo
|
||||||
echo "--64-bit-dont-work : Don't use it!"
|
echo "--64-bit-dont-work : Don't use it!"
|
||||||
|
echo "--no-simd : Only allow sse2"
|
||||||
exit 1
|
exit 1
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
|
@ -64,6 +64,7 @@ option(USE_ASAN "Enable address sanitizer")
|
||||||
# Select the architecture
|
# Select the architecture
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
option(64BIT_BUILD_DONT_WORK "Enable a x86_64 build instead of cross compiling (WARNING: NOTHING WORK)" OFF)
|
option(64BIT_BUILD_DONT_WORK "Enable a x86_64 build instead of cross compiling (WARNING: NOTHING WORK)" OFF)
|
||||||
|
option(DISABLE_ADVANCE_SIMD "Disable advance use of SIMD (SSE2+ & AVX)" OFF)
|
||||||
|
|
||||||
# Architecture bitness detection
|
# Architecture bitness detection
|
||||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
|
@ -99,7 +100,11 @@ if(_ARCH_64 AND 64BIT_BUILD_DONT_WORK)
|
||||||
# x86_64 requires -fPIC
|
# x86_64 requires -fPIC
|
||||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
set(ARCH_FLAG "-m64 -msse -msse2")
|
if (DISABLE_ADVANCE_SIMD)
|
||||||
|
set(ARCH_FLAG "-m64 -msse -msse2")
|
||||||
|
else()
|
||||||
|
set(ARCH_FLAG "-m64 -march=native -fabi-version=6")
|
||||||
|
endif()
|
||||||
add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1)
|
add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1)
|
||||||
set(_ARCH_64 1)
|
set(_ARCH_64 1)
|
||||||
set(_M_X86 1)
|
set(_M_X86 1)
|
||||||
|
@ -127,7 +132,13 @@ else()
|
||||||
# - Only plugins. No package will link to them.
|
# - Only plugins. No package will link to them.
|
||||||
set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
|
set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
|
||||||
|
|
||||||
set(ARCH_FLAG "-m32 -msse -msse2 -march=i686")
|
if (DISABLE_ADVANCE_SIMD)
|
||||||
|
set(ARCH_FLAG "-m32 -msse -msse2 -march=i686")
|
||||||
|
else()
|
||||||
|
# AVX requires a newer C++ ABI version to fix name mangling (the default -fabi-version is 2)
|
||||||
|
# Note: ABI version 6 requires GCC 4.7
|
||||||
|
set(ARCH_FLAG "-m32 -march=native -fabi-version=6")
|
||||||
|
endif()
|
||||||
add_definitions(-D_ARCH_32=1 -D_M_X86=1 -D_M_X86_32=1)
|
add_definitions(-D_ARCH_32=1 -D_M_X86=1 -D_M_X86_32=1)
|
||||||
set(_ARCH_32 1)
|
set(_ARCH_32 1)
|
||||||
set(_M_X86 1)
|
set(_M_X86 1)
|
||||||
|
|
|
@ -92,6 +92,7 @@ set(GSdxSources
|
||||||
GSDrawScanline.cpp
|
GSDrawScanline.cpp
|
||||||
GSDrawScanlineCodeGenerator.cpp
|
GSDrawScanlineCodeGenerator.cpp
|
||||||
GSDrawScanlineCodeGenerator.x86.avx.cpp
|
GSDrawScanlineCodeGenerator.x86.avx.cpp
|
||||||
|
GSDrawScanlineCodeGenerator.x86.avx2.cpp
|
||||||
GSDrawScanlineCodeGenerator.x64.cpp
|
GSDrawScanlineCodeGenerator.x64.cpp
|
||||||
GSDrawScanlineCodeGenerator.x86.cpp
|
GSDrawScanlineCodeGenerator.x86.cpp
|
||||||
GSDrawScanlineCodeGenerator.x64.avx.cpp
|
GSDrawScanlineCodeGenerator.x64.avx.cpp
|
||||||
|
@ -109,6 +110,7 @@ set(GSdxSources
|
||||||
GSSetting.cpp
|
GSSetting.cpp
|
||||||
GSSetupPrimCodeGenerator.cpp
|
GSSetupPrimCodeGenerator.cpp
|
||||||
GSSetupPrimCodeGenerator.x86.avx.cpp
|
GSSetupPrimCodeGenerator.x86.avx.cpp
|
||||||
|
GSSetupPrimCodeGenerator.x86.avx2.cpp
|
||||||
GSSetupPrimCodeGenerator.x64.avx.cpp
|
GSSetupPrimCodeGenerator.x64.avx.cpp
|
||||||
GSSetupPrimCodeGenerator.x86.cpp
|
GSSetupPrimCodeGenerator.x86.cpp
|
||||||
GSSetupPrimCodeGenerator.x64.cpp
|
GSSetupPrimCodeGenerator.x64.cpp
|
||||||
|
|
|
@ -3810,7 +3810,8 @@ public:
|
||||||
|
|
||||||
template<int i> __forceinline GSVector8i sll() const
|
template<int i> __forceinline GSVector8i sll() const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_slli_si128(m, i));
|
return GSVector8i(_mm256_slli_si256(m, i));
|
||||||
|
//return GSVector8i(_mm256_slli_si128(m, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i sra16(int i) const
|
__forceinline GSVector8i sra16(int i) const
|
||||||
|
@ -4260,17 +4261,17 @@ public:
|
||||||
return cast(v0).insert<1>(v1);
|
return cast(v0).insert<1>(v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint8>(const uint8* ptr) const
|
__forceinline GSVector8i gather32_32(const uint8* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint16>(const uint16* ptr) const
|
__forceinline GSVector8i gather32_32(const uint16* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint32>(const uint32* ptr) const
|
__forceinline GSVector8i gather32_32(const uint32* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
||||||
}
|
}
|
||||||
|
@ -4296,12 +4297,12 @@ public:
|
||||||
return cast(v0).insert<1>(v1);
|
return cast(v0).insert<1>(v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint8, uint32>(const uint8* ptr1, const uint32* ptr2) const
|
__forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const
|
||||||
{
|
{
|
||||||
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
|
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __forceinline GSVector8i gather32_32<uint32, uint32>(const uint32* ptr1, const uint32* ptr2) const
|
__forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const
|
||||||
{
|
{
|
||||||
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
|
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
|
||||||
}
|
}
|
||||||
|
|
|
@ -263,6 +263,14 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// sse
|
// sse
|
||||||
|
#ifndef _WINDOWS
|
||||||
|
// Convert the gcc SSE/AVX defines into the GSdx (Windows) _M_SSE define
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
#define _M_SSE 0x501
|
||||||
|
#elif defined(__AVX__)
|
||||||
|
#define _M_SSE 0x500
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
#if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue