mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #333 from PCSX2/linux-avx
Support of AVX build for linux
This commit is contained in:
commit
b7e5e41afe
2
build.sh
2
build.sh
|
@ -37,6 +37,7 @@ for ARG in "$@"; do
|
|||
--wx28 ) flags+=(-DWX28_API=TRUE) ;;
|
||||
--wx30 ) flags+=(-DWX28_API=FALSE) ;;
|
||||
--64-bit-dont-work ) flags+=(-D64BIT_BUILD_DONT_WORK=TRUE) ;;
|
||||
--no-simd ) flags+=(-DDISABLE_ADVANCE_SIMD=TRUE) ;;
|
||||
|
||||
*)
|
||||
# Unknown option
|
||||
|
@ -58,6 +59,7 @@ for ARG in "$@"; do
|
|||
echo "--gles : Replace openGL backend of GSdx by openGLES3"
|
||||
echo
|
||||
echo "--64-bit-dont-work : Don't use it!"
|
||||
echo "--no-simd : Only allow sse2"
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
|
|
@ -64,6 +64,7 @@ option(USE_ASAN "Enable address sanitizer")
|
|||
# Select the architecture
|
||||
#-------------------------------------------------------------------------------
|
||||
option(64BIT_BUILD_DONT_WORK "Enable a x86_64 build instead of cross compiling (WARNING: NOTHING WORK)" OFF)
|
||||
option(DISABLE_ADVANCE_SIMD "Disable advance use of SIMD (SSE2+ & AVX)" OFF)
|
||||
|
||||
# Architecture bitness detection
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
|
@ -99,7 +100,11 @@ if(_ARCH_64 AND 64BIT_BUILD_DONT_WORK)
|
|||
# x86_64 requires -fPIC
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(ARCH_FLAG "-m64 -msse -msse2")
|
||||
if (DISABLE_ADVANCE_SIMD)
|
||||
set(ARCH_FLAG "-m64 -msse -msse2")
|
||||
else()
|
||||
set(ARCH_FLAG "-m64 -march=native -fabi-version=6")
|
||||
endif()
|
||||
add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1)
|
||||
set(_ARCH_64 1)
|
||||
set(_M_X86 1)
|
||||
|
@ -127,7 +132,13 @@ else()
|
|||
# - Only plugins. No package will link to them.
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE OFF)
|
||||
|
||||
set(ARCH_FLAG "-m32 -msse -msse2 -march=i686")
|
||||
if (DISABLE_ADVANCE_SIMD)
|
||||
set(ARCH_FLAG "-m32 -msse -msse2 -march=i686")
|
||||
else()
|
||||
# AVX requires a newer ABI version that fixes name mangling (the default is version 2)
|
||||
# Note: ABI version 6 requires GCC 4.7
|
||||
set(ARCH_FLAG "-m32 -march=native -fabi-version=6")
|
||||
endif()
|
||||
add_definitions(-D_ARCH_32=1 -D_M_X86=1 -D_M_X86_32=1)
|
||||
set(_ARCH_32 1)
|
||||
set(_M_X86 1)
|
||||
|
|
|
@ -92,6 +92,7 @@ set(GSdxSources
|
|||
GSDrawScanline.cpp
|
||||
GSDrawScanlineCodeGenerator.cpp
|
||||
GSDrawScanlineCodeGenerator.x86.avx.cpp
|
||||
GSDrawScanlineCodeGenerator.x86.avx2.cpp
|
||||
GSDrawScanlineCodeGenerator.x64.cpp
|
||||
GSDrawScanlineCodeGenerator.x86.cpp
|
||||
GSDrawScanlineCodeGenerator.x64.avx.cpp
|
||||
|
@ -109,6 +110,7 @@ set(GSdxSources
|
|||
GSSetting.cpp
|
||||
GSSetupPrimCodeGenerator.cpp
|
||||
GSSetupPrimCodeGenerator.x86.avx.cpp
|
||||
GSSetupPrimCodeGenerator.x86.avx2.cpp
|
||||
GSSetupPrimCodeGenerator.x64.avx.cpp
|
||||
GSSetupPrimCodeGenerator.x86.cpp
|
||||
GSSetupPrimCodeGenerator.x64.cpp
|
||||
|
|
|
@ -3810,7 +3810,8 @@ public:
|
|||
|
||||
template<int i> __forceinline GSVector8i sll() const
|
||||
{
|
||||
return GSVector8i(_mm256_slli_si128(m, i));
|
||||
return GSVector8i(_mm256_slli_si256(m, i));
|
||||
//return GSVector8i(_mm256_slli_si128(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector8i sra16(int i) const
|
||||
|
@ -4260,17 +4261,17 @@ public:
|
|||
return cast(v0).insert<1>(v1);
|
||||
}
|
||||
|
||||
template<> __forceinline GSVector8i gather32_32<uint8>(const uint8* ptr) const
|
||||
__forceinline GSVector8i gather32_32(const uint8* ptr) const
|
||||
{
|
||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
||||
}
|
||||
|
||||
template<> __forceinline GSVector8i gather32_32<uint16>(const uint16* ptr) const
|
||||
__forceinline GSVector8i gather32_32(const uint16* ptr) const
|
||||
{
|
||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
||||
}
|
||||
|
||||
template<> __forceinline GSVector8i gather32_32<uint32>(const uint32* ptr) const
|
||||
__forceinline GSVector8i gather32_32(const uint32* ptr) const
|
||||
{
|
||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
||||
}
|
||||
|
@ -4296,12 +4297,12 @@ public:
|
|||
return cast(v0).insert<1>(v1);
|
||||
}
|
||||
|
||||
template<> __forceinline GSVector8i gather32_32<uint8, uint32>(const uint8* ptr1, const uint32* ptr2) const
|
||||
__forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const
|
||||
{
|
||||
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
|
||||
}
|
||||
|
||||
template<> __forceinline GSVector8i gather32_32<uint32, uint32>(const uint32* ptr1, const uint32* ptr2) const
|
||||
__forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const
|
||||
{
|
||||
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
|
||||
}
|
||||
|
|
|
@ -263,6 +263,14 @@ struct aligned_free_second {template<class T> void operator()(T& p) {_aligned_fr
|
|||
#endif
|
||||
|
||||
// sse
|
||||
#ifndef _WINDOWS
|
||||
// Convert the GCC SSE/AVX defines into the GSdx (Windows) _M_SSE define
|
||||
#if defined(__AVX2__)
|
||||
#define _M_SSE 0x501
|
||||
#elif defined(__AVX__)
|
||||
#define _M_SSE 0x500
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
||||
|
||||
|
|
Loading…
Reference in New Issue