diff --git a/build.sh b/build.sh index 863b692097..da75e3f57c 100755 --- a/build.sh +++ b/build.sh @@ -37,6 +37,7 @@ for ARG in "$@"; do --wx28 ) flags+=(-DWX28_API=TRUE) ;; --wx30 ) flags+=(-DWX28_API=FALSE) ;; --64-bit-dont-work ) flags+=(-D64BIT_BUILD_DONT_WORK=TRUE) ;; + --no-simd ) flags+=(-DDISABLE_ADVANCE_SIMD=TRUE) ;; *) # Unknown option @@ -58,6 +59,7 @@ for ARG in "$@"; do echo "--gles : Replace openGL backend of GSdx by openGLES3" echo echo "--64-bit-dont-work : Don't use it!" + echo "--no-simd : Only allow sse2" exit 1 esac done diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index 74495e046e..f75085e5ef 100644 --- a/cmake/BuildParameters.cmake +++ b/cmake/BuildParameters.cmake @@ -64,6 +64,7 @@ option(USE_ASAN "Enable address sanitizer") # Select the architecture #------------------------------------------------------------------------------- option(64BIT_BUILD_DONT_WORK "Enable a x86_64 build instead of cross compiling (WARNING: NOTHING WORK)" OFF) +option(DISABLE_ADVANCE_SIMD "Disable advance use of SIMD (SSE2+ & AVX)" OFF) # Architecture bitness detection if(CMAKE_SIZEOF_VOID_P EQUAL 8) @@ -99,7 +100,11 @@ if(_ARCH_64 AND 64BIT_BUILD_DONT_WORK) # x86_64 requires -fPIC set(CMAKE_POSITION_INDEPENDENT_CODE ON) - set(ARCH_FLAG "-m64 -msse -msse2") + if (DISABLE_ADVANCE_SIMD) + set(ARCH_FLAG "-m64 -msse -msse2") + else() + set(ARCH_FLAG "-m64 -march=native -fabi-version=6") + endif() add_definitions(-D_ARCH_64=1 -D_M_X86=1 -D_M_X86_64=1) set(_ARCH_64 1) set(_M_X86 1) @@ -127,7 +132,13 @@ else() # - Only plugins. No package will link to them. 
set(CMAKE_POSITION_INDEPENDENT_CODE OFF) - set(ARCH_FLAG "-m32 -msse -msse2 -march=i686") + if (DISABLE_ADVANCE_SIMD) + set(ARCH_FLAG "-m32 -msse -msse2 -march=i686") + else() + # AVX requires some fix of the ABI (mangling) (default 2) + # Note: V6 requires GCC 4.7 + set(ARCH_FLAG "-m32 -march=native -fabi-version=6") + endif() add_definitions(-D_ARCH_32=1 -D_M_X86=1 -D_M_X86_32=1) set(_ARCH_32 1) set(_M_X86 1) diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index f87270b227..e9d41d8f15 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -92,6 +92,7 @@ set(GSdxSources GSDrawScanline.cpp GSDrawScanlineCodeGenerator.cpp GSDrawScanlineCodeGenerator.x86.avx.cpp + GSDrawScanlineCodeGenerator.x86.avx2.cpp GSDrawScanlineCodeGenerator.x64.cpp GSDrawScanlineCodeGenerator.x86.cpp GSDrawScanlineCodeGenerator.x64.avx.cpp @@ -109,6 +110,7 @@ set(GSdxSources GSSetting.cpp GSSetupPrimCodeGenerator.cpp GSSetupPrimCodeGenerator.x86.avx.cpp + GSSetupPrimCodeGenerator.x86.avx2.cpp GSSetupPrimCodeGenerator.x64.avx.cpp GSSetupPrimCodeGenerator.x86.cpp GSSetupPrimCodeGenerator.x64.cpp diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index 4fe05cfea7..a9d21c27c5 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -3810,7 +3810,8 @@ public: template __forceinline GSVector8i sll() const { - return GSVector8i(_mm256_slli_si128(m, i)); + return GSVector8i(_mm256_slli_si256(m, i)); + //return GSVector8i(_mm256_slli_si128(m, i)); } __forceinline GSVector8i sra16(int i) const @@ -4260,17 +4261,17 @@ public: return cast(v0).insert<1>(v1); } - template<> __forceinline GSVector8i gather32_32(const uint8* ptr) const + __forceinline GSVector8i gather32_32(const uint8* ptr) const { return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff(); } - template<> __forceinline GSVector8i gather32_32(const uint16* ptr) const + __forceinline GSVector8i gather32_32(const uint16* ptr) const { return 
GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff(); } - template<> __forceinline GSVector8i gather32_32(const uint32* ptr) const + __forceinline GSVector8i gather32_32(const uint32* ptr) const { return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4)); } @@ -4296,12 +4297,12 @@ public: return cast(v0).insert<1>(v1); } - template<> __forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const + __forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const { return gather32_32(ptr1).gather32_32(ptr2); } - template<> __forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const + __forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const { return gather32_32(ptr1).gather32_32(ptr2); } diff --git a/plugins/GSdx/stdafx.h b/plugins/GSdx/stdafx.h index bec5ec3e00..02626f8dad 100644 --- a/plugins/GSdx/stdafx.h +++ b/plugins/GSdx/stdafx.h @@ -263,6 +263,14 @@ struct aligned_free_second {template void operator()(T& p) {_aligned_fr #endif // sse +#ifndef _WINDOWS +// Convert gcc SSE define into GSdx (windows) define +#if defined(__AVX2__) + #define _M_SSE 0x501 +#elif defined(__AVX__) + #define _M_SSE 0x500 +#endif +#endif #if !defined(_M_SSE) && (!defined(_WINDOWS) || defined(_M_AMD64) || defined(_M_IX86_FP) && _M_IX86_FP >= 2)