mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 19:40:45 +08:00
Enable FMA with MSVC (through /arch:AVX2). To make this possible, I also had to turn the #warning regarding AVX512-FMA into an #error.
This commit is contained in:
parent
f233c6194d
commit
7b6d0ff1f6
@ -250,7 +250,7 @@ if(NOT MSVC)
|
||||
|
||||
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_AVX512)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -DEIGEN_ENABLE_AVX512")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -DEIGEN_ENABLE_AVX512")
|
||||
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fabi-version=6")
|
||||
endif()
|
||||
@ -350,6 +350,19 @@ else(NOT MSVC)
|
||||
endif(NOT CMAKE_CL_64)
|
||||
message(STATUS "Enabling SSE2 in tests/examples")
|
||||
endif(EIGEN_TEST_SSE2)
|
||||
|
||||
option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF)
|
||||
if(EIGEN_TEST_AVX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
|
||||
message(STATUS "Enabling AVX in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
||||
message(STATUS "Enabling FMA/AVX2 in tests/examples")
|
||||
endif()
|
||||
|
||||
endif(NOT MSVC)
|
||||
|
||||
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
|
||||
|
@ -181,7 +181,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
|
||||
return pset1<Packet8i>(0);
|
||||
}
|
||||
|
||||
#ifdef __FMA__
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||
|
@ -266,7 +266,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
|
||||
|
||||
// for some weird reasons, it has to be overloaded for packets of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
#ifdef __FMA__
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
@ -1013,7 +1013,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
|
||||
}
|
||||
|
||||
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
|
||||
#ifdef __FMA__
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
|
||||
return ::fmaf(a,b,c);
|
||||
}
|
||||
|
@ -250,15 +250,17 @@
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
|
||||
// MSVC does not expose a switch dedicated to FMA
|
||||
// For MSVC, AVX2 => FMA
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
#if defined(__AVX512F__)
|
||||
#ifndef __FMA__
|
||||
#if EIGEN_COMP_GNUC
|
||||
#warning Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
|
||||
#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
|
||||
#else
|
||||
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
|
||||
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
|
||||
#endif
|
||||
#endif
|
||||
#define EIGEN_VECTORIZE_AVX512
|
||||
|
Loading…
x
Reference in New Issue
Block a user