2
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2025-04-24 19:40:45 +08:00

Enable FMA with MSVC (through /arch:AVX2). To make this possible, I also has to turn the #warning regarding AVX512-FMA to a #error.

This commit is contained in:
Gael Guennebaud 2018-12-07 15:14:50 +01:00
parent f233c6194d
commit 7b6d0ff1f6
4 changed files with 22 additions and 7 deletions

@ -250,7 +250,7 @@ if(NOT MSVC)
option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF)
if(EIGEN_TEST_AVX512)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -DEIGEN_ENABLE_AVX512")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -DEIGEN_ENABLE_AVX512")
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fabi-version=6")
endif()
@ -350,6 +350,19 @@ else(NOT MSVC)
endif(NOT CMAKE_CL_64)
message(STATUS "Enabling SSE2 in tests/examples")
endif(EIGEN_TEST_SSE2)
option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF)
if(EIGEN_TEST_AVX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX")
message(STATUS "Enabling AVX in tests/examples")
endif()
option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF)
if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
message(STATUS "Enabling FMA/AVX2 in tests/examples")
endif()
endif(NOT MSVC)
option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)

@ -181,7 +181,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
return pset1<Packet8i>(0);
}
#ifdef __FMA__
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,

@ -266,7 +266,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
// for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
#ifdef __FMA__
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
#endif
@ -1013,7 +1013,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
}
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
#ifdef __FMA__
#ifdef EIGEN_VECTORIZE_FMA
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
return ::fmaf(a,b,c);
}

@ -250,15 +250,17 @@
#define EIGEN_VECTORIZE_SSE4_1
#define EIGEN_VECTORIZE_SSE4_2
#endif
#ifdef __FMA__
#if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
// MSVC does not expose a switch dedicated for FMA
// For MSVC, AVX2 => FMA
#define EIGEN_VECTORIZE_FMA
#endif
#if defined(__AVX512F__)
#ifndef __FMA__
#if EIGEN_COMP_GNUC
#warning Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
#error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
#else
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
#error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
#endif
#endif
#define EIGEN_VECTORIZE_AVX512