mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-02-17 18:09:55 +08:00
Provide "eigen" defines to decide which instruction set is used
(sse3, ssse3 and sse4), independantly from the compiler. Only those defines should be used in other places, and the user can rely on those to know which sets are used.
This commit is contained in:
parent
7c98c04412
commit
0f3d69b65e
35
Eigen/Core
35
Eigen/Core
@ -61,20 +61,45 @@
|
||||
|
||||
#ifndef EIGEN_DONT_VECTORIZE
|
||||
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
|
||||
|
||||
// Defines symbols for compile-time detection of which instructions are
|
||||
// used.
|
||||
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_SSE
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSE2
|
||||
|
||||
// Detect sse3/ssse3/sse4:
|
||||
// gcc and icc defines __SSE3__, ..,
|
||||
// there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
|
||||
// want to force the use of those instructions with msvc.
|
||||
#ifdef __SSE3__
|
||||
#include <pmmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSE3
|
||||
#endif
|
||||
#ifdef __SSSE3__
|
||||
#include <tmmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSSE3
|
||||
#endif
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#endif
|
||||
#ifdef __SSE4_2__
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
|
||||
// include files
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_2
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#elif defined __ALTIVEC__
|
||||
|
@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
return _mm_mullo_epi32(a,b);
|
||||
#else
|
||||
// this version is slightly faster than 4 scalar products
|
||||
@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
|
||||
{
|
||||
#ifdef __SSSE3__
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
return _mm_abs_epi32(a);
|
||||
#else
|
||||
Packet4i aux = _mm_srai_epi32(a,31);
|
||||
@ -278,7 +278,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
|
||||
}
|
||||
|
||||
|
||||
#ifdef __SSE3__
|
||||
#ifdef EIGEN_VECTORIZE_SSE3
|
||||
// TODO implement SSE2 versions as well as integer versions
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
|
||||
{
|
||||
@ -439,7 +439,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
|
||||
// }
|
||||
#endif
|
||||
|
||||
#ifdef __SSSE3__
|
||||
#ifdef EIGEN_VECTORIZE_SSSE3
|
||||
// SSSE3 versions
|
||||
template<int Offset>
|
||||
struct ei_palign_impl<Offset,Packet4f>
|
||||
|
Loading…
Reference in New Issue
Block a user