Provide "eigen" defines to decide which instruction set is used

(sse3, ssse3 and sse4), independently of the compiler.
Only those defines should be used in other places, and the user can
rely on those to know which sets are used.
This commit is contained in:
Thomas Capricelli 2010-02-24 21:43:30 +01:00
parent 7c98c04412
commit 0f3d69b65e
2 changed files with 34 additions and 9 deletions

View File

@ -61,20 +61,45 @@
#ifndef EIGEN_DONT_VECTORIZE
#if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
// Defines symbols for compile-time detection of which instructions are
// used.
// EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_SSE
#include <emmintrin.h>
#include <xmmintrin.h>
#define EIGEN_VECTORIZE_SSE2
// Detect sse3/ssse3/sse4:
// gcc and icc define __SSE3__, __SSSE3__, etc.;
// there is no way to detect this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
// want to force the use of those instructions with msvc.
#ifdef __SSE3__
#include <pmmintrin.h>
#define EIGEN_VECTORIZE_SSE3
#endif
#ifdef __SSSE3__
#include <tmmintrin.h>
#define EIGEN_VECTORIZE_SSSE3
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#define EIGEN_VECTORIZE_SSE4_1
#endif
#ifdef __SSE4_2__
#define EIGEN_VECTORIZE_SSE4_2
#endif
// include files
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef EIGEN_VECTORIZE_SSE3
#include <pmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
#include <tmmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
#include <smmintrin.h>
#endif
#ifdef EIGEN_VECTORIZE_SSE4_2
#include <nmmintrin.h>
#endif
#elif defined __ALTIVEC__

View File

@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef __SSE4_1__
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_mullo_epi32(a,b);
#else
// this version is slightly faster than 4 scalar products
@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
{
#ifdef __SSSE3__
#ifdef EIGEN_VECTORIZE_SSSE3
return _mm_abs_epi32(a);
#else
Packet4i aux = _mm_srai_epi32(a,31);
@ -278,7 +278,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
}
#ifdef __SSE3__
#ifdef EIGEN_VECTORIZE_SSE3
// TODO implement SSE2 versions as well as integer versions
template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
{
@ -439,7 +439,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
// }
#endif
#ifdef __SSSE3__
#ifdef EIGEN_VECTORIZE_SSSE3
// SSSE3 versions
template<int Offset>
struct ei_palign_impl<Offset,Packet4f>