add SSE4 support, start with integer multiplication

This commit is contained in:
Benoit Jacob 2009-11-24 15:12:43 -05:00
parent abdb2a2bd5
commit 684d76eba3
4 changed files with 49 additions and 15 deletions

View File

@ -63,31 +63,43 @@ if(CMAKE_COMPILER_IS_GNUCXX)
# Append -pedantic to the C++ flags unless EIGEN_TEST_LIB evaluates to true.
# A single bare endif() closes the block; a second closer here would be
# unmatched for this if().
if(NOT EIGEN_TEST_LIB)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
endif()
# Opt-in switch, OFF by default. When enabled, -msse2 is appended to
# CMAKE_CXX_FLAGS and a notice is printed at configure time.
# The if() is closed exactly once, with the bare endif() form.
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
if(EIGEN_TEST_SSE2)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
  message("Enabling SSE2 in tests/examples")
endif()
# Opt-in switch, OFF by default. When enabled, -msse3 is appended to
# CMAKE_CXX_FLAGS and a notice is printed at configure time.
# The if() is closed exactly once, with the bare endif() form.
option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
if(EIGEN_TEST_SSE3)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
  message("Enabling SSE3 in tests/examples")
endif()
# Opt-in switch, OFF by default. When enabled, -mssse3 is appended to
# CMAKE_CXX_FLAGS and a notice is printed at configure time.
# The if() is closed exactly once, with the bare endif() form.
option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
if(EIGEN_TEST_SSSE3)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
  message("Enabling SSSE3 in tests/examples")
endif()
# SSE4.1 toggle for tests/examples (defaults to OFF).
# Turning it on announces the choice and adds -msse4.1 to CMAKE_CXX_FLAGS.
option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
if(EIGEN_TEST_SSE4_1)
  message("Enabling SSE4.1 in tests/examples")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
endif()
# SSE4.2 toggle for tests/examples (defaults to OFF).
# Turning it on announces the choice and adds -msse4.2 to CMAKE_CXX_FLAGS.
option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
if(EIGEN_TEST_SSE4_2)
  message("Enabling SSE4.2 in tests/examples")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
endif()
# Opt-in switch, OFF by default. When enabled, "-maltivec -mabi=altivec" is
# appended to CMAKE_CXX_FLAGS and a notice is printed at configure time.
# The if() is closed exactly once, with the bare endif() form.
option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
if(EIGEN_TEST_ALTIVEC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
  message("Enabling AltiVec in tests/examples")
endif()
endif(CMAKE_SYSTEM_NAME MATCHES Linux)
endif(CMAKE_COMPILER_IS_GNUCXX)

View File

@ -67,6 +67,12 @@
#ifdef __SSSE3__
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#ifdef __SSE4_2__
#include <nmmintrin.h>
#endif
#elif defined __ALTIVEC__
#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_ALTIVEC

View File

@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
// Packet2d specialization of ei_pmul: forwards both operands to _mm_mul_pd.
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b)
{
  return _mm_mul_pd(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef __SSE4_1__
return _mm_mullo_epi32(a,b);
#else
// this version is slightly faster than 4 scalar products
return ei_vec4i_swizzle1(
ei_vec4i_swizzle2(
@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
ei_vec4i_swizzle1(b,1,0,3,2)),
0,2,0,2),
0,2,1,3);
#endif
}
// Packet4f specialization of ei_pdiv: forwards both operands to _mm_div_ps.
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return _mm_div_ps(a, b);
}

View File

@ -147,33 +147,45 @@ macro(ei_testing_print_summary)
# Report whether SSE2 was explicitly requested for the tests.
# Exactly one else()/endif() pair: CMake rejects a second else() in one if().
if(EIGEN_TEST_SSE2)
  message("SSE2: ON")
else()
  message("SSE2: Using architecture defaults")
endif()
# Report whether SSE3 was explicitly requested for the tests.
# Exactly one else()/endif() pair: CMake rejects a second else() in one if().
if(EIGEN_TEST_SSE3)
  message("SSE3: ON")
else()
  message("SSE3: Using architecture defaults")
endif()
# Report whether SSSE3 was explicitly requested for the tests.
# Exactly one else()/endif() pair: CMake rejects a second else() in one if().
if(EIGEN_TEST_SSSE3)
  message("SSSE3: ON")
else()
  message("SSSE3: Using architecture defaults")
endif()
# SSE4.1 line of the summary: defaults unless the option was switched on.
if(NOT EIGEN_TEST_SSE4_1)
  message("SSE4.1: Using architecture defaults")
else()
  message("SSE4.1: ON")
endif()
# SSE4.2 line of the summary: defaults unless the option was switched on.
if(NOT EIGEN_TEST_SSE4_2)
  message("SSE4.2: Using architecture defaults")
else()
  message("SSE4.2: ON")
endif()
# Report whether AltiVec was explicitly requested for the tests.
# The enabled branch prints "ON", matching the SSE entries of this summary;
# previously both branches printed the "architecture defaults" text, so the
# summary could not show that the option was set.
# Exactly one else()/endif() pair: CMake rejects a second else() in one if().
if(EIGEN_TEST_ALTIVEC)
  message("Altivec: ON")
else()
  message("Altivec: Using architecture defaults")
endif()
# Report whether explicit vectorization was disabled for the tests.
# Exactly one else()/endif() pair: CMake rejects a second else() in one if().
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
  message("Explicit vec: OFF")
else()
  message("Explicit vec: Using architecture defaults")
endif()
message("\n${EIGEN_TESTING_SUMMARY}")
# message("CXX: ${CMAKE_CXX_COMPILER}")