mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-11-21 03:11:25 +08:00
add SSE4 support, start with integer multiplication
This commit is contained in:
parent
abdb2a2bd5
commit
684d76eba3
@ -63,31 +63,43 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
|
||||
if(NOT EIGEN_TEST_LIB)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
|
||||
endif(NOT EIGEN_TEST_LIB)
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_SSE2)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
|
||||
message("Enabling SSE2 in tests/examples")
|
||||
endif(EIGEN_TEST_SSE2)
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_SSE3)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
|
||||
message("Enabling SSE3 in tests/examples")
|
||||
endif(EIGEN_TEST_SSE3)
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_SSSE3)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
|
||||
message("Enabling SSSE3 in tests/examples")
|
||||
endif(EIGEN_TEST_SSSE3)
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_SSE4_1)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
|
||||
message("Enabling SSE4.1 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
|
||||
if(EIGEN_TEST_SSE4_2)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
|
||||
message("Enabling SSE4.2 in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
|
||||
message("Enabling AltiVec in tests/examples")
|
||||
endif(EIGEN_TEST_ALTIVEC)
|
||||
endif()
|
||||
|
||||
endif(CMAKE_SYSTEM_NAME MATCHES Linux)
|
||||
endif(CMAKE_COMPILER_IS_GNUCXX)
|
||||
|
@ -67,6 +67,12 @@
|
||||
#ifdef __SSSE3__
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#ifdef __SSE4_2__
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
#elif defined __ALTIVEC__
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_ALTIVEC
|
||||
|
@ -118,6 +118,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef __SSE4_1__
|
||||
return _mm_mullo_epi32(a,b);
|
||||
#else
|
||||
// this version is slightly faster than 4 scalar products
|
||||
return ei_vec4i_swizzle1(
|
||||
ei_vec4i_swizzle2(
|
||||
@ -126,6 +129,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, con
|
||||
ei_vec4i_swizzle1(b,1,0,3,2)),
|
||||
0,2,0,2),
|
||||
0,2,1,3);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f ei_pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
|
||||
|
@ -147,33 +147,45 @@ macro(ei_testing_print_summary)
|
||||
|
||||
if(EIGEN_TEST_SSE2)
|
||||
message("SSE2: ON")
|
||||
else(EIGEN_TEST_SSE2)
|
||||
else()
|
||||
message("SSE2: Using architecture defaults")
|
||||
endif(EIGEN_TEST_SSE2)
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_SSE3)
|
||||
message("SSE3: ON")
|
||||
else(EIGEN_TEST_SSE3)
|
||||
else()
|
||||
message("SSE3: Using architecture defaults")
|
||||
endif(EIGEN_TEST_SSE3)
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_SSSE3)
|
||||
message("SSSE3: ON")
|
||||
else(EIGEN_TEST_SSSE3)
|
||||
else()
|
||||
message("SSSE3: Using architecture defaults")
|
||||
endif(EIGEN_TEST_SSSE3)
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_SSE4_1)
|
||||
message("SSE4.1: ON")
|
||||
else()
|
||||
message("SSE4.1: Using architecture defaults")
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_SSE4_2)
|
||||
message("SSE4.2: ON")
|
||||
else()
|
||||
message("SSE4.2: Using architecture defaults")
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_ALTIVEC)
|
||||
# AltiVec was explicitly requested via EIGEN_TEST_ALTIVEC, so report it as ON.
message("Altivec: ON")
|
||||
else(EIGEN_TEST_ALTIVEC)
|
||||
else()
|
||||
message("Altivec: Using architecture defaults")
|
||||
endif(EIGEN_TEST_ALTIVEC)
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
||||
message("Explicit vec: OFF")
|
||||
else(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
||||
else()
|
||||
message("Explicit vec: Using architecture defaults")
|
||||
endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
|
||||
endif()
|
||||
|
||||
message("\n${EIGEN_TESTING_SUMMARY}")
|
||||
# message("CXX: ${CMAKE_CXX_COMPILER}")
|
||||
|
Loading…
Reference in New Issue
Block a user