From f422668d39c51a7281e55404cfed4a262747eb51 Mon Sep 17 00:00:00 2001 From: Marton Danoczy Date: Fri, 4 Nov 2011 16:37:10 +0100 Subject: [PATCH] Patches to support ARM NEON with Clang 3.0 and LLVM-GCC --- Eigen/src/Core/arch/NEON/Complex.h | 4 +- Eigen/src/Core/arch/NEON/PacketMath.h | 56 +++++++++++++++++---------- Eigen/src/Core/util/Macros.h | 2 +- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 8e55548c9..212887184 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -27,8 +27,8 @@ namespace internal { -static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; -static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 }; +static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); +static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); //---------- float ---------- struct Packet2cf diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 478ef8038..6c7cd1590 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -52,6 +52,16 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) +#if defined(__llvm__) && !defined(__clang__) + //Special treatment for Apple's llvm-gcc, its NEON packet types are unions + #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} + #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} +#else + //Default initializer for packets + #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} + #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} +#endif + #ifndef __pld #define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); #endif @@ -84,7 +94,7 @@ template<> struct packet_traits : default_packet_traits }; }; -#if EIGEN_GNUC_AT_MOST(4,4) +#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__) // workaround gcc 4.2, 4.3 and 4.4 compilatin issue EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); } @@ -100,12 +110,12 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - Packet4f countdown = { 0, 1, 2, 3 }; + Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); return vaddq_f32(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { - Packet4i countdown = { 0, 1, 2, 3 }; + Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); return vaddq_s32(pset1(a), countdown); } @@ -395,25 +405,29 @@ template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) return s[0]; } -template -struct palign_impl -{ - EIGEN_STRONG_INLINE static void run(Packet4f& first, const Packet4f& second) - { - if (Offset!=0) - first = vextq_f32(first, second, Offset); - } -}; +// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, +// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 +#define PALIGN_NEON(Offset,Type,Command) \ +template<>\ +struct palign_impl\ +{\ + EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\ + {\ + if (Offset!=0)\ + first = Command(first, second, Offset);\ + }\ +};\ -template -struct palign_impl -{ - EIGEN_STRONG_INLINE static void run(Packet4i& first, const Packet4i& second) - { - if (Offset!=0) - first = vextq_s32(first, second, Offset); - } -}; +PALIGN_NEON(0,Packet4f,vextq_f32) +PALIGN_NEON(1,Packet4f,vextq_f32) +PALIGN_NEON(2,Packet4f,vextq_f32) +PALIGN_NEON(3,Packet4f,vextq_f32) +PALIGN_NEON(0,Packet4i,vextq_s32) +PALIGN_NEON(1,Packet4i,vextq_s32) +PALIGN_NEON(2,Packet4i,vextq_s32) +PALIGN_NEON(3,Packet4i,vextq_s32) + +#undef PALIGN_NEON } // end namespace internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 2b3810bcc..9676ef10e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -45,7 +45,7 @@ #define EIGEN_GNUC_AT_MOST(x,y) 0 #endif -#if EIGEN_GNUC_AT_MOST(4,3) +#if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__) // see bug 89 #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 #else