diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h index dda89897a..9027acee7 100644 --- a/Eigen/src/Core/MatrixStorage.h +++ b/Eigen/src/Core/MatrixStorage.h @@ -127,7 +127,7 @@ template class ei_matrix_storage inline explicit ei_matrix_storage() : m_data(0), m_rows(0), m_cols(0) {} inline ei_matrix_storage(int size, int rows, int cols) : m_data(ei_aligned_malloc(size)), m_rows(rows), m_cols(cols) {} - inline ~ei_matrix_storage() { ei_aligned_free(m_data); } + inline ~ei_matrix_storage() { ei_aligned_free(m_data, m_rows*m_cols); } inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } inline int rows(void) const {return m_rows;} @@ -136,7 +136,7 @@ template class ei_matrix_storage { if(size != m_rows*m_cols) { - ei_aligned_free(m_data); + ei_aligned_free(m_data, m_rows*m_cols); m_data = ei_aligned_malloc(size); } m_rows = rows; @@ -154,7 +154,7 @@ template class ei_matrix_storage(size)), m_cols(cols) {} - inline ~ei_matrix_storage() { ei_aligned_free(m_data); } + inline ~ei_matrix_storage() { ei_aligned_free(m_data, _Rows*m_cols); } inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } inline static int rows(void) {return _Rows;} inline int cols(void) const {return m_cols;} @@ -162,7 +162,7 @@ template class ei_matrix_storage(size); } m_cols = cols; @@ -179,7 +179,7 @@ template class ei_matrix_storage(size)), m_rows(rows) {} - inline ~ei_matrix_storage() { ei_aligned_free(m_data); } + inline ~ei_matrix_storage() { ei_aligned_free(m_data, _Cols*m_rows); } inline void swap(ei_matrix_storage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } inline int rows(void) const {return m_rows;} inline static int cols(void) {return _Cols;} @@ -187,7 +187,7 @@ template class ei_matrix_storage(size); } m_rows = rows; diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 6e9f9987c..36dd87632 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -26,7 +26,7 @@ #ifndef EIGEN_MEMORY_H #define EIGEN_MEMORY_H -#if defined(EIGEN_VECTORIZE) && !defined(_MSC_VER) +#ifdef __linux // it seems we cannot assume posix_memalign is defined in the stdlib header extern "C" int posix_memalign (void **, size_t, size_t) throw (); #endif @@ -40,13 +40,8 @@ template struct ei_aligned_array ei_aligned_array() { - #ifdef EIGEN_VECTORIZE // we only want this assertion if EIGEN_VECTORIZE is defined. - // indeed, if it's not defined then WithAlignedOperatorNew is empty and hence there's not much point - // requiring the user to inherit it! Would be best practice, but we already decided at several places - // to only do special alignment if vectorization is enabled. ei_assert((reinterpret_cast(array) & 0xf) == 0 && "this assertion is explained here: http://eigen.tuxfamily.org/api/UnalignedArrayAssert.html **** READ THIS WEB PAGE !!! ****"); - #endif } }; @@ -69,59 +64,64 @@ template<> struct ei_force_aligned_malloc { enum template inline T* ei_aligned_malloc(size_t size) { - T* result; - - #ifdef EIGEN_VECTORIZE if(ei_packet_traits::size>1 || ei_force_aligned_malloc::ret) { + void *void_result; #ifdef __linux #ifdef EIGEN_EXCEPTIONS const int failed = #endif - posix_memalign(reinterpret_cast(&result), 16, size*sizeof(T)); + posix_memalign(&void_result, 16, size*sizeof(T)); #else #ifdef _MSC_VER - result = static_cast(_aligned_malloc(size*sizeof(T), 16)); + void_result = _aligned_malloc(size*sizeof(T), 16); #else - result = static_cast(_mm_malloc(size*sizeof(T),16)); + void_result = _mm_malloc(size*sizeof(T), 16); #endif - #ifdef EIGEN_EXCEPTIONS - const int failed = (result == 0); + const int failed = (void_result == 0); #endif #endif #ifdef EIGEN_EXCEPTIONS if(failed) throw std::bad_alloc(); #endif + // if the user uses Eigen on some fancy scalar type such as multiple-precision numbers, + // and this type has a custom operator new, then we want to honor this operator new! + // so when we use C functions to allocate memory, we must be careful to call manually the constructor using + // the special placement-new syntax. + return new(void_result) T[size]; } else - #endif - result = new T[size]; // here we really want a new, not a malloc. Justification: if the user uses Eigen on + return new T[size]; // here we really want a new, not a malloc. Justification: if the user uses Eigen on // some fancy scalar type such as multiple-precision numbers, and this type has a custom operator new, // then we want to honor this operator new! Anyway this type won't have vectorization so the vectorizing path // is irrelevant here. Yes, we should say somewhere in the docs that if the user uses a custom scalar type then // he can't have both vectorization and a custom operator new on his scalar type. - - return result; } -/** \internal free memory allocated with ei_aligned_malloc */ +/** \internal free memory allocated with ei_aligned_malloc + * The \a size parameter is used to determine on how many elements to call the destructor. If you don't + * want any destructor to be called, just pass 0. + */ template -inline void ei_aligned_free(T* ptr) +inline void ei_aligned_free(T* ptr, size_t size) { - #ifdef EIGEN_VECTORIZE if (ei_packet_traits::size>1 || ei_force_aligned_malloc::ret) - #if defined(__linux) - free(ptr); - #elif defined(_MSC_VER) - _aligned_free(ptr); - #else - _mm_free(ptr); - #endif + { + // need to call manually the dtor in case T is some user-defined fancy numeric type. + // always destruct an array starting from the end. + while(size) ptr[--size].~T(); + #if defined(__linux) + free(ptr); + #elif defined(_MSC_VER) + _aligned_free(ptr); + #else + _mm_free(ptr); + #endif + } else - #endif - delete[] ptr; + delete[] ptr; } /** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */ @@ -153,10 +153,10 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset) #define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \ ? ei_aligned_malloc(SIZE) \ : (TYPE*)alloca(sizeof(TYPE)*(SIZE))) - #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR) + #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE) #else #define ei_alloc_stack(TYPE,SIZE) ei_aligned_malloc(SIZE) - #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR) + #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE) #endif /** \class WithAlignedOperatorNew @@ -200,8 +200,6 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset) */ struct WithAlignedOperatorNew { - #ifdef EIGEN_VECTORIZE - void *operator new(size_t size) throw() { return ei_aligned_malloc(size); @@ -212,10 +210,8 @@ struct WithAlignedOperatorNew return ei_aligned_malloc(size); } - void operator delete(void * ptr) { ei_aligned_free(static_cast(ptr)); } - void operator delete[](void * ptr) { ei_aligned_free(static_cast(ptr)); } - - #endif + void operator delete(void * ptr) { ei_aligned_free(static_cast(ptr), 0); } + void operator delete[](void * ptr) { ei_aligned_free(static_cast(ptr), 0); } }; template::gene_vector gene_vector; static void free_matrix(gene_matrix & A, int N){ - ei_aligned_free(A); + ei_aligned_free(A, 0); } static void free_vector(gene_vector & B){ - ei_aligned_free(B); + ei_aligned_free(B, 0); } static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ diff --git a/test/map.cpp b/test/map.cpp index ad0920139..83953b698 100644 --- a/test/map.cpp +++ b/test/map.cpp @@ -45,8 +45,8 @@ template void map_class(const VectorType& m) VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, ma3); - ei_aligned_free(array1); - ei_aligned_free(array2); + ei_aligned_free(array1, size); + ei_aligned_free(array2, size); delete[] array3; } @@ -71,8 +71,8 @@ template void map_static_methods(const VectorType& m) VERIFY_IS_APPROX(ma1, ma2); VERIFY_IS_APPROX(ma1, ma3); - ei_aligned_free(array1); - ei_aligned_free(array2); + ei_aligned_free(array1, size); + ei_aligned_free(array2, size); delete[] array3; }