mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-03-13 18:37:27 +08:00
Revert PR-292. After further investigation, the memcpy->memmove change was only good for Haswell on older versions of glibc. Adding a switch for small sizes is perhaps useful for string copies, but also has an overhead for larger sizes, making it a poor trade-off for general memcpy.
This PR also removes a couple of unnecessary semicolons in Eigen/src/Core/AssignEvaluator.h that caused compiler warnings everywhere.
This commit is contained in:
parent
25a1703579
commit
edaa0fc5d1
@ -708,7 +708,7 @@ void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(dst);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(src);
|
||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||
};
|
||||
}
|
||||
|
||||
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -719,7 +719,7 @@ void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::a
|
||||
if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
|
||||
dst.resize(dstRows, dstCols);
|
||||
eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
|
||||
};
|
||||
}
|
||||
|
||||
template<typename DstXprType, typename SrcXprType, typename Functor>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
|
||||
|
@ -74,41 +74,6 @@ inline void throw_std_bad_alloc()
|
||||
#endif
|
||||
}
|
||||
|
||||
// Copies `size` bytes from `src` to `dst`.
//
// When __CUDA__ or __ANDROID__ is defined, the call is forwarded directly to
// ::memcpy. Otherwise the function switches on `size`: sizes 1 through 16 each
// call memcpy with a literal byte count, and every other size (including 0)
// takes the default branch, which calls memmove when EIGEN_OS_LINUX is defined
// and memcpy otherwise.
//
// NOTE(review): the accompanying commit message describes this helper as being
// reverted (the small-size switch adds overhead for larger copies and the
// memmove advantage was limited to older glibc on Haswell) -- confirm before
// relying on it.
EIGEN_DEVICE_FUNC
|
||||
inline void fast_memcpy(void* dst, const void* src, size_t size) {
|
||||
#if defined(__CUDA__) || defined(__ANDROID__)
|
||||
::memcpy(dst, src, size);
|
||||
#else
|
||||
switch(size) {
|
||||
// Most compilers will generate inline code for fixed sizes,
|
||||
// which is significantly faster for small copies.
|
||||
case 1: memcpy(dst, src, 1); break;
|
||||
case 2: memcpy(dst, src, 2); break;
|
||||
case 3: memcpy(dst, src, 3); break;
|
||||
case 4: memcpy(dst, src, 4); break;
|
||||
case 5: memcpy(dst, src, 5); break;
|
||||
case 6: memcpy(dst, src, 6); break;
|
||||
case 7: memcpy(dst, src, 7); break;
|
||||
case 8: memcpy(dst, src, 8); break;
|
||||
case 9: memcpy(dst, src, 9); break;
|
||||
case 10: memcpy(dst, src, 10); break;
|
||||
case 11: memcpy(dst, src, 11); break;
|
||||
case 12: memcpy(dst, src, 12); break;
|
||||
case 13: memcpy(dst, src, 13); break;
|
||||
case 14: memcpy(dst, src, 14); break;
|
||||
case 15: memcpy(dst, src, 15); break;
|
||||
case 16: memcpy(dst, src, 16); break;
|
||||
#ifdef EIGEN_OS_LINUX
|
||||
// On Linux, memmove appears to be faster than memcpy for
|
||||
// large sizes, strangely enough.
|
||||
default: memmove(dst, src, size); break;
|
||||
#else
|
||||
default: memcpy(dst, src, size); break;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*** Implementation of handmade aligned functions ***
|
||||
*****************************************************************************/
|
||||
@ -528,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
|
||||
IntPtr size = IntPtr(end)-IntPtr(start);
|
||||
if(size==0) return;
|
||||
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
||||
fast_memcpy(target, start, size);
|
||||
memcpy(target, start, size);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -56,7 +56,7 @@ void pack_simple(Scalar * dst, const Scalar * src, Index cols, Index rows, Index
|
||||
} else {
|
||||
// Naive memcpy calls
|
||||
for (Index col = 0; col < cols; ++col) {
|
||||
internal::fast_memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
|
||||
memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ struct DefaultDevice {
|
||||
internal::aligned_free(buffer);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||
internal::fast_memcpy(dst, src, n);
|
||||
::memcpy(dst, src, n);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
||||
memcpy(dst, src, n);
|
||||
|
@ -106,7 +106,7 @@ struct ThreadPoolDevice {
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||
internal::fast_memcpy(dst, src, n);
|
||||
::memcpy(dst, src, n);
|
||||
}
|
||||
EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
||||
memcpy(dst, src, n);
|
||||
|
Loading…
x
Reference in New Issue
Block a user