From e7094883610137e784e845c6e60c3ea920a91deb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 20 Apr 2015 17:39:45 -0700 Subject: [PATCH 1/2] Silenced a few compilation warnings --- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index cb95755a7..bfd01ad6e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -197,7 +197,7 @@ int get_random_seed() { #else timespec ts; clock_gettime(CLOCK_REALTIME, &ts); - return ts.tv_nsec; + return static_cast(ts.tv_nsec); #endif } } @@ -220,7 +220,7 @@ template class UniformRandomGenerator { return random(); } template - typename internal::packet_traits::type packetOp(Index i, Index j = 0) const { + typename internal::packet_traits::type packetOp(Index, Index = 0) const { const int packetSize = internal::packet_traits::size; EIGEN_ALIGN_DEFAULT T values[packetSize]; for (int i = 0; i < packetSize; ++i) { @@ -252,8 +252,8 @@ template <> class UniformRandomGenerator { typename internal::packet_traits::type packetOp(Index i, Index j = 0) const { const int packetSize = internal::packet_traits::size; EIGEN_ALIGN_DEFAULT float values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = this->operator()(i, j); + for (int k = 0; k < packetSize; ++k) { + values[k] = this->operator()(i, j); } return internal::pload::type>(values); } @@ -285,8 +285,8 @@ template <> class UniformRandomGenerator { typename internal::packet_traits::type packetOp(Index i, Index j = 0) const { const int packetSize = internal::packet_traits::size; EIGEN_ALIGN_DEFAULT double values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = this->operator()(i, j); + for (int k = 0; k < packetSize; ++k) { + values[k] = this->operator()(i, j); } return internal::pload::type>(values); } From dfa991cbae98cde7db5aef5ff1bb4b3d51cc362b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 21 Apr 2015 16:15:45 -0700 Subject: [PATCH 2/2] Make sure that the copy constructor of the evaluator is always called before launching the evaluation of a tensor expression on a cuda device. --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index bb2f8b977..02e1667b9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -157,7 +157,11 @@ class TensorExecutor template __global__ void __launch_bounds__(1024) -EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) { +EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) { + // Cuda memcopies the kernel arguments. That's fine for POD, but for more + // complex types such as evaluators we should really conform to the C++ + // standard and call a proper copy constructor. + Evaluator eval(memcopied_eval); const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; const Index step_size = blockDim.x * gridDim.x; @@ -171,7 +175,11 @@ EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) { template __global__ void __launch_bounds__(1024) -EigenMetaKernel_Vectorizable(Evaluator eval, Index size) { +EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { + // Cuda memcopies the kernel arguments. That's fine for POD, but for more + // complex types such as evaluators we should really conform to the C++ + // standard and call a proper copy constructor. + Evaluator eval(memcopied_eval); const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; const Index step_size = blockDim.x * gridDim.x;