From 96a24b05cc836072ce0fd2b50c4e94ea652bd1aa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 21 Feb 2016 11:16:15 -0800 Subject: [PATCH 1/2] Optimized casting of tensors in the case where the casting happens to be a no-op --- unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index e254c0b7b..21bb91d69 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -195,8 +195,11 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + if (internal::is_same<TargetType, SrcType>::value) { + return m_impl.evalSubExprsIfNeeded((SrcType*)data); + } m_impl.evalSubExprsIfNeeded(NULL); return true; } From ed69cbeef00eceb15c77832967d7586bd20d6ef4 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 21 Feb 2016 11:20:20 -0800 Subject: [PATCH 2/2] Added some debugging information to the test to figure out why it fails sometimes --- unsupported/test/cxx11_tensor_of_float16_cuda.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 7449d6f8c..98f5ad83d 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -201,6 +201,8 @@ void test_cxx11_tensor_of_float16_cuda() Eigen::GpuDevice device(&stream); if (device.majorDeviceVersion() > 5 || (device.majorDeviceVersion() == 5 && device.minorDeviceVersion() >= 3)) { + std::cout << "Running test on device with capability " << device.majorDeviceVersion() << "."
<< device.minorDeviceVersion() << std::endl; + CALL_SUBTEST_1(test_cuda_conversion()); CALL_SUBTEST_1(test_cuda_elementwise()); // CALL_SUBTEST_2(test_cuda_contractions());