diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 88d485f38..d6af6fedd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -273,7 +273,7 @@ struct TensorEvaluator= nm * nn; // Also do parallel packing if all data fits into L2$. - if (m * bk * sizeof(LhsScalar) + n * bk * sizeof(RhsScalar) <= + if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <= l2CacheSize() * num_threads) parallel_pack = true; // But don't do it if we will use each rhs only once. Locality seems to be @@ -361,7 +361,7 @@ struct TensorEvaluator(internal::aligned_malloc( (nm0_ * lhs_size + nn0_ * rhs_size) * std::min(nk_, P - 1))); char* mem = static_cast(packed_mem_); - for (Index x = 0; x < numext::mini(nk_, P - 1); x++) { + for (Index x = 0; x < numext::mini(nk_, P - 1); x++) { packed_lhs_[x].resize(nm0_); for (Index m = 0; m < nm0_; m++) { packed_lhs_[x][m] = reinterpret_cast(mem);