Fixed a typo in the reduction code that could prevent large full reductions from running properly on old CUDA devices.

This commit is contained in:
Benoit Steiner 2016-02-24 17:07:25 -08:00
parent 7a01cb8e4b
commit c36c09169e

View File

@ -515,7 +515,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
// Use the FullReducer if possible.
if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
(internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) {
(!RunningOnGPU && (internal::array_prod(m_impl.dimensions()) > 1024 * 1024)))) {
bool need_assign = false;
if (!data) {