Don't crash when attempting to reduce empty tensors.

This commit is contained in:
Benoit Steiner 2016-04-20 18:08:20 -07:00
parent a792cd357d
commit 2dde1b1028
3 changed files with 17 additions and 4 deletions

View File

@ -238,7 +238,7 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
device.maxCudaThreadsPerMultiProcessor() / block_size;
const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash when TensorFlow calls with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1);
LAUNCH_CUDA_KERNEL(
(EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>),

View File

@ -24,9 +24,17 @@ const T2& choose(Cond<false>, const T1&, const T2& second) {
return second;
}
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
// Divides x by y, rounding the quotient up, and returns the result as T.
// The result type T is given explicitly by the caller (e.g. divup<int>(...));
// the operand types X and Y are deduced and may differ from T and from each
// other. The arithmetic is carried out in the common type of x and y and only
// the final quotient is cast to T.
// NOTE(review): the (x + y - 1) / y form yields the ceiling only for integral,
// non-negative x and strictly positive y, and assumes x + y - 1 does not
// overflow — confirm against callers.
template <typename T, typename X, typename Y>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T divup(const X x, const Y y) {
return static_cast<T>((x + y - 1) / y);
}
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T divup(const T x, const T y) {
return (x + y - 1) / y;
return static_cast<T>((x + y - 1) / y);
}
template <size_t n> struct max_n_1 {

View File

@ -134,9 +134,14 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
typedef typename Self::Index Index;
const Index num_coeffs = array_prod(self.m_impl.dimensions());
// Don't crash when we're called with an input tensor of size 0.
if (num_coeffs == 0) {
return;
}
const int block_size = 256;
const int num_per_thread = 128;
const int num_blocks = numext::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
if (num_blocks > 1) {
// We initialize the outputs outside the reduction kernel when we can't be sure that there