Mirror of https://gitlab.com/libeigen/eigen.git
Don't crash when attempting to reduce empty tensors.
commit 2dde1b1028
parent a792cd357d
@@ -238,7 +238,7 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable>::run(
                          device.maxCudaThreadsPerMultiProcessor() / block_size;
   const Index size = array_prod(evaluator.dimensions());
   // Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
-  const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
+  const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, divup<int>(size, block_size)), 1);
 
   LAUNCH_CUDA_KERNEL(
       (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, Index>),
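For context, the clamp around the new divup call is what keeps the launch configuration valid: the block count is capped at max_blocks and floored at 1, so even a zero-sized tensor launches a single (trivial) block rather than an invalid zero-block grid. A minimal standalone sketch of that clamping logic in plain C++, using std::min/std::max in place of Eigen's numext::mini/numext::maxi and illustrative values for block_size and max_blocks:

#include <algorithm>
#include <cstdio>

// Hypothetical stand-in for Eigen's divup<int>: ceiling division for non-negative ints.
int divup_int(int x, int y) { return (x + y - 1) / y; }

int main() {
  const int block_size = 1024;  // illustrative, e.g. what maxCudaThreadsPerBlock() might return
  const int max_blocks = 120;   // illustrative: SM count * threads per SM / block_size

  const int sizes[] = {0, 1, 1024, 1000000};
  for (int size : sizes) {
    // Clamp between 1 and max_blocks: at least one block even for an empty tensor.
    const int num_blocks =
        std::max(std::min(max_blocks, divup_int(size, block_size)), 1);
    std::printf("size=%d -> num_blocks=%d\n", size, num_blocks);
  }
  return 0;
}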
@@ -24,9 +24,17 @@ const T2& choose(Cond<false>, const T1&, const T2& second) {
   return second;
 }
 
-template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+
+template <typename T, typename X, typename Y>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T divup(const X x, const Y y) {
+  return static_cast<T>((x + y - 1) / y);
+}
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T divup(const T x, const T y) {
-  return (x + y - 1) / y;
+  return static_cast<T>((x + y - 1) / y);
 }
 
 template <size_t n> struct max_n_1 {
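The divup helper introduced above is plain ceiling division for positive integers; the static_cast and the separate <T, X, Y> overload let callers mix operand types (for example a 64-bit coefficient count and an int divisor) while choosing the result type explicitly. A small self-contained sketch, with divup_sketch as a hypothetical stand-in for the helper added in this hunk:

#include <cassert>
#include <cstdint>

// Sketch of the divup idiom added above: ceiling division for positive integers.
// The <T, X, Y> overload lets callers mix operand types (e.g. a 64-bit Index
// numerator with an int divisor) and pick the result type explicitly.
template <typename T, typename X, typename Y>
T divup_sketch(const X x, const Y y) {
  return static_cast<T>((x + y - 1) / y);
}

int main() {
  assert(divup_sketch<int>(0, 256) == 0);    // empty input -> zero blocks before any clamping
  assert(divup_sketch<int>(1, 256) == 1);
  assert(divup_sketch<int>(256, 256) == 1);
  assert(divup_sketch<int>(257, 256) == 2);

  // A 64-bit coefficient count narrowed to an int block count via the cast.
  const std::int64_t num_coeffs = 1 << 20;   // 1048576 coefficients
  assert(divup_sketch<int>(num_coeffs, 256 * 128) == 32);
  return 0;
}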
@@ -134,9 +134,14 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
   typedef typename Self::Index Index;
 
   const Index num_coeffs = array_prod(self.m_impl.dimensions());
+  // Don't crash when we're called with an input tensor of size 0.
+  if (num_coeffs == 0) {
+    return;
+  }
+
   const int block_size = 256;
   const int num_per_thread = 128;
-  const int num_blocks = numext::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
+  const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
 
   if (num_blocks > 1) {
     // We initialize the outputs outside the reduction kernel when we can't be sure that there
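The guard added inside FullReducer above returns before any launch parameters are computed, so a tensor with a zero dimension never reaches the kernel launch; the float-based numext::ceil is also replaced by the integer divup. The following is an illustrative plain-C++ sketch of that control flow (run_full_reduction_sketch and its printed messages are invented for illustration and are not Eigen API):

#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Illustrative stand-in for the guard added above: bail out before computing
// any launch parameters when the input tensor is empty.
void run_full_reduction_sketch(const std::vector<int>& dimensions) {
  const long long num_coeffs = std::accumulate(
      dimensions.begin(), dimensions.end(), 1LL, std::multiplies<long long>());

  // Don't attempt a reduction over zero coefficients.
  if (num_coeffs == 0) {
    std::puts("empty tensor: nothing to reduce");
    return;
  }

  const int block_size = 256;
  const int num_per_thread = 128;
  // Same ceiling division the patch expresses as divup<int>(num_coeffs, block_size * num_per_thread).
  const int num_blocks = static_cast<int>(
      (num_coeffs + block_size * num_per_thread - 1) / (block_size * num_per_thread));
  std::printf("would launch %d block(s)\n", num_blocks);
}

int main() {
  run_full_reduction_sketch({4, 0, 8});   // a zero dimension makes the tensor empty
  run_full_reduction_sketch({64, 1024});  // 65536 coefficients -> 2 blocks
  return 0;
}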