Merged in ezhulenev/eigen/moar_eigen_fixes_1 (pull request PR-492)

Explicitly construct tensor block dimensions from evaluator dimensions
2024-12-21 07:19:46 +08:00 · 2018-09-15 01:36:21 +00:00 · 2018-09-15 01:36:21 +00:00 · 601e289d27
commit 601e289d27
parent 14e35855e1 4863375723
2 changed files with 5 additions and 4 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -282,8 +282,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
      TensorOpCost cost = evaluator.costPerCoeff(Vectorizable);
      double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(1, cost);
      size_t block_size = static_cast<size_t>(1.0 / taskSize);
-      TensorBlockMapper block_mapper(evaluator.dimensions(), block_shape,
-                                     block_size);
+      TensorBlockMapper block_mapper(
+          typename TensorBlockMapper::Dimensions(evaluator.dimensions()),
+          block_shape, block_size);
      block_size = block_mapper.block_dims_total_size();
      const size_t aligned_blocksize =
          EIGEN_MAX_ALIGN_BYTES *
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -976,7 +976,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
        // find that scattered reads are not worth supporting in
        // TensorSliceBlockMapper.
        TensorSliceBlockMapper block_mapper(
-            input_tensor_dims, tensor_slice_offsets, tensor_slice_extents,
+            typename TensorSliceBlockMapper::Dimensions(input_tensor_dims),
+            tensor_slice_offsets, tensor_slice_extents,
            target_input_block_sizes, DimensionList<Index, NumInputDims>());

        const Index num_outputs_to_update =
@@ -1232,7 +1233,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
      } else if (!first_preserved_dim_allocated) {
        // TODO(andydavis) Include output block size in this L1 working set
        // calculation.
-        const Index allocated = max_coeff_count - coeff_to_allocate;
        const Index alloc_size = numext::maxi(
            static_cast<Index>(1), coeff_to_allocate / reducer_overhead);
        (*target_input_block_sizes)[dim] =