Added benchmarks for full reduction

2024-12-15 07:10:37 +08:00 · 2016-02-29 14:57:52 -08:00 · 2016-02-29 14:57:52 -08:00 · 56a3ada670
commit 56a3ada670
parent b2075cb7a2
2 changed files with 25 additions and 2 deletions
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
    input_size[0] = k_;
    input_size[1] = n_;
    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    Eigen::array<TensorIndex, 1> output_size;
    output_size[0] = n_;
    TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
 #ifndef EIGEN_HAS_INDEX_LIST
@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
    input_size[1] = n_;
    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
        b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{k_}};
+    Eigen::array<TensorIndex, 1> output_size;
    output_size[0] = k_;
    TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
        c_, output_size);
@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
    finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
  }
  // Full reduction
  void fullReduction(int num_iters) {
    Eigen::array<TensorIndex, 2> input_size;
    input_size[0] = k_;
    input_size[1] = n_;
    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
        b_, input_size);
    const Eigen::array<TensorIndex, 0> output_size;
    TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
        c_, output_size);
    StartBenchmarkTiming();
    for (int iter = 0; iter < num_iters; ++iter) {
      C.device(device_) = B.sum();
    }
    // Record the number of FLOP executed per second (assuming one operation
    // per value)
    finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
  }
  // do a contraction which is equivalent to a matrix multiplication
  void contraction(int num_iters) {
    Eigen::array<TensorIndex, 2> sizeA;
--- a/bench/tensors/tensor_benchmarks_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_gpu.cu
@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
 BM_FuncGPU(transcendentalFunc);
 BM_FuncGPU(rowReduction);
 BM_FuncGPU(colReduction);
 BM_FuncGPU(fullReduction);
 // Contractions