Added benchmarks for full reduction

2024-12-15 07:10:37 +08:00 · 2016-02-29 14:57:52 -08:00 · 2016-02-29 14:57:52 -08:00 · 56a3ada670
commit 56a3ada670
parent b2075cb7a2
2 changed files with 25 additions and 2 deletions
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
    input_size[0] = k_;
    input_size[1] = n_;
    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    Eigen::array<TensorIndex, 1> output_size;
+    output_size[0] = n_;
    TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);

 #ifndef EIGEN_HAS_INDEX_LIST
@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
    input_size[1] = n_;
    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
        b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{k_}};
+    Eigen::array<TensorIndex, 1> output_size;
+    output_size[0] = k_;
    TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
        c_, output_size);

@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
    finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
  }

+  // Full reduction: sum all k_ x n_ values of B into the rank-0 tensor C
+  void fullReduction(int num_iters) {
+    Eigen::array<TensorIndex, 2> input_size;
+    input_size[0] = k_;
+    input_size[1] = n_;
+    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
+        b_, input_size);
+    Eigen::array<TensorIndex, 0> output_size;  // rank 0: no extents to set
+    TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(
+        c_, output_size);  // scalar type T, same as the input map B
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.sum();
+    }
+    // Record the number of FLOP executed per second (assuming one operation
+    // per value reduced, i.e. k_ * n_ per iteration)
+    finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
+  }
+
  // do a contraction which is equivalent to a matrix multiplication
  void contraction(int num_iters) {
    Eigen::array<TensorIndex, 2> sizeA;
--- a/bench/tensors/tensor_benchmarks_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_gpu.cu
@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
 BM_FuncGPU(transcendentalFunc);
 BM_FuncGPU(rowReduction);
 BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);


 // Contractions