From 56a3ada6701b8e8645df4e00a2ef93d45a4f970a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 29 Feb 2016 14:57:52 -0800 Subject: [PATCH] Added benchmarks for full reduction --- bench/tensors/tensor_benchmarks.h | 26 ++++++++++++++++++++++++-- bench/tensors/tensor_benchmarks_gpu.cu | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h index 131d056b4..d916f787e 100644 --- a/bench/tensors/tensor_benchmarks.h +++ b/bench/tensors/tensor_benchmarks.h @@ -297,7 +297,8 @@ template class BenchmarkSuite { input_size[0] = k_; input_size[1] = n_; const TensorMap, Eigen::Aligned> B(b_, input_size); - const Eigen::array output_size = {{n_}}; + Eigen::array output_size; + output_size[0] = n_; TensorMap, Eigen::Aligned> C(c_, output_size); #ifndef EIGEN_HAS_INDEX_LIST @@ -325,7 +326,8 @@ template class BenchmarkSuite { input_size[1] = n_; const TensorMap, Eigen::Aligned> B( b_, input_size); - const Eigen::array output_size = {{k_}}; + Eigen::array output_size; + output_size[0] = k_; TensorMap, Eigen::Aligned> C( c_, output_size); @@ -347,6 +349,26 @@ template class BenchmarkSuite { finalizeBenchmark(static_cast(k_) * n_ * num_iters); } + // Full reduction + void fullReduction(int num_iters) { + Eigen::array input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap, Eigen::Aligned> B( + b_, input_size); + const Eigen::array output_size; + TensorMap, Eigen::Aligned> C( + c_, output_size); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.sum(); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast(k_) * n_ * num_iters); + } + // do a contraction which is equivalent to a matrix multiplication void contraction(int num_iters) { Eigen::array sizeA; diff --git a/bench/tensors/tensor_benchmarks_gpu.cu b/bench/tensors/tensor_benchmarks_gpu.cu index a6f594382..76d68c5c1 100644 --- a/bench/tensors/tensor_benchmarks_gpu.cu +++ b/bench/tensors/tensor_benchmarks_gpu.cu @@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc); BM_FuncGPU(transcendentalFunc); BM_FuncGPU(rowReduction); BM_FuncGPU(colReduction); +BM_FuncGPU(fullReduction); // Contractions