mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-15 07:10:37 +08:00
Added benchmarks for full reduction
This commit is contained in:
parent
b2075cb7a2
commit
56a3ada670
@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
|
|||||||
input_size[0] = k_;
|
input_size[0] = k_;
|
||||||
input_size[1] = n_;
|
input_size[1] = n_;
|
||||||
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
|
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
|
||||||
const Eigen::array<TensorIndex, 1> output_size = {{n_}};
|
Eigen::array<TensorIndex, 1> output_size;
|
||||||
|
output_size[0] = n_;
|
||||||
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
|
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_INDEX_LIST
|
#ifndef EIGEN_HAS_INDEX_LIST
|
||||||
@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
|
|||||||
input_size[1] = n_;
|
input_size[1] = n_;
|
||||||
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
|
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
|
||||||
b_, input_size);
|
b_, input_size);
|
||||||
const Eigen::array<TensorIndex, 1> output_size = {{k_}};
|
Eigen::array<TensorIndex, 1> output_size;
|
||||||
|
output_size[0] = k_;
|
||||||
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
|
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
|
||||||
c_, output_size);
|
c_, output_size);
|
||||||
|
|
||||||
@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
|
|||||||
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
|
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Full reduction
|
||||||
|
void fullReduction(int num_iters) {
|
||||||
|
Eigen::array<TensorIndex, 2> input_size;
|
||||||
|
input_size[0] = k_;
|
||||||
|
input_size[1] = n_;
|
||||||
|
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
|
||||||
|
b_, input_size);
|
||||||
|
const Eigen::array<TensorIndex, 0> output_size;
|
||||||
|
TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
|
||||||
|
c_, output_size);
|
||||||
|
|
||||||
|
StartBenchmarkTiming();
|
||||||
|
for (int iter = 0; iter < num_iters; ++iter) {
|
||||||
|
C.device(device_) = B.sum();
|
||||||
|
}
|
||||||
|
// Record the number of FLOP executed per second (assuming one operation
|
||||||
|
// per value)
|
||||||
|
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
|
||||||
|
}
|
||||||
|
|
||||||
// do a contraction which is equivalent to a matrix multiplication
|
// do a contraction which is equivalent to a matrix multiplication
|
||||||
void contraction(int num_iters) {
|
void contraction(int num_iters) {
|
||||||
Eigen::array<TensorIndex, 2> sizeA;
|
Eigen::array<TensorIndex, 2> sizeA;
|
||||||
|
@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
|
|||||||
BM_FuncGPU(transcendentalFunc);
|
BM_FuncGPU(transcendentalFunc);
|
||||||
BM_FuncGPU(rowReduction);
|
BM_FuncGPU(rowReduction);
|
||||||
BM_FuncGPU(colReduction);
|
BM_FuncGPU(colReduction);
|
||||||
|
BM_FuncGPU(fullReduction);
|
||||||
|
|
||||||
|
|
||||||
// Contractions
|
// Contractions
|
||||||
|
Loading…
Reference in New Issue
Block a user