mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-24 14:45:14 +08:00
Add performance monitoring for LLT
This commit is contained in:
parent
18de92329e
commit
1ff1d4a124
@ -31,7 +31,6 @@ before-evaluators
|
|||||||
6845:7333ed40c6ef # change prefetching in gebp
|
6845:7333ed40c6ef # change prefetching in gebp
|
||||||
#6856:b5be5e10eb7f # merge index conversion
|
#6856:b5be5e10eb7f # merge index conversion
|
||||||
6893:c3a64aba7c70 # clean blocking size computation
|
6893:c3a64aba7c70 # clean blocking size computation
|
||||||
6898:6fb31ebe6492 # rotating kernel for ARM
|
|
||||||
6899:877facace746 # rotating kernel for ARM only
|
6899:877facace746 # rotating kernel for ARM only
|
||||||
#6904:c250623ae9fa # result_of
|
#6904:c250623ae9fa # result_of
|
||||||
6921:915f1b1fc158 # fix prefetching change for ARM
|
6921:915f1b1fc158 # fix prefetching change for ARM
|
||||||
@ -50,7 +49,7 @@ before-evaluators
|
|||||||
7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code
|
7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code
|
||||||
7591:09a8e2186610 # 3.3-alpha1
|
7591:09a8e2186610 # 3.3-alpha1
|
||||||
7650:b0f3c8f43025 # help clang inlining
|
7650:b0f3c8f43025 # help clang inlining
|
||||||
8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
|
#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
|
||||||
8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
|
8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
|
||||||
8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
|
8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
|
||||||
8985:d935df21a082 # Remove the rotating kernel.
|
8985:d935df21a082 # Remove the rotating kernel.
|
||||||
@ -65,5 +64,5 @@ before-evaluators
|
|||||||
9942:b1d3eba60130 # Operators += and -= do not resize!
|
9942:b1d3eba60130 # Operators += and -= do not resize!
|
||||||
9943:79bb9887afd4 # Ease compiler job to generate clean and efficient code in mat*vec
|
9943:79bb9887afd4 # Ease compiler job to generate clean and efficient code in mat*vec
|
||||||
9946:2213991340ea # Complete rewrite of column-major-matrix * vector product to deliver higher performance of modern CPU.
|
9946:2213991340ea # Complete rewrite of column-major-matrix * vector product to deliver higher performance of modern CPU.
|
||||||
9953:21acc0e8d782 # Improve performance of row-major-dense-matrix * vector products for recent CPUs.
|
9955:630471c3298c # Improve performance of row-major-dense-matrix * vector products for recent CPUs. (this is the next changeset fixing a typo)
|
||||||
|
|
||||||
|
@ -1,17 +1,4 @@
|
|||||||
#include <iostream>
|
#include "gemm_common.h"
|
||||||
#include <fstream>
|
|
||||||
#include <vector>
|
|
||||||
#include <Eigen/Core>
|
|
||||||
#include "../../BenchTimer.h"
|
|
||||||
using namespace Eigen;
|
|
||||||
|
|
||||||
#ifndef SCALAR
|
|
||||||
#error SCALAR must be defined
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef SCALAR Scalar;
|
|
||||||
|
|
||||||
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
|
||||||
|
|
||||||
EIGEN_DONT_INLINE
|
EIGEN_DONT_INLINE
|
||||||
void gemm(const Mat &A, const Mat &B, Mat &C)
|
void gemm(const Mat &A, const Mat &B, Mat &C)
|
||||||
@ -19,52 +6,7 @@ void gemm(const Mat &A, const Mat &B, Mat &C)
|
|||||||
C.noalias() += A * B;
|
C.noalias() += A * B;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DONT_INLINE
|
|
||||||
double bench(long m, long n, long k)
|
|
||||||
{
|
|
||||||
Mat A(m,k);
|
|
||||||
Mat B(k,n);
|
|
||||||
Mat C(m,n);
|
|
||||||
A.setRandom();
|
|
||||||
B.setRandom();
|
|
||||||
C.setZero();
|
|
||||||
|
|
||||||
BenchTimer t;
|
|
||||||
|
|
||||||
double up = 1e8*4/sizeof(Scalar);
|
|
||||||
double tm0 = 4, tm1 = 10;
|
|
||||||
if(NumTraits<Scalar>::IsComplex)
|
|
||||||
{
|
|
||||||
up /= 4;
|
|
||||||
tm0 = 2;
|
|
||||||
tm1 = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
double flops = 2. * m * n * k;
|
|
||||||
long rep = std::max(1., std::min(100., up/flops) );
|
|
||||||
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
|
||||||
|
|
||||||
BENCH(t, tries, rep, gemm(A,B,C));
|
|
||||||
|
|
||||||
return 1e-9 * rep * flops / t.best();
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
std::vector<double> results;
|
return main_gemm(argc, argv, gemm);
|
||||||
|
|
||||||
std::string filename = std::string("gemm_settings.txt");
|
|
||||||
if(argc>1)
|
|
||||||
filename = std::string(argv[1]);
|
|
||||||
std::ifstream settings(filename);
|
|
||||||
long m, n, k;
|
|
||||||
while(settings >> m >> n >> k)
|
|
||||||
{
|
|
||||||
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
|
||||||
results.push_back( bench(m, n, k) );
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << RowVectorXd::Map(results.data(), results.size());
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
66
bench/perf_monitoring/gemm/gemm_common.h
Normal file
66
bench/perf_monitoring/gemm/gemm_common.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <Eigen/Core>
|
||||||
|
#include "../../BenchTimer.h"
|
||||||
|
using namespace Eigen;
|
||||||
|
|
||||||
|
#ifndef SCALAR
|
||||||
|
#error SCALAR must be defined
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef SCALAR Scalar;
|
||||||
|
|
||||||
|
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
||||||
|
|
||||||
|
template<typename Func>
|
||||||
|
EIGEN_DONT_INLINE
|
||||||
|
double bench(long m, long n, long k, const Func& f)
|
||||||
|
{
|
||||||
|
Mat A(m,k);
|
||||||
|
Mat B(k,n);
|
||||||
|
Mat C(m,n);
|
||||||
|
A.setRandom();
|
||||||
|
B.setRandom();
|
||||||
|
C.setZero();
|
||||||
|
|
||||||
|
BenchTimer t;
|
||||||
|
|
||||||
|
double up = 1e8*4/sizeof(Scalar);
|
||||||
|
double tm0 = 4, tm1 = 10;
|
||||||
|
if(NumTraits<Scalar>::IsComplex)
|
||||||
|
{
|
||||||
|
up /= 4;
|
||||||
|
tm0 = 2;
|
||||||
|
tm1 = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
double flops = 2. * m * n * k;
|
||||||
|
long rep = std::max(1., std::min(100., up/flops) );
|
||||||
|
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||||
|
|
||||||
|
BENCH(t, tries, rep, f(A,B,C));
|
||||||
|
|
||||||
|
return 1e-9 * rep * flops / t.best();
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Func>
|
||||||
|
int main_gemm(int argc, char **argv, const Func& f)
|
||||||
|
{
|
||||||
|
std::vector<double> results;
|
||||||
|
|
||||||
|
std::string filename = std::string("gemm_settings.txt");
|
||||||
|
if(argc>1)
|
||||||
|
filename = std::string(argv[1]);
|
||||||
|
std::ifstream settings(filename);
|
||||||
|
long m, n, k;
|
||||||
|
while(settings >> m >> n >> k)
|
||||||
|
{
|
||||||
|
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
||||||
|
results.push_back( bench(m, n, k, f) );
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
11
bench/perf_monitoring/gemm/gemm_square_settings.txt
Normal file
11
bench/perf_monitoring/gemm/gemm_square_settings.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
8 8 8
|
||||||
|
9 9 9
|
||||||
|
12 12 12
|
||||||
|
15 15 15
|
||||||
|
16 16 16
|
||||||
|
24 24 24
|
||||||
|
102 102 102
|
||||||
|
239 239 239
|
||||||
|
240 240 240
|
||||||
|
2400 2400 2400
|
||||||
|
2463 2463 2463
|
@ -47,14 +47,14 @@ double bench(long m, long n, Func &f)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename Func>
|
template<typename Func>
|
||||||
int main_gemv(int argc, char **argv, Func& f, const std::string &setting_filename)
|
int main_gemv(int argc, char **argv, Func& f)
|
||||||
{
|
{
|
||||||
std::vector<double> results;
|
std::vector<double> results;
|
||||||
|
|
||||||
std::string filename = std::string("gemv_settings.txt");
|
std::string filename = std::string("gemv_settings.txt");
|
||||||
if(argc>1)
|
if(argc>1)
|
||||||
filename = std::string(argv[1]);
|
filename = std::string(argv[1]);
|
||||||
std::ifstream settings(setting_filename);
|
std::ifstream settings(filename);
|
||||||
long m, n;
|
long m, n;
|
||||||
while(settings >> m >> n)
|
while(settings >> m >> n)
|
||||||
{
|
{
|
||||||
|
13
bench/perf_monitoring/gemm/gemv_square_settings.txt
Normal file
13
bench/perf_monitoring/gemm/gemv_square_settings.txt
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
8 8
|
||||||
|
9 9
|
||||||
|
12 12
|
||||||
|
15 15
|
||||||
|
16 16
|
||||||
|
24 24
|
||||||
|
53 53
|
||||||
|
74 74
|
||||||
|
102 102
|
||||||
|
239 239
|
||||||
|
240 240
|
||||||
|
2400 2400
|
||||||
|
2463 2463
|
15
bench/perf_monitoring/gemm/llt.cpp
Normal file
15
bench/perf_monitoring/gemm/llt.cpp
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#include "gemm_common.h"
|
||||||
|
#include <Eigen/Cholesky>
|
||||||
|
|
||||||
|
// Benchmark kernel with the (A,B,C) signature expected by main_gemm:
// copies A into C, adds 1000 to every diagonal entry of C and runs the
// blocked in-place LLT routine on C (lower-triangular variant).
// B is not used; it is only present to match the common kernel signature.
// NOTE(review): the diagonal shift presumably keeps the random input
// positive definite so the factorization does not break down - confirm.
EIGEN_DONT_INLINE
void llt(const Mat &A, const Mat &B, Mat &C)
{
  typedef Eigen::internal::llt_inplace<Mat::Scalar, Lower> BlockedCholesky;

  (void)B; // intentionally unused

  C = A;
  C.diagonal().array() += 1000;

  // The value returned by blocked() is deliberately ignored, as before.
  BlockedCholesky::blocked(C);
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
return main_gemm(argc, argv, llt);
|
||||||
|
}
|
@ -19,4 +19,5 @@
|
|||||||
./run.sh trmv_lo gemv_square_settings.txt $*
|
./run.sh trmv_lo gemv_square_settings.txt $*
|
||||||
./run.sh trmv_upt gemv_square_settings.txt $*
|
./run.sh trmv_upt gemv_square_settings.txt $*
|
||||||
./run.sh trmv_lot gemv_square_settings.txt $*
|
./run.sh trmv_lot gemv_square_settings.txt $*
|
||||||
|
./run.sh llt gemm_square_settings.txt $*
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#include "gemv_common.h"
|
#include "gemv_common.h"
|
||||||
|
|
||||||
EIGEN_DONT_INLINE
|
EIGEN_DONT_INLINE
|
||||||
void gemv(const Mat &A, Vec &B, const Vec &C)
|
void trmv(const Mat &A, Vec &B, const Vec &C)
|
||||||
{
|
{
|
||||||
B.noalias() += A.transpose().triangularView<Lower>() * C;
|
B.noalias() += A.transpose().triangularView<Lower>() * C;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user